001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019 package org.apache.commons.compress.archivers.zip; 020 021 import java.io.ByteArrayInputStream; 022 import java.io.ByteArrayOutputStream; 023 import java.io.EOFException; 024 import java.io.IOException; 025 import java.io.InputStream; 026 import java.io.PushbackInputStream; 027 import java.util.zip.CRC32; 028 import java.util.zip.DataFormatException; 029 import java.util.zip.Inflater; 030 import java.util.zip.ZipEntry; 031 import java.util.zip.ZipException; 032 033 import org.apache.commons.compress.archivers.ArchiveEntry; 034 import org.apache.commons.compress.archivers.ArchiveInputStream; 035 036 import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 037 import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 038 import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 039 import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 040 041 /** 042 * Implements an input stream that can read Zip archives. 043 * 044 * <p>Note that {@link ZipArchiveEntry#getSize()} may return -1 if the 045 * DEFLATE algorithm is used, as the size information is not available 046 * from the header.</p> 047 * 048 * <p>The {@link ZipFile} class is preferred when reading from files.</p> 049 * 050 * <p>As of Apache Commons Compress it transparently supports Zip64 051 * extensions and thus individual entries and archives larger than 4 052 * GB or with more than 65536 entries.</p> 053 * 054 * @see ZipFile 055 * @NotThreadSafe 056 */ 057 public class ZipArchiveInputStream extends ArchiveInputStream { 058 059 /** 060 * The zip encoding to use for filenames and the file comment. 061 */ 062 private final ZipEncoding zipEncoding; 063 064 /** 065 * Whether to look for and use Unicode extra fields. 066 */ 067 private final boolean useUnicodeExtraFields; 068 069 /** 070 * Wrapped stream, will always be a PushbackInputStream. 071 */ 072 private final InputStream in; 073 074 /** 075 * Inflater used for all deflated entries. 076 */ 077 private final Inflater inf = new Inflater(true); 078 079 /** 080 * Calculates checkusms for all entries. 081 */ 082 private final CRC32 crc = new CRC32(); 083 084 /** 085 * Buffer used to read from the wrapped stream. 086 */ 087 private final Buffer buf = new Buffer(); 088 /** 089 * The entry that is currently being read. 090 */ 091 private CurrentEntry current = null; 092 /** 093 * Whether the stream has been closed. 094 */ 095 private boolean closed = false; 096 /** 097 * Whether the stream has reached the central directory - and thus 098 * found all entries. 099 */ 100 private boolean hitCentralDirectory = false; 101 /** 102 * When reading a stored entry that uses the data descriptor this 103 * stream has to read the full entry and caches it. This is the 104 * cache. 105 */ 106 private ByteArrayInputStream lastStoredEntry = null; 107 108 /** 109 * Whether the stream will try to read STORED entries that use a 110 * data descriptor. 111 */ 112 private boolean allowStoredEntriesWithDataDescriptor = false; 113 114 private static final int LFH_LEN = 30; 115 /* 116 local file header signature 4 bytes (0x04034b50) 117 version needed to extract 2 bytes 118 general purpose bit flag 2 bytes 119 compression method 2 bytes 120 last mod file time 2 bytes 121 last mod file date 2 bytes 122 crc-32 4 bytes 123 compressed size 4 bytes 124 uncompressed size 4 bytes 125 file name length 2 bytes 126 extra field length 2 bytes 127 */ 128 129 private static final int CFH_LEN = 46; 130 /* 131 central file header signature 4 bytes (0x02014b50) 132 version made by 2 bytes 133 version needed to extract 2 bytes 134 general purpose bit flag 2 bytes 135 compression method 2 bytes 136 last mod file time 2 bytes 137 last mod file date 2 bytes 138 crc-32 4 bytes 139 compressed size 4 bytes 140 uncompressed size 4 bytes 141 file name length 2 bytes 142 extra field length 2 bytes 143 file comment length 2 bytes 144 disk number start 2 bytes 145 internal file attributes 2 bytes 146 external file attributes 4 bytes 147 relative offset of local header 4 bytes 148 */ 149 150 private static final long TWO_EXP_32 = ZIP64_MAGIC + 1; 151 152 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 153 private final byte[] LFH_BUF = new byte[LFH_LEN]; 154 private final byte[] SKIP_BUF = new byte[1024]; 155 private final byte[] SHORT_BUF = new byte[SHORT]; 156 private final byte[] WORD_BUF = new byte[WORD]; 157 private final byte[] TWO_DWORD_BUF = new byte[2 * DWORD]; 158 159 private int entriesRead = 0; 160 161 public ZipArchiveInputStream(InputStream inputStream) { 162 this(inputStream, ZipEncodingHelper.UTF8); 163 } 164 165 /** 166 * @param encoding the encoding to use for file names, use null 167 * for the platform's default encoding 168 * @since 1.5 169 */ 170 public ZipArchiveInputStream(InputStream inputStream, String encoding) { 171 this(inputStream, encoding, true); 172 } 173 174 /** 175 * @param encoding the encoding to use for file names, use null 176 * for the platform's default encoding 177 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 178 * Extra Fields (if present) to set the file names. 179 */ 180 public ZipArchiveInputStream(InputStream inputStream, 181 String encoding, 182 boolean useUnicodeExtraFields) { 183 this(inputStream, encoding, useUnicodeExtraFields, false); 184 } 185 186 /** 187 * @param encoding the encoding to use for file names, use null 188 * for the platform's default encoding 189 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 190 * Extra Fields (if present) to set the file names. 191 * @param allowStoredEntriesWithDataDescriptor whether the stream 192 * will try to read STORED entries that use a data descriptor 193 * @since 1.1 194 */ 195 public ZipArchiveInputStream(InputStream inputStream, 196 String encoding, 197 boolean useUnicodeExtraFields, 198 boolean allowStoredEntriesWithDataDescriptor) { 199 zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 200 this.useUnicodeExtraFields = useUnicodeExtraFields; 201 in = new PushbackInputStream(inputStream, buf.buf.length); 202 this.allowStoredEntriesWithDataDescriptor = 203 allowStoredEntriesWithDataDescriptor; 204 } 205 206 public ZipArchiveEntry getNextZipEntry() throws IOException { 207 boolean firstEntry = true; 208 if (closed || hitCentralDirectory) { 209 return null; 210 } 211 if (current != null) { 212 closeEntry(); 213 firstEntry = false; 214 } 215 216 try { 217 if (firstEntry) { 218 // split archives have a special signature before the 219 // first local file header - look for it and fail with 220 // the appropriate error message if this is a split 221 // archive. 222 readFirstLocalFileHeader(LFH_BUF); 223 } else { 224 readFully(LFH_BUF); 225 } 226 } catch (EOFException e) { 227 return null; 228 } 229 230 ZipLong sig = new ZipLong(LFH_BUF); 231 if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) { 232 hitCentralDirectory = true; 233 skipRemainderOfArchive(); 234 } 235 if (!sig.equals(ZipLong.LFH_SIG)) { 236 return null; 237 } 238 239 int off = WORD; 240 current = new CurrentEntry(); 241 242 int versionMadeBy = ZipShort.getValue(LFH_BUF, off); 243 off += SHORT; 244 current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) 245 & ZipFile.NIBLET_MASK); 246 247 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(LFH_BUF, off); 248 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames(); 249 final ZipEncoding entryEncoding = 250 hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding; 251 current.hasDataDescriptor = gpFlag.usesDataDescriptor(); 252 current.entry.setGeneralPurposeBit(gpFlag); 253 254 off += SHORT; 255 256 current.entry.setMethod(ZipShort.getValue(LFH_BUF, off)); 257 off += SHORT; 258 259 long time = ZipUtil.dosToJavaTime(ZipLong.getValue(LFH_BUF, off)); 260 current.entry.setTime(time); 261 off += WORD; 262 263 ZipLong size = null, cSize = null; 264 if (!current.hasDataDescriptor) { 265 current.entry.setCrc(ZipLong.getValue(LFH_BUF, off)); 266 off += WORD; 267 268 cSize = new ZipLong(LFH_BUF, off); 269 off += WORD; 270 271 size = new ZipLong(LFH_BUF, off); 272 off += WORD; 273 } else { 274 off += 3 * WORD; 275 } 276 277 int fileNameLen = ZipShort.getValue(LFH_BUF, off); 278 279 off += SHORT; 280 281 int extraLen = ZipShort.getValue(LFH_BUF, off); 282 off += SHORT; 283 284 byte[] fileName = new byte[fileNameLen]; 285 readFully(fileName); 286 current.entry.setName(entryEncoding.decode(fileName), fileName); 287 288 byte[] extraData = new byte[extraLen]; 289 readFully(extraData); 290 current.entry.setExtra(extraData); 291 292 if (!hasUTF8Flag && useUnicodeExtraFields) { 293 ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, 294 null); 295 } 296 297 processZip64Extra(size, cSize); 298 entriesRead++; 299 return current.entry; 300 } 301 302 /** 303 * Fills the given array with the first local file header and 304 * deals with splitting/spanning markers that may prefix the first 305 * LFH. 306 */ 307 private void readFirstLocalFileHeader(byte[] lfh) throws IOException { 308 readFully(lfh); 309 ZipLong sig = new ZipLong(lfh); 310 if (sig.equals(ZipLong.DD_SIG)) { 311 throw new 312 UnsupportedZipFeatureException(UnsupportedZipFeatureException 313 .Feature.SPLITTING); 314 } 315 if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) { 316 // The archive is not really split as only one segment was 317 // needed in the end. Just skip over the marker. 318 byte[] missedLfhBytes = new byte[4]; 319 readFully(missedLfhBytes); 320 System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4); 321 System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4); 322 } 323 } 324 325 /** 326 * Records whether a Zip64 extra is present and sets the size 327 * information from it if sizes are 0xFFFFFFFF and the entry 328 * doesn't use a data descriptor. 329 */ 330 private void processZip64Extra(ZipLong size, ZipLong cSize) { 331 Zip64ExtendedInformationExtraField z64 = 332 (Zip64ExtendedInformationExtraField) 333 current.entry.getExtraField(Zip64ExtendedInformationExtraField 334 .HEADER_ID); 335 current.usesZip64 = z64 != null; 336 if (!current.hasDataDescriptor) { 337 if (current.usesZip64 && (cSize.equals(ZipLong.ZIP64_MAGIC) 338 || size.equals(ZipLong.ZIP64_MAGIC)) 339 ) { 340 current.entry.setCompressedSize(z64.getCompressedSize() // z64 cannot be null here 341 .getLongValue()); 342 current.entry.setSize(z64.getSize().getLongValue()); 343 } else { 344 current.entry.setCompressedSize(cSize.getValue()); 345 current.entry.setSize(size.getValue()); 346 } 347 } 348 } 349 350 /** {@inheritDoc} */ 351 @Override 352 public ArchiveEntry getNextEntry() throws IOException { 353 return getNextZipEntry(); 354 } 355 356 /** 357 * Whether this class is able to read the given entry. 358 * 359 * <p>May return false if it is set up to use encryption or a 360 * compression method that hasn't been implemented yet.</p> 361 * @since 1.1 362 */ 363 @Override 364 public boolean canReadEntryData(ArchiveEntry ae) { 365 if (ae instanceof ZipArchiveEntry) { 366 ZipArchiveEntry ze = (ZipArchiveEntry) ae; 367 return ZipUtil.canHandleEntryData(ze) 368 && supportsDataDescriptorFor(ze); 369 370 } 371 return false; 372 } 373 374 @Override 375 public int read(byte[] buffer, int start, int length) throws IOException { 376 if (closed) { 377 throw new IOException("The stream is closed"); 378 } 379 if (inf.finished() || current == null) { 380 return -1; 381 } 382 383 // avoid int overflow, check null buffer 384 if (start <= buffer.length && length >= 0 && start >= 0 385 && buffer.length - start >= length) { 386 ZipUtil.checkRequestedFeatures(current.entry); 387 if (!supportsDataDescriptorFor(current.entry)) { 388 throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException 389 .Feature 390 .DATA_DESCRIPTOR, 391 current.entry); 392 } 393 394 if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) { 395 return readStored(buffer, start, length); 396 } 397 return readDeflated(buffer, start, length); 398 } 399 throw new ArrayIndexOutOfBoundsException(); 400 } 401 402 /** 403 * Implementation of read for STORED entries. 404 */ 405 private int readStored(byte[] buffer, int start, int length) 406 throws IOException { 407 408 if (current.hasDataDescriptor) { 409 if (lastStoredEntry == null) { 410 readStoredEntry(); 411 } 412 return lastStoredEntry.read(buffer, start, length); 413 } 414 415 long csize = current.entry.getSize(); 416 if (current.bytesRead >= csize) { 417 return -1; 418 } 419 420 if (buf.offsetInBuffer >= buf.lengthOfLastRead) { 421 buf.offsetInBuffer = 0; 422 if ((buf.lengthOfLastRead = in.read(buf.buf)) == -1) { 423 return -1; 424 } 425 count(buf.lengthOfLastRead); 426 current.bytesReadFromStream += buf.lengthOfLastRead; 427 } 428 429 int availableBytesInBuffer = buf.lengthOfLastRead - buf.offsetInBuffer; 430 int toRead = Math.min(availableBytesInBuffer, length); 431 if ((csize - current.bytesRead) < toRead) { 432 // if it is smaller than toRead then it fits into an int 433 toRead = (int) (csize - current.bytesRead); 434 } 435 System.arraycopy(buf.buf, buf.offsetInBuffer, buffer, start, toRead); 436 buf.offsetInBuffer += toRead; 437 current.bytesRead += toRead; 438 crc.update(buffer, start, toRead); 439 return toRead; 440 } 441 442 /** 443 * Implementation of read for DEFLATED entries. 444 */ 445 private int readDeflated(byte[] buffer, int start, int length) 446 throws IOException { 447 int read = readFromInflater(buffer, start, length); 448 if (read == 0) { 449 if (inf.finished()) { 450 return -1; 451 } else if (inf.needsDictionary()) { 452 throw new ZipException("This archive needs a preset dictionary" 453 + " which is not supported by Commons" 454 + " Compress."); 455 } else if (buf.lengthOfLastRead == -1) { 456 throw new IOException("Truncated ZIP file"); 457 } 458 } 459 crc.update(buffer, start, read); 460 return read; 461 } 462 463 /** 464 * Potentially reads more bytes to fill the inflater's buffer and 465 * reads from it. 466 */ 467 private int readFromInflater(byte[] buffer, int start, int length) 468 throws IOException { 469 int read = 0; 470 do { 471 if (inf.needsInput()) { 472 fill(); 473 if (buf.lengthOfLastRead > 0) { 474 current.bytesReadFromStream += buf.lengthOfLastRead; 475 } else { 476 break; 477 } 478 } 479 try { 480 read = inf.inflate(buffer, start, length); 481 } catch (DataFormatException e) { 482 throw new ZipException(e.getMessage()); 483 } 484 } while (read == 0 && inf.needsInput()); 485 return read; 486 } 487 488 @Override 489 public void close() throws IOException { 490 if (!closed) { 491 closed = true; 492 in.close(); 493 inf.end(); 494 } 495 } 496 497 /** 498 * Skips over and discards value bytes of data from this input 499 * stream. 500 * 501 * <p>This implementation may end up skipping over some smaller 502 * number of bytes, possibly 0, if and only if it reaches the end 503 * of the underlying stream.</p> 504 * 505 * <p>The actual number of bytes skipped is returned.</p> 506 * 507 * @param value the number of bytes to be skipped. 508 * @return the actual number of bytes skipped. 509 * @throws IOException - if an I/O error occurs. 510 * @throws IllegalArgumentException - if value is negative. 511 */ 512 @Override 513 public long skip(long value) throws IOException { 514 if (value >= 0) { 515 long skipped = 0; 516 while (skipped < value) { 517 long rem = value - skipped; 518 int x = read(SKIP_BUF, 0, 519 (int) (SKIP_BUF.length > rem ? rem 520 : SKIP_BUF.length)); 521 if (x == -1) { 522 return skipped; 523 } 524 skipped += x; 525 } 526 return skipped; 527 } 528 throw new IllegalArgumentException(); 529 } 530 531 /** 532 * Checks if the signature matches what is expected for a zip file. 533 * Does not currently handle self-extracting zips which may have arbitrary 534 * leading content. 535 * 536 * @param signature 537 * the bytes to check 538 * @param length 539 * the number of bytes to check 540 * @return true, if this stream is a zip archive stream, false otherwise 541 */ 542 public static boolean matches(byte[] signature, int length) { 543 if (length < ZipArchiveOutputStream.LFH_SIG.length) { 544 return false; 545 } 546 547 return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file 548 || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip 549 || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip 550 || checksig(signature, 551 ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes()); 552 } 553 554 private static boolean checksig(byte[] signature, byte[] expected){ 555 for (int i = 0; i < expected.length; i++) { 556 if (signature[i] != expected[i]) { 557 return false; 558 } 559 } 560 return true; 561 } 562 563 /** 564 * Closes the current ZIP archive entry and positions the underlying 565 * stream to the beginning of the next entry. All per-entry variables 566 * and data structures are cleared. 567 * <p> 568 * If the compressed size of this entry is included in the entry header, 569 * then any outstanding bytes are simply skipped from the underlying 570 * stream without uncompressing them. This allows an entry to be safely 571 * closed even if the compression method is unsupported. 572 * <p> 573 * In case we don't know the compressed size of this entry or have 574 * already buffered too much data from the underlying stream to support 575 * uncompression, then the uncompression process is completed and the 576 * end position of the stream is adjusted based on the result of that 577 * process. 578 * 579 * @throws IOException if an error occurs 580 */ 581 private void closeEntry() throws IOException { 582 if (closed) { 583 throw new IOException("The stream is closed"); 584 } 585 if (current == null) { 586 return; 587 } 588 589 // Ensure all entry bytes are read 590 if (current.bytesReadFromStream <= current.entry.getCompressedSize() 591 && !current.hasDataDescriptor) { 592 drainCurrentEntryData(); 593 } else { 594 skip(Long.MAX_VALUE); 595 596 long inB = 597 current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED 598 ? getBytesInflated() : current.bytesRead; 599 600 // this is at most a single read() operation and can't 601 // exceed the range of int 602 int diff = (int) (current.bytesReadFromStream - inB); 603 604 // Pushback any required bytes 605 if (diff > 0) { 606 pushback(buf.buf, buf.lengthOfLastRead - diff, diff); 607 } 608 } 609 610 if (lastStoredEntry == null && current.hasDataDescriptor) { 611 readDataDescriptor(); 612 } 613 614 inf.reset(); 615 buf.reset(); 616 crc.reset(); 617 current = null; 618 lastStoredEntry = null; 619 } 620 621 /** 622 * Read all data of the current entry from the underlying stream 623 * that hasn't been read, yet. 624 */ 625 private void drainCurrentEntryData() throws IOException { 626 long remaining = current.entry.getCompressedSize() 627 - current.bytesReadFromStream; 628 while (remaining > 0) { 629 long n = in.read(buf.buf, 0, (int) Math.min(buf.buf.length, 630 remaining)); 631 if (n < 0) { 632 throw new EOFException( 633 "Truncated ZIP entry: " + current.entry.getName()); 634 } else { 635 count(n); 636 remaining -= n; 637 } 638 } 639 } 640 641 /** 642 * Get the number of bytes Inflater has actually processed. 643 * 644 * <p>for Java < Java7 the getBytes* methods in 645 * Inflater/Deflater seem to return unsigned ints rather than 646 * longs that start over with 0 at 2^32.</p> 647 * 648 * <p>The stream knows how many bytes it has read, but not how 649 * many the Inflater actually consumed - it should be between the 650 * total number of bytes read for the entry and the total number 651 * minus the last read operation. Here we just try to make the 652 * value close enough to the bytes we've read by assuming the 653 * number of bytes consumed must be smaller than (or equal to) the 654 * number of bytes read but not smaller by more than 2^32.</p> 655 */ 656 private long getBytesInflated() { 657 long inB = inf.getBytesRead(); 658 if (current.bytesReadFromStream >= TWO_EXP_32) { 659 while (inB + TWO_EXP_32 <= current.bytesReadFromStream) { 660 inB += TWO_EXP_32; 661 } 662 } 663 return inB; 664 } 665 666 private void fill() throws IOException { 667 if (closed) { 668 throw new IOException("The stream is closed"); 669 } 670 if ((buf.lengthOfLastRead = in.read(buf.buf)) > 0) { 671 count(buf.lengthOfLastRead); 672 inf.setInput(buf.buf, 0, buf.lengthOfLastRead); 673 } 674 } 675 676 private void readFully(byte[] b) throws IOException { 677 int count = 0, x = 0; 678 while (count != b.length) { 679 count += x = in.read(b, count, b.length - count); 680 if (x == -1) { 681 throw new EOFException(); 682 } 683 count(x); 684 } 685 } 686 687 private void readDataDescriptor() throws IOException { 688 readFully(WORD_BUF); 689 ZipLong val = new ZipLong(WORD_BUF); 690 if (ZipLong.DD_SIG.equals(val)) { 691 // data descriptor with signature, skip sig 692 readFully(WORD_BUF); 693 val = new ZipLong(WORD_BUF); 694 } 695 current.entry.setCrc(val.getValue()); 696 697 // if there is a ZIP64 extra field, sizes are eight bytes 698 // each, otherwise four bytes each. Unfortunately some 699 // implementations - namely Java7 - use eight bytes without 700 // using a ZIP64 extra field - 701 // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588 702 703 // just read 16 bytes and check whether bytes nine to twelve 704 // look like one of the signatures of what could follow a data 705 // descriptor (ignoring archive decryption headers for now). 706 // If so, push back eight bytes and assume sizes are four 707 // bytes, otherwise sizes are eight bytes each. 708 readFully(TWO_DWORD_BUF); 709 ZipLong potentialSig = new ZipLong(TWO_DWORD_BUF, DWORD); 710 if (potentialSig.equals(ZipLong.CFH_SIG) 711 || potentialSig.equals(ZipLong.LFH_SIG)) { 712 pushback(TWO_DWORD_BUF, DWORD, DWORD); 713 current.entry.setCompressedSize(ZipLong.getValue(TWO_DWORD_BUF)); 714 current.entry.setSize(ZipLong.getValue(TWO_DWORD_BUF, WORD)); 715 } else { 716 current.entry 717 .setCompressedSize(ZipEightByteInteger 718 .getLongValue(TWO_DWORD_BUF)); 719 current.entry.setSize(ZipEightByteInteger 720 .getLongValue(TWO_DWORD_BUF, DWORD)); 721 } 722 } 723 724 /** 725 * Whether this entry requires a data descriptor this library can work with. 726 * 727 * @return true if allowStoredEntriesWithDataDescriptor is true, 728 * the entry doesn't require any data descriptor or the method is 729 * DEFLATED. 730 */ 731 private boolean supportsDataDescriptorFor(ZipArchiveEntry entry) { 732 return allowStoredEntriesWithDataDescriptor || 733 !entry.getGeneralPurposeBit().usesDataDescriptor() 734 || entry.getMethod() == ZipEntry.DEFLATED; 735 } 736 737 /** 738 * Caches a stored entry that uses the data descriptor. 739 * 740 * <ul> 741 * <li>Reads a stored entry until the signature of a local file 742 * header, central directory header or data descriptor has been 743 * found.</li> 744 * <li>Stores all entry data in lastStoredEntry.</p> 745 * <li>Rewinds the stream to position at the data 746 * descriptor.</li> 747 * <li>reads the data descriptor</li> 748 * </ul> 749 * 750 * <p>After calling this method the entry should know its size, 751 * the entry's data is cached and the stream is positioned at the 752 * next local file or central directory header.</p> 753 */ 754 private void readStoredEntry() throws IOException { 755 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 756 int off = 0; 757 boolean done = false; 758 759 // length of DD without signature 760 int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD; 761 762 while (!done) { 763 int r = in.read(buf.buf, off, 764 ZipArchiveOutputStream.BUFFER_SIZE - off); 765 if (r <= 0) { 766 // read the whole archive without ever finding a 767 // central directory 768 throw new IOException("Truncated ZIP file"); 769 } 770 if (r + off < 4) { 771 // buf is too small to check for a signature, loop 772 off += r; 773 continue; 774 } 775 776 done = bufferContainsSignature(bos, off, r, ddLen); 777 if (!done) { 778 off = cacheBytesRead(bos, off, r, ddLen); 779 } 780 } 781 782 byte[] b = bos.toByteArray(); 783 lastStoredEntry = new ByteArrayInputStream(b); 784 } 785 786 private static final byte[] LFH = ZipLong.LFH_SIG.getBytes(); 787 private static final byte[] CFH = ZipLong.CFH_SIG.getBytes(); 788 private static final byte[] DD = ZipLong.DD_SIG.getBytes(); 789 790 /** 791 * Checks whether the current buffer contains the signature of a 792 * "data decsriptor", "local file header" or 793 * "central directory entry". 794 * 795 * <p>If it contains such a signature, reads the data descriptor 796 * and positions the stream right after the data descriptor.</p> 797 */ 798 private boolean bufferContainsSignature(ByteArrayOutputStream bos, 799 int offset, int lastRead, 800 int expectedDDLen) 801 throws IOException { 802 boolean done = false; 803 int readTooMuch = 0; 804 for (int i = 0; !done && i < lastRead - 4; i++) { 805 if (buf.buf[i] == LFH[0] && buf.buf[i + 1] == LFH[1]) { 806 if ((buf.buf[i + 2] == LFH[2] && buf.buf[i + 3] == LFH[3]) 807 || (buf.buf[i] == CFH[2] && buf.buf[i + 3] == CFH[3])) { 808 // found a LFH or CFH: 809 readTooMuch = offset + lastRead - i - expectedDDLen; 810 done = true; 811 } 812 else if (buf.buf[i + 2] == DD[2] && buf.buf[i + 3] == DD[3]) { 813 // found DD: 814 readTooMuch = offset + lastRead - i; 815 done = true; 816 } 817 if (done) { 818 // * push back bytes read in excess as well as the data 819 // descriptor 820 // * copy the remaining bytes to cache 821 // * read data descriptor 822 pushback(buf.buf, offset + lastRead - readTooMuch, 823 readTooMuch); 824 bos.write(buf.buf, 0, i); 825 readDataDescriptor(); 826 } 827 } 828 } 829 return done; 830 } 831 832 /** 833 * If the last read bytes could hold a data descriptor and an 834 * incomplete signature then save the last bytes to the front of 835 * the buffer and cache everything in front of the potential data 836 * descriptor into the given ByteArrayOutputStream. 837 * 838 * <p>Data descriptor plus incomplete signature (3 bytes in the 839 * worst case) can be 20 bytes max.</p> 840 */ 841 private int cacheBytesRead(ByteArrayOutputStream bos, int offset, 842 int lastRead, int expecteDDLen) { 843 final int cacheable = offset + lastRead - expecteDDLen - 3; 844 if (cacheable > 0) { 845 bos.write(buf.buf, 0, cacheable); 846 System.arraycopy(buf.buf, cacheable, buf.buf, 0, 847 expecteDDLen + 3); 848 offset = expecteDDLen + 3; 849 } else { 850 offset += lastRead; 851 } 852 return offset; 853 } 854 855 private void pushback(byte[] buf, int offset, int length) 856 throws IOException { 857 ((PushbackInputStream) in).unread(buf, offset, length); 858 pushedBackBytes(length); 859 } 860 861 // End of Central Directory Record 862 // end of central dir signature 4 bytes (0x06054b50) 863 // number of this disk 2 bytes 864 // number of the disk with the 865 // start of the central directory 2 bytes 866 // total number of entries in the 867 // central directory on this disk 2 bytes 868 // total number of entries in 869 // the central directory 2 bytes 870 // size of the central directory 4 bytes 871 // offset of start of central 872 // directory with respect to 873 // the starting disk number 4 bytes 874 // .ZIP file comment length 2 bytes 875 // .ZIP file comment (variable size) 876 // 877 878 /** 879 * Reads the stream until it find the "End of central directory 880 * record" and consumes it as well. 881 */ 882 private void skipRemainderOfArchive() throws IOException { 883 // skip over central directory. One LFH has been read too much 884 // already. The calculation discounts file names and extra 885 // data so it will be too short. 886 realSkip(entriesRead * CFH_LEN - LFH_LEN); 887 findEocdRecord(); 888 realSkip(ZipFile.MIN_EOCD_SIZE 889 - WORD /* signature */ - SHORT /* comment len */); 890 readFully(SHORT_BUF); 891 // file comment 892 realSkip(ZipShort.getValue(SHORT_BUF)); 893 } 894 895 /** 896 * Reads forward until the signature of the "End of central 897 * directory" recod is found. 898 */ 899 private void findEocdRecord() throws IOException { 900 int currentByte = -1; 901 boolean skipReadCall = false; 902 while (skipReadCall || (currentByte = readOneByte()) > -1) { 903 skipReadCall = false; 904 if (!isFirstByteOfEocdSig(currentByte)) { 905 continue; 906 } 907 currentByte = readOneByte(); 908 if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) { 909 if (currentByte == -1) { 910 break; 911 } 912 skipReadCall = isFirstByteOfEocdSig(currentByte); 913 continue; 914 } 915 currentByte = readOneByte(); 916 if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) { 917 if (currentByte == -1) { 918 break; 919 } 920 skipReadCall = isFirstByteOfEocdSig(currentByte); 921 continue; 922 } 923 currentByte = readOneByte(); 924 if (currentByte == -1 925 || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) { 926 break; 927 } 928 skipReadCall = isFirstByteOfEocdSig(currentByte); 929 } 930 } 931 932 /** 933 * Skips bytes by reading from the underlying stream rather than 934 * the (potentially inflating) archive stream - which {@link 935 * #skip} would do. 936 * 937 * Also updates bytes-read counter. 938 */ 939 private void realSkip(long value) throws IOException { 940 if (value >= 0) { 941 long skipped = 0; 942 while (skipped < value) { 943 long rem = value - skipped; 944 int x = in.read(SKIP_BUF, 0, 945 (int) (SKIP_BUF.length > rem ? rem 946 : SKIP_BUF.length)); 947 if (x == -1) { 948 return; 949 } 950 count(x); 951 skipped += x; 952 } 953 return; 954 } 955 throw new IllegalArgumentException(); 956 } 957 958 /** 959 * Reads bytes by reading from the underlying stream rather than 960 * the (potentially inflating) archive stream - which {@link 961 * #read} would do. 962 * 963 * Also updates bytes-read counter. 964 */ 965 private int readOneByte() throws IOException { 966 int b = in.read(); 967 if (b != -1) { 968 count(1); 969 } 970 return b; 971 } 972 973 private boolean isFirstByteOfEocdSig(int b) { 974 return b == ZipArchiveOutputStream.EOCD_SIG[0]; 975 } 976 977 /** 978 * Structure collecting information for the entry that is 979 * currently being read. 980 */ 981 private static final class CurrentEntry { 982 /** 983 * Current ZIP entry. 984 */ 985 private final ZipArchiveEntry entry = new ZipArchiveEntry(); 986 /** 987 * Does the entry use a data descriptor? 988 */ 989 private boolean hasDataDescriptor; 990 /** 991 * Does the entry have a ZIP64 extended information extra field. 992 */ 993 private boolean usesZip64; 994 /** 995 * Number of bytes of entry content read by the client if the 996 * entry is STORED. 997 */ 998 private long bytesRead; 999 /** 1000 * Number of bytes of entry content read so from the stream. 1001 * 1002 * <p>This may be more than the actual entry's length as some 1003 * stuff gets buffered up and needs to be pushed back when the 1004 * end of the entry has been reached.</p> 1005 */ 1006 private long bytesReadFromStream; 1007 } 1008 1009 /** 1010 * Contains a temporary buffer used to read from the wrapped 1011 * stream together with some information needed for internal 1012 * housekeeping. 1013 */ 1014 private static final class Buffer { 1015 /** 1016 * Buffer used as temporary buffer when reading from the stream. 1017 */ 1018 private final byte[] buf = new byte[ZipArchiveOutputStream.BUFFER_SIZE]; 1019 /** 1020 * {@link #buf buf} may contain data the client hasnt read, yet, 1021 * this is the first byte that hasn't been read so far. 1022 */ 1023 private int offsetInBuffer = 0; 1024 /** 1025 * Number of bytes read from the wrapped stream into {@link #buf 1026 * buf} with the last read operation. 1027 */ 1028 private int lengthOfLastRead = 0; 1029 /** 1030 * Reset internal housekeeping. 1031 */ 1032 private void reset() { 1033 offsetInBuffer = lengthOfLastRead = 0; 1034 } 1035 } 1036 }