/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.zip;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.nio.ByteBuffer;
import java.util.zip.CRC32;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.utils.IOUtils;

import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;

/**
 * Implements an input stream that can read Zip archives.
 *
 * <p>This class transparently supports Zip64 extensions and thus
 * individual entries and archives larger than 4 GB or with more than
 * 65536 entries.</p>
 *
 * <p>The {@link ZipFile} class is preferred when reading from files
 * as {@link ZipArchiveInputStream} is limited by not being able to
 * read the central directory header before returning entries.  In
 * particular {@link ZipArchiveInputStream}</p>
 *
 * <ul>
 *
 *  <li>may return entries that are not part of the central directory
 *  at all and shouldn't be considered part of the archive.</li>
 *
 *  <li>may return several entries with the same name.</li>
 *
 *  <li>will not return internal or external attributes.</li>
 *
 *  <li>may return incomplete extra field data.</li>
 *
 *  <li>may return unknown sizes and CRC values for entries until the
 *  next entry has been reached if the archive uses the data
 *  descriptor feature.</li>
 *
 * </ul>
 *
 * @see ZipFile
 * @NotThreadSafe
 */
public class ZipArchiveInputStream extends ArchiveInputStream {

    /** The zip encoding to use for filenames and the file comment. */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    /** Whether to look for and use Unicode extra fields. */
    private final boolean useUnicodeExtraFields;

    /** Wrapped stream, will always be a PushbackInputStream. */
    private final InputStream in;

    /** Inflater used for all deflated entries. */
    private final Inflater inf = new Inflater(true);

    /** Buffer used to read from the wrapped stream. */
    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);

    /** The entry that is currently being read. */
    private CurrentEntry current = null;

    /** Whether the stream has been closed. */
    private boolean closed = false;

    /** Whether the stream has reached the central directory - and thus found all entries. */
    private boolean hitCentralDirectory = false;

    /**
     * When reading a stored entry that uses the data descriptor this
     * stream has to read the full entry and caches it.  This is the
     * cache.
     */
    private ByteArrayInputStream lastStoredEntry = null;

    /** Whether the stream will try to read STORED entries that use a data descriptor. */
    private boolean allowStoredEntriesWithDataDescriptor = false;

    private static final int LFH_LEN = 30;
    /*
      local file header signature     WORD
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
    */

    private static final int CFH_LEN = 46;
    /*
        central file header signature   WORD
        version made by                 SHORT
        version needed to extract       SHORT
        general purpose bit flag        SHORT
        compression method              SHORT
        last mod file time              SHORT
        last mod file date              SHORT
        crc-32                          WORD
        compressed size                 WORD
        uncompressed size               WORD
        file name length                SHORT
        extra field length              SHORT
        file comment length             SHORT
        disk number start               SHORT
        internal file attributes        SHORT
        external file attributes        WORD
        relative offset of local header WORD
    */

    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    private final byte[] LFH_BUF = new byte[LFH_LEN];
    private final byte[] SKIP_BUF = new byte[1024];
    private final byte[] SHORT_BUF = new byte[SHORT];
    private final byte[] WORD_BUF = new byte[WORD];
    private final byte[] TWO_DWORD_BUF = new byte[2 * DWORD];

    /** Number of local file headers that have been parsed so far. */
    private int entriesRead = 0;

    /**
     * Creates an instance using UTF-8 encoding.
     *
     * @param inputStream the stream to wrap
     */
    public ZipArchiveInputStream(InputStream inputStream) {
        this(inputStream, ZipEncodingHelper.UTF8);
    }

    /**
     * Creates an instance using the specified encoding.
     *
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @since 1.5
     */
    public ZipArchiveInputStream(InputStream inputStream, String encoding) {
        this(inputStream, encoding, true);
    }

    /**
     * Creates an instance using the specified encoding.
     *
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     */
    public ZipArchiveInputStream(InputStream inputStream, String encoding, boolean useUnicodeExtraFields) {
        this(inputStream, encoding, useUnicodeExtraFields, false);
    }

    /**
     * Creates an instance using the specified encoding.
     *
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     * @param allowStoredEntriesWithDataDescriptor whether the stream
     * will try to read STORED entries that use a data descriptor
     * @since 1.1
     */
    public ZipArchiveInputStream(InputStream inputStream,
                                 String encoding,
                                 boolean useUnicodeExtraFields,
                                 boolean allowStoredEntriesWithDataDescriptor) {
        this.encoding = encoding;
        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.useUnicodeExtraFields = useUnicodeExtraFields;
        // the pushback capacity equals the read buffer so that a whole
        // buffer can be returned to the stream
        in = new PushbackInputStream(inputStream, buf.capacity());
        this.allowStoredEntriesWithDataDescriptor =
            allowStoredEntriesWithDataDescriptor;
        // haven't read anything so far
        buf.limit(0);
    }

    /**
     * Closes the current entry (if any), reads the next local file
     * header and returns the entry described by it.
     *
     * @return the next entry, or null if the stream has been closed,
     * the central directory has been reached, the stream ended while
     * reading the header or the bytes read are not a local file
     * header
     * @throws IOException if reading from the wrapped stream fails
     */
    public ZipArchiveEntry getNextZipEntry() throws IOException {
        boolean firstEntry = true;
        if (closed || hitCentralDirectory) {
            return null;
        }
        if (current != null) {
            closeEntry();
            firstEntry = false;
        }

        try {
            if (firstEntry) {
                // split archives have a special signature before the
                // first local file header - look for it and fail with
                // the appropriate error message if this is a split
                // archive.
                readFirstLocalFileHeader(LFH_BUF);
            } else {
                readFully(LFH_BUF);
            }
        } catch (EOFException e) {
            return null;
        }

        ZipLong sig = new ZipLong(LFH_BUF);
        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
            hitCentralDirectory = true;
            skipRemainderOfArchive();
        }
        if (!sig.equals(ZipLong.LFH_SIG)) {
            return null;
        }

        int off = WORD;
        current = new CurrentEntry();

        int versionMadeBy = ZipShort.getValue(LFH_BUF, off);
        off += SHORT;
        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(LFH_BUF, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
        current.entry.setGeneralPurposeBit(gpFlag);

        off += SHORT;

        current.entry.setMethod(ZipShort.getValue(LFH_BUF, off));
        off += SHORT;

        long time = ZipUtil.dosToJavaTime(ZipLong.getValue(LFH_BUF, off));
        current.entry.setTime(time);
        off += WORD;

        ZipLong size = null, cSize = null;
        if (!current.hasDataDescriptor) {
            current.entry.setCrc(ZipLong.getValue(LFH_BUF, off));
            off += WORD;

            cSize = new ZipLong(LFH_BUF, off);
            off += WORD;

            size = new ZipLong(LFH_BUF, off);
            off += WORD;
        } else {
            // CRC and sizes are only known once the data descriptor
            // has been read
            off += 3 * WORD;
        }

        int fileNameLen = ZipShort.getValue(LFH_BUF, off);

        off += SHORT;

        int extraLen = ZipShort.getValue(LFH_BUF, off);
        off += SHORT;

        byte[] fileName = new byte[fileNameLen];
        readFully(fileName);
        current.entry.setName(entryEncoding.decode(fileName), fileName);

        byte[] extraData = new byte[extraLen];
        readFully(extraData);
        current.entry.setExtra(extraData);

        if (!hasUTF8Flag && useUnicodeExtraFields) {
            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
        }

        processZip64Extra(size, cSize);

        if (current.entry.getCompressedSize() != ZipArchiveEntry.SIZE_UNKNOWN) {
            if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
                current.in = new UnshrinkingInputStream(new BoundedInputStream(in, current.entry.getCompressedSize()));
            } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
                current.in = new ExplodingInputStream(
                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
                        new BoundedInputStream(in, current.entry.getCompressedSize()));
            }
        }

        entriesRead++;
        return current.entry;
    }

    /**
     * Fills the given array with the first local file header and
     * deals with splitting/spanning markers that may prefix the first
     * LFH.
     *
     * @param lfh buffer of LFH_LEN bytes that receives the header
     * @throws UnsupportedZipFeatureException if the stream starts
     * with the marker of a split archive
     * @throws IOException if reading fails or the stream ends early
     */
    private void readFirstLocalFileHeader(byte[] lfh) throws IOException {
        readFully(lfh);
        ZipLong sig = new ZipLong(lfh);
        if (sig.equals(ZipLong.DD_SIG)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
        }

        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
            // The archive is not really split as only one segment was
            // needed in the end.  Just skip over the marker.
            byte[] missedLfhBytes = new byte[4];
            readFully(missedLfhBytes);
            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
        }
    }

    /**
     * Records whether a Zip64 extra is present and sets the size
     * information from it if sizes are 0xFFFFFFFF and the entry
     * doesn't use a data descriptor.
     *
     * @param size uncompressed size read from the local file header,
     * only used if the entry has no data descriptor
     * @param cSize compressed size read from the local file header,
     * only used if the entry has no data descriptor
     */
    private void processZip64Extra(ZipLong size, ZipLong cSize) {
        Zip64ExtendedInformationExtraField z64 =
            (Zip64ExtendedInformationExtraField)
            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
        current.usesZip64 = z64 != null;
        if (!current.hasDataDescriptor) {
            if (z64 != null // same as current.usesZip64 but avoids NPE warning
                    && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
                current.entry.setSize(z64.getSize().getLongValue());
            } else {
                current.entry.setCompressedSize(cSize.getValue());
                current.entry.setSize(size.getValue());
            }
        }
    }

    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextZipEntry();
    }

    /**
     * Whether this class is able to read the given entry.
     *
     * <p>May return false if it is set up to use encryption or a
     * compression method that hasn't been implemented yet.</p>
     * @since 1.1
     */
    @Override
    public boolean canReadEntryData(ArchiveEntry ae) {
        if (ae instanceof ZipArchiveEntry) {
            ZipArchiveEntry ze = (ZipArchiveEntry) ae;
            return ZipUtil.canHandleEntryData(ze)
                && supportsDataDescriptorFor(ze);

        }
        return false;
    }

    /**
     * Reads uncompressed data of the current entry and updates the
     * entry's running CRC with the bytes returned.
     *
     * @param buffer destination buffer
     * @param offset position inside buffer to start writing at
     * @param length maximum number of bytes to read
     * @return number of bytes read or -1 if there is no current entry
     * or its data has been exhausted
     * @throws IOException if the stream has been closed, the entry
     * uses an unsupported feature or reading fails
     * @throws ArrayIndexOutOfBoundsException if offset and length
     * don't describe a valid range of buffer
     */
    @Override
    public int read(byte[] buffer, int offset, int length) throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }

        if (current == null) {
            return -1;
        }

        // avoid int overflow, check null buffer
        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
            throw new ArrayIndexOutOfBoundsException();
        }

        ZipUtil.checkRequestedFeatures(current.entry);
        if (!supportsDataDescriptorFor(current.entry)) {
            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
                    current.entry);
        }

        int read;
        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
            read = readStored(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
            read = readDeflated(buffer, offset, length);
        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
            read = current.in.read(buffer, offset, length);
        } else {
            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
                    current.entry);
        }

        if (read >= 0) {
            current.crc.update(buffer, offset, read);
        }

        return read;
    }

    /**
     * Implementation of read for STORED entries.
     */
    private int readStored(byte[] buffer, int offset, int length) throws IOException {

        if (current.hasDataDescriptor) {
            // size is unknown up front: cache the whole entry once and
            // serve all reads from the cache
            if (lastStoredEntry == null) {
                readStoredEntry();
            }
            return lastStoredEntry.read(buffer, offset, length);
        }

        long csize = current.entry.getSize();
        if (current.bytesRead >= csize) {
            return -1;
        }

        if (buf.position() >= buf.limit()) {
            buf.position(0);
            int l = in.read(buf.array());
            if (l == -1) {
                return -1;
            }
            buf.limit(l);

            count(l);
            current.bytesReadFromStream += l;
        }

        int toRead = Math.min(buf.remaining(), length);
        if ((csize - current.bytesRead) < toRead) {
            // if it is smaller than toRead then it fits into an int
            toRead = (int) (csize - current.bytesRead);
        }
        buf.get(buffer, offset, toRead);
        current.bytesRead += toRead;
        return toRead;
    }

    /**
     * Implementation of read for DEFLATED entries.
     */
    private int readDeflated(byte[] buffer, int offset, int length) throws IOException {
        int read = readFromInflater(buffer, offset, length);
        if (read <= 0) {
            if (inf.finished()) {
                return -1;
            } else if (inf.needsDictionary()) {
                throw new ZipException("This archive needs a preset dictionary"
                                       + " which is not supported by Commons"
                                       + " Compress.");
            } else if (read == -1) {
                throw new IOException("Truncated ZIP file");
            }
        }
        return read;
    }

    /**
     * Potentially reads more bytes to fill the inflater's buffer and
     * reads from it.
     */
    private int readFromInflater(byte[] buffer, int offset, int length) throws IOException {
        int read = 0;
        do {
            if (inf.needsInput()) {
                int l = fill();
                if (l > 0) {
                    current.bytesReadFromStream += buf.limit();
                } else if (l == -1) {
                    return -1;
                } else {
                    break;
                }
            }
            try {
                read = inf.inflate(buffer, offset, length);
            } catch (DataFormatException e) {
                throw (IOException) new ZipException(e.getMessage()).initCause(e);
            }
        } while (read == 0 && inf.needsInput());
        return read;
    }

    /**
     * Closes the wrapped stream and releases the inflater.
     *
     * <p>The inflater is released even if closing the wrapped stream
     * fails.</p>
     */
    @Override
    public void close() throws IOException {
        if (!closed) {
            closed = true;
            try {
                in.close();
            } finally {
                // must not be skipped when in.close() throws
                inf.end();
            }
        }
    }

    /**
     * Skips over and discards value bytes of data from this input
     * stream.
     *
     * <p>This implementation may end up skipping over some smaller
     * number of bytes, possibly 0, if and only if it reaches the end
     * of the underlying stream.</p>
     *
     * <p>The actual number of bytes skipped is returned.</p>
     *
     * @param value the number of bytes to be skipped.
     * @return the actual number of bytes skipped.
     * @throws IOException - if an I/O error occurs.
     * @throws IllegalArgumentException - if value is negative.
     */
    @Override
    public long skip(long value) throws IOException {
        if (value >= 0) {
            long skipped = 0;
            while (skipped < value) {
                long rem = value - skipped;
                int x = read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length));
                if (x == -1) {
                    return skipped;
                }
                skipped += x;
            }
            return skipped;
        }
        throw new IllegalArgumentException();
    }

    /**
     * Checks if the signature matches what is expected for a zip file.
     * Does not currently handle self-extracting zips which may have arbitrary
     * leading content.
     *
     * @param signature the bytes to check
     * @param length    the number of bytes to check
     * @return true, if this stream is a zip archive stream, false otherwise
     */
    public static boolean matches(byte[] signature, int length) {
        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
            return false;
        }

        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
    }

    /** Whether signature starts with all bytes of expected. */
    private static boolean checksig(byte[] signature, byte[] expected) {
        for (int i = 0; i < expected.length; i++) {
            if (signature[i] != expected[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Closes the current ZIP archive entry and positions the underlying
     * stream to the beginning of the next entry. All per-entry variables
     * and data structures are cleared.
     * <p>
     * If the compressed size of this entry is included in the entry header,
     * then any outstanding bytes are simply skipped from the underlying
     * stream without uncompressing them. This allows an entry to be safely
     * closed even if the compression method is unsupported.
     * <p>
     * In case we don't know the compressed size of this entry or have
     * already buffered too much data from the underlying stream to support
     * uncompression, then the uncompression process is completed and the
     * end position of the stream is adjusted based on the result of that
     * process.
     *
     * @throws IOException if an error occurs
     */
    private void closeEntry() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        if (current == null) {
            return;
        }

        // Ensure all entry bytes are read
        if (current.bytesReadFromStream <= current.entry.getCompressedSize()
                && !current.hasDataDescriptor) {
            drainCurrentEntryData();
        } else {
            skip(Long.MAX_VALUE);

            long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
                       ? getBytesInflated() : current.bytesRead;

            // this is at most a single read() operation and can't
            // exceed the range of int
            int diff = (int) (current.bytesReadFromStream - inB);

            // Pushback any required bytes
            if (diff > 0) {
                pushback(buf.array(), buf.limit() - diff, diff);
            }
        }

        if (lastStoredEntry == null && current.hasDataDescriptor) {
            readDataDescriptor();
        }

        inf.reset();
        buf.clear().flip();
        current = null;
        lastStoredEntry = null;
    }

    /**
     * Read all data of the current entry from the underlying stream
     * that hasn't been read, yet.
     */
    private void drainCurrentEntryData() throws IOException {
        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
        while (remaining > 0) {
            long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
            if (n < 0) {
                throw new EOFException("Truncated ZIP entry: " + current.entry.getName());
            } else {
                count(n);
                remaining -= n;
            }
        }
    }

    /**
     * Get the number of bytes Inflater has actually processed.
     *
     * <p>for Java &lt; Java7 the getBytes* methods in
     * Inflater/Deflater seem to return unsigned ints rather than
     * longs that start over with 0 at 2^32.</p>
     *
     * <p>The stream knows how many bytes it has read, but not how
     * many the Inflater actually consumed - it should be between the
     * total number of bytes read for the entry and the total number
     * minus the last read operation.  Here we just try to make the
     * value close enough to the bytes we've read by assuming the
     * number of bytes consumed must be smaller than (or equal to) the
     * number of bytes read but not smaller by more than 2^32.</p>
     */
    private long getBytesInflated() {
        long inB = inf.getBytesRead();
        if (current.bytesReadFromStream >= TWO_EXP_32) {
            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
                inB += TWO_EXP_32;
            }
        }
        return inB;
    }

    /**
     * Reads from the wrapped stream into the buffer and hands the
     * bytes read to the inflater.
     *
     * @return the result of the read call on the wrapped stream
     */
    private int fill() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        int length = in.read(buf.array());
        if (length > 0) {
            buf.limit(length);
            count(buf.limit());
            inf.setInput(buf.array(), 0, buf.limit());
        }
        return length;
    }

    /**
     * Fills the given array from the wrapped stream and counts the
     * bytes read.
     *
     * @throws EOFException if the stream ends before the array has
     * been filled
     */
    private void readFully(byte[] b) throws IOException {
        int count = IOUtils.readFully(in, b);
        count(count);
        if (count < b.length) {
            throw new EOFException();
        }
    }

    /**
     * Reads the data descriptor following the current entry's data
     * and stores CRC and sizes in the current entry.
     */
    private void readDataDescriptor() throws IOException {
        readFully(WORD_BUF);
        ZipLong val = new ZipLong(WORD_BUF);
        if (ZipLong.DD_SIG.equals(val)) {
            // data descriptor with signature, skip sig
            readFully(WORD_BUF);
            val = new ZipLong(WORD_BUF);
        }
        current.entry.setCrc(val.getValue());

        // if there is a ZIP64 extra field, sizes are eight bytes
        // each, otherwise four bytes each.  Unfortunately some
        // implementations - namely Java7 - use eight bytes without
        // using a ZIP64 extra field -
        // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588

        // just read 16 bytes and check whether bytes nine to twelve
        // look like one of the signatures of what could follow a data
        // descriptor (ignoring archive decryption headers for now).
        // If so, push back eight bytes and assume sizes are four
        // bytes, otherwise sizes are eight bytes each.
        readFully(TWO_DWORD_BUF);
        ZipLong potentialSig = new ZipLong(TWO_DWORD_BUF, DWORD);
        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
            pushback(TWO_DWORD_BUF, DWORD, DWORD);
            current.entry.setCompressedSize(ZipLong.getValue(TWO_DWORD_BUF));
            current.entry.setSize(ZipLong.getValue(TWO_DWORD_BUF, WORD));
        } else {
            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF));
            current.entry.setSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF, DWORD));
        }
    }

    /**
     * Whether this entry requires a data descriptor this library can work with.
     *
     * @return true if allowStoredEntriesWithDataDescriptor is true,
     * the entry doesn't require any data descriptor or the method is
     * DEFLATED.
     */
    private boolean supportsDataDescriptorFor(ZipArchiveEntry entry) {
        return !entry.getGeneralPurposeBit().usesDataDescriptor()

            || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
            || entry.getMethod() == ZipEntry.DEFLATED;
    }

    /**
     * Caches a stored entry that uses the data descriptor.
     *
     * <ul>
     *   <li>Reads a stored entry until the signature of a local file
     *   header, central directory header or data descriptor has been
     *   found.</li>
     *   <li>Stores all entry data in lastStoredEntry.</li>
     *   <li>Rewinds the stream to position at the data
     *   descriptor.</li>
     *   <li>reads the data descriptor</li>
     * </ul>
     *
     * <p>After calling this method the entry should know its size,
     * the entry's data is cached and the stream is positioned at the
     * next local file or central directory header.</p>
     */
    private void readStoredEntry() throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        int off = 0;
        boolean done = false;

        // length of DD without signature
        int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;

        while (!done) {
            int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
            if (r <= 0) {
                // read the whole archive without ever finding a
                // central directory
                throw new IOException("Truncated ZIP file");
            }
            if (r + off < 4) {
                // buffer too small to check for a signature, loop
                off += r;
                continue;
            }

            done = bufferContainsSignature(bos, off, r, ddLen);
            if (!done) {
                off = cacheBytesRead(bos, off, r, ddLen);
            }
        }

        byte[] b = bos.toByteArray();
        lastStoredEntry = new ByteArrayInputStream(b);
    }

    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
    private static final byte[] DD = ZipLong.DD_SIG.getBytes();

    /**
     * Checks whether the current buffer contains the signature of a
     * "data descriptor", "local file header" or
     * "central directory entry".
     *
     * <p>If it contains such a signature, reads the data descriptor
     * and positions the stream right after the data descriptor.</p>
     *
     * <p>The buffer holds valid data in the range
     * [0, offset + lastRead); all of it is scanned, including bytes
     * carried over from a previous read.</p>
     *
     * @param bos cache receiving the entry data that precedes the
     * data descriptor
     * @param offset number of bytes that have been in the buffer
     * before the last read
     * @param lastRead number of bytes added by the last read
     * @param expectedDDLen length of a data descriptor without
     * signature
     * @return whether a signature has been found and the data
     * descriptor has been read
     */
    private boolean bufferContainsSignature(ByteArrayOutputStream bos, int offset, int lastRead, int expectedDDLen)
        throws IOException {

        final byte[] data = buf.array();
        final int valid = offset + lastRead;
        // all three signatures share their first two bytes, a full
        // signature needs four bytes so the last candidate is valid - 4
        for (int i = 0; i <= valid - 4; i++) {
            if (data[i] != LFH[0] || data[i + 1] != LFH[1]) {
                continue;
            }
            int ddStart = -1;
            if ((data[i + 2] == LFH[2] && data[i + 3] == LFH[3])
                || (data[i + 2] == CFH[2] && data[i + 3] == CFH[3])) {
                // found a LFH or CFH: a data descriptor without
                // signature sits directly in front of it.  If there
                // isn't enough room for one the bytes must be entry
                // data that merely looks like a signature.
                if (i >= expectedDDLen) {
                    ddStart = i - expectedDDLen;
                }
            } else if (data[i + 2] == DD[2] && data[i + 3] == DD[3]) {
                // found DD:
                ddStart = i;
            }
            if (ddStart >= 0) {
                // * push back bytes read in excess as well as the data
                //   descriptor
                // * copy the remaining bytes to cache
                // * read data descriptor
                pushback(data, ddStart, valid - ddStart);
                bos.write(data, 0, ddStart);
                readDataDescriptor();
                return true;
            }
        }
        return false;
    }

    /**
     * If the last read bytes could hold a data descriptor and an
     * incomplete signature then save the last bytes to the front of
     * the buffer and cache everything in front of the potential data
     * descriptor into the given ByteArrayOutputStream.
     *
     * <p>Data descriptor plus incomplete signature (3 bytes in the
     * worst case) can be 20 bytes max.</p>
     *
     * @return the number of bytes that remain at the front of the
     * buffer
     */
    private int cacheBytesRead(ByteArrayOutputStream bos, int offset, int lastRead, int expecteDDLen) {
        final int cacheable = offset + lastRead - expecteDDLen - 3;
        if (cacheable > 0) {
            bos.write(buf.array(), 0, cacheable);
            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3);
            offset = expecteDDLen + 3;
        } else {
            offset += lastRead;
        }
        return offset;
    }

    /**
     * Returns bytes to the wrapped stream and adjusts the bytes-read
     * counter accordingly.
     */
    private void pushback(byte[] buf, int offset, int length) throws IOException {
        ((PushbackInputStream) in).unread(buf, offset, length);
        pushedBackBytes(length);
    }

    // End of Central Directory Record
    //   end of central dir signature    WORD
    //   number of this disk             SHORT
    //   number of the disk with the
    //   start of the central directory  SHORT
    //   total number of entries in the
    //   central directory on this disk  SHORT
    //   total number of entries in
    //   the central directory           SHORT
    //   size of the central directory   WORD
    //   offset of start of central
    //   directory with respect to
    //   the starting disk number        WORD
    //   .ZIP file comment length        SHORT
    //   .ZIP file comment               up to 64KB
    //

    /**
     * Reads the stream until it find the "End of central directory
     * record" and consumes it as well.
     */
    private void skipRemainderOfArchive() throws IOException {
        // skip over central directory. One LFH has been read too much
        // already.  The calculation discounts file names and extra
        // data so it will be too short.
        // Use long arithmetic to avoid int overflow; the estimate is
        // negative if no entry has been read, nothing is skipped then.
        final long estimate = (long) entriesRead * CFH_LEN - LFH_LEN;
        if (estimate > 0) {
            realSkip(estimate);
        }
        findEocdRecord();
        realSkip(ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
        readFully(SHORT_BUF);
        // file comment
        realSkip(ZipShort.getValue(SHORT_BUF));
    }

    /**
     * Reads forward until the signature of the "End of central
     * directory" record is found.
     */
    private void findEocdRecord() throws IOException {
        int currentByte = -1;
        // set when the byte that broke a partial match may itself be
        // the first byte of the signature and must not be discarded
        boolean skipReadCall = false;
        while (skipReadCall || (currentByte = readOneByte()) > -1) {
            skipReadCall = false;
            if (!isFirstByteOfEocdSig(currentByte)) {
                continue;
            }
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
                if (currentByte == -1) {
                    break;
                }
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            currentByte = readOneByte();
            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
                if (currentByte == -1) {
                    break;
                }
                skipReadCall = isFirstByteOfEocdSig(currentByte);
                continue;
            }
            currentByte = readOneByte();
            if (currentByte == -1
                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
                break;
            }
            skipReadCall = isFirstByteOfEocdSig(currentByte);
        }
    }

    /**
     * Skips bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link
     * #skip} would do.
     *
     * Also updates bytes-read counter.
     *
     * @throws IllegalArgumentException if value is negative
     */
    private void realSkip(long value) throws IOException {
        if (value >= 0) {
            long skipped = 0;
            while (skipped < value) {
                long rem = value - skipped;
                int x = in.read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length));
                if (x == -1) {
                    return;
                }
                count(x);
                skipped += x;
            }
            return;
        }
        throw new IllegalArgumentException();
    }

    /**
     * Reads bytes by reading from the underlying stream rather than
     * the (potentially inflating) archive stream - which {@link #read} would do.
     *
     * Also updates bytes-read counter.
     */
    private int readOneByte() throws IOException {
        int b = in.read();
        if (b != -1) {
            count(1);
        }
        return b;
    }

    private boolean isFirstByteOfEocdSig(int b) {
        return b == ZipArchiveOutputStream.EOCD_SIG[0];
    }

    /**
     * Structure collecting information for the entry that is
     * currently being read.
     */
    private static final class CurrentEntry {

        /**
         * Current ZIP entry.
         */
        private final ZipArchiveEntry entry = new ZipArchiveEntry();

        /**
         * Does the entry use a data descriptor?
         */
        private boolean hasDataDescriptor;

        /**
         * Does the entry have a ZIP64 extended information extra field.
         */
        private boolean usesZip64;

        /**
         * Number of bytes of entry content read by the client if the
         * entry is STORED.
         */
        private long bytesRead;

        /**
         * Number of bytes of entry content read so far from the stream.
         *
         * <p>This may be more than the actual entry's length as some
         * stuff gets buffered up and needs to be pushed back when the
         * end of the entry has been reached.</p>
         */
        private long bytesReadFromStream;

        /**
         * The checksum calculated as the current entry is read.
         */
        private final CRC32 crc = new CRC32();

        /**
         * The input stream decompressing the data for shrunk and imploded entries.
         */
        private InputStream in;
    }

    /**
     * Bounded input stream adapted from commons-io
     */
    private class BoundedInputStream extends InputStream {

        /** the wrapped input stream */
        private final InputStream in;

        /** the max length to provide */
        private final long max;

        /** the number of bytes already returned */
        private long pos = 0;

        /**
         * Creates a new <code>BoundedInputStream</code> that wraps the given input
         * stream and limits it to a certain size.
         *
         * @param in The wrapped input stream
         * @param size The maximum number of bytes to return
         */
        public BoundedInputStream(final InputStream in, final long size) {
            this.max = size;
            this.in = in;
        }

        @Override
        public int read() throws IOException {
            if (max >= 0 && pos >= max) {
                return -1;
            }
            final int result = in.read();
            if (result == -1) {
                // nothing has been consumed, don't count a byte
                return -1;
            }
            pos++;
            count(1);
            current.bytesReadFromStream++;
            return result;
        }

        @Override
        public int read(final byte[] b) throws IOException {
            return this.read(b, 0, b.length);
        }

        @Override
        public int read(final byte[] b, final int off, final int len) throws IOException {
            if (max >= 0 && pos >= max) {
                return -1;
            }
            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
            final int bytesRead = in.read(b, off, (int) maxRead);

            if (bytesRead == -1) {
                return -1;
            }

            pos += bytesRead;
            count(bytesRead);
            current.bytesReadFromStream += bytesRead;
            return bytesRead;
        }

        @Override
        public long skip(final long n) throws IOException {
            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
            final long skippedBytes = in.skip(toSkip);
            pos += skippedBytes;
            return skippedBytes;
        }

        @Override
        public int available() throws IOException {
            if (max >= 0 && pos >= max) {
                return 0;
            }
            return in.available();
        }
    }
}