001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.zip; 019 020import java.io.BufferedInputStream; 021import java.io.Closeable; 022import java.io.EOFException; 023import java.io.File; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.RandomAccessFile; 027import java.util.Arrays; 028import java.util.Collections; 029import java.util.Comparator; 030import java.util.Enumeration; 031import java.util.HashMap; 032import java.util.LinkedList; 033import java.util.List; 034import java.util.Map; 035import java.util.zip.Inflater; 036import java.util.zip.InflaterInputStream; 037import java.util.zip.ZipException; 038 039import org.apache.commons.compress.utils.IOUtils; 040 041import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 042import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 043import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 044import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 045import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT; 046 047/** 048 * Replacement for <code>java.util.ZipFile</code>. 049 * 050 * <p>This class adds support for file name encodings other than UTF-8 051 * (which is required to work on ZIP files created by native zip tools 052 * and is able to skip a preamble like the one found in self 053 * extracting archives. Furthermore it returns instances of 054 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 055 * instead of <code>java.util.zip.ZipEntry</code>.</p> 056 * 057 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would 058 * have to reimplement all methods anyway. Like 059 * <code>java.util.ZipFile</code>, it uses RandomAccessFile under the 060 * covers and supports compressed and uncompressed entries. As of 061 * Apache Commons Compress 1.3 it also transparently supports Zip64 062 * extensions and thus individual entries and archives larger than 4 063 * GB or with more than 65536 entries.</p> 064 * 065 * <p>The method signatures mimic the ones of 066 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions: 067 * 068 * <ul> 069 * <li>There is no getName method.</li> 070 * <li>entries has been renamed to getEntries.</li> 071 * <li>getEntries and getEntry return 072 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 073 * instances.</li> 074 * <li>close is allowed to throw IOException.</li> 075 * </ul> 076 * 077 */ 078public class ZipFile implements Closeable { 079 private static final int HASH_SIZE = 509; 080 static final int NIBLET_MASK = 0x0f; 081 static final int BYTE_SHIFT = 8; 082 private static final int POS_0 = 0; 083 private static final int POS_1 = 1; 084 private static final int POS_2 = 2; 085 private static final int POS_3 = 3; 086 087 /** 088 * List of entries in the order they appear inside the central 089 * directory. 090 */ 091 private final List<ZipArchiveEntry> entries = 092 new LinkedList<ZipArchiveEntry>(); 093 094 /** 095 * Maps String to list of ZipArchiveEntrys, name -> actual entries. 096 */ 097 private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = 098 new HashMap<String, LinkedList<ZipArchiveEntry>>(HASH_SIZE); 099 100 private static final class OffsetEntry { 101 private long headerOffset = -1; 102 private long dataOffset = -1; 103 } 104 105 /** 106 * The encoding to use for filenames and the file comment. 107 * 108 * <p>For a list of possible values see <a 109 * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. 110 * Defaults to UTF-8.</p> 111 */ 112 private final String encoding; 113 114 /** 115 * The zip encoding to use for filenames and the file comment. 116 */ 117 private final ZipEncoding zipEncoding; 118 119 /** 120 * File name of actual source. 121 */ 122 private final String archiveName; 123 124 /** 125 * The actual data source. 126 */ 127 private final RandomAccessFile archive; 128 129 /** 130 * Whether to look for and use Unicode extra fields. 131 */ 132 private final boolean useUnicodeExtraFields; 133 134 /** 135 * Whether the file is closed. 136 */ 137 private volatile boolean closed = true; 138 139 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 140 private final byte[] DWORD_BUF = new byte[DWORD]; 141 private final byte[] WORD_BUF = new byte[WORD]; 142 private final byte[] CFH_BUF = new byte[CFH_LEN]; 143 private final byte[] SHORT_BUF = new byte[SHORT]; 144 145 /** 146 * Opens the given file for reading, assuming "UTF8" for file names. 147 * 148 * @param f the archive. 149 * 150 * @throws IOException if an error occurs while reading the file. 151 */ 152 public ZipFile(File f) throws IOException { 153 this(f, ZipEncodingHelper.UTF8); 154 } 155 156 /** 157 * Opens the given file for reading, assuming "UTF8". 158 * 159 * @param name name of the archive. 160 * 161 * @throws IOException if an error occurs while reading the file. 162 */ 163 public ZipFile(String name) throws IOException { 164 this(new File(name), ZipEncodingHelper.UTF8); 165 } 166 167 /** 168 * Opens the given file for reading, assuming the specified 169 * encoding for file names, scanning unicode extra fields. 170 * 171 * @param name name of the archive. 172 * @param encoding the encoding to use for file names, use null 173 * for the platform's default encoding 174 * 175 * @throws IOException if an error occurs while reading the file. 176 */ 177 public ZipFile(String name, String encoding) throws IOException { 178 this(new File(name), encoding, true); 179 } 180 181 /** 182 * Opens the given file for reading, assuming the specified 183 * encoding for file names and scanning for unicode extra fields. 184 * 185 * @param f the archive. 186 * @param encoding the encoding to use for file names, use null 187 * for the platform's default encoding 188 * 189 * @throws IOException if an error occurs while reading the file. 190 */ 191 public ZipFile(File f, String encoding) throws IOException { 192 this(f, encoding, true); 193 } 194 195 /** 196 * Opens the given file for reading, assuming the specified 197 * encoding for file names. 198 * 199 * @param f the archive. 200 * @param encoding the encoding to use for file names, use null 201 * for the platform's default encoding 202 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 203 * Extra Fields (if present) to set the file names. 204 * 205 * @throws IOException if an error occurs while reading the file. 206 */ 207 public ZipFile(File f, String encoding, boolean useUnicodeExtraFields) 208 throws IOException { 209 this.archiveName = f.getAbsolutePath(); 210 this.encoding = encoding; 211 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 212 this.useUnicodeExtraFields = useUnicodeExtraFields; 213 archive = new RandomAccessFile(f, "r"); 214 boolean success = false; 215 try { 216 Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = 217 populateFromCentralDirectory(); 218 resolveLocalFileHeaderData(entriesWithoutUTF8Flag); 219 success = true; 220 } finally { 221 closed = !success; 222 if (!success) { 223 IOUtils.closeQuietly(archive); 224 } 225 } 226 } 227 228 /** 229 * The encoding to use for filenames and the file comment. 230 * 231 * @return null if using the platform's default character encoding. 232 */ 233 public String getEncoding() { 234 return encoding; 235 } 236 237 /** 238 * Closes the archive. 239 * @throws IOException if an error occurs closing the archive. 240 */ 241 public void close() throws IOException { 242 // this flag is only written here and read in finalize() which 243 // can never be run in parallel. 244 // no synchronization needed. 245 closed = true; 246 247 archive.close(); 248 } 249 250 /** 251 * close a zipfile quietly; throw no io fault, do nothing 252 * on a null parameter 253 * @param zipfile file to close, can be null 254 */ 255 public static void closeQuietly(ZipFile zipfile) { 256 IOUtils.closeQuietly(zipfile); 257 } 258 259 /** 260 * Returns all entries. 261 * 262 * <p>Entries will be returned in the same order they appear 263 * within the archive's central directory.</p> 264 * 265 * @return all entries as {@link ZipArchiveEntry} instances 266 */ 267 public Enumeration<ZipArchiveEntry> getEntries() { 268 return Collections.enumeration(entries); 269 } 270 271 /** 272 * Returns all entries in physical order. 273 * 274 * <p>Entries will be returned in the same order their contents 275 * appear within the archive.</p> 276 * 277 * @return all entries as {@link ZipArchiveEntry} instances 278 * 279 * @since 1.1 280 */ 281 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() { 282 ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[entries.size()]); 283 Arrays.sort(allEntries, OFFSET_COMPARATOR); 284 return Collections.enumeration(Arrays.asList(allEntries)); 285 } 286 287 /** 288 * Returns a named entry - or {@code null} if no entry by 289 * that name exists. 290 * 291 * <p>If multiple entries with the same name exist the first entry 292 * in the archive's central directory by that name is 293 * returned.</p> 294 * 295 * @param name name of the entry. 296 * @return the ZipArchiveEntry corresponding to the given name - or 297 * {@code null} if not present. 298 */ 299 public ZipArchiveEntry getEntry(String name) { 300 LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 301 return entriesOfThatName != null ? entriesOfThatName.getFirst() : null; 302 } 303 304 /** 305 * Returns all named entries in the same order they appear within 306 * the archive's central directory. 307 * 308 * @param name name of the entry. 309 * @return the Iterable<ZipArchiveEntry> corresponding to the 310 * given name 311 * @since 1.6 312 */ 313 public Iterable<ZipArchiveEntry> getEntries(String name) { 314 List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 315 return entriesOfThatName != null ? entriesOfThatName 316 : Collections.<ZipArchiveEntry>emptyList(); 317 } 318 319 /** 320 * Returns all named entries in the same order their contents 321 * appear within the archive. 322 * 323 * @param name name of the entry. 324 * @return the Iterable<ZipArchiveEntry> corresponding to the 325 * given name 326 * @since 1.6 327 */ 328 public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(String name) { 329 ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0]; 330 if (nameMap.containsKey(name)) { 331 entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName); 332 Arrays.sort(entriesOfThatName, OFFSET_COMPARATOR); 333 } 334 return Arrays.asList(entriesOfThatName); 335 } 336 337 /** 338 * Whether this class is able to read the given entry. 339 * 340 * <p>May return false if it is set up to use encryption or a 341 * compression method that hasn't been implemented yet.</p> 342 * @since 1.1 343 * @param ze the entry 344 * @return whether this class is able to read the given entry. 345 */ 346 public boolean canReadEntryData(ZipArchiveEntry ze) { 347 return ZipUtil.canHandleEntryData(ze); 348 } 349 350 /** 351 * Expose the raw stream of the archive entry (compressed form) 352 * <p/> 353 * This method does not relate to how/if we understand the payload in the 354 * stream, since we really only intend to move it on to somewhere else. 355 * 356 * @param ze The entry to get the stream for 357 * @return The raw input stream containing (possibly) compressed data. 358 */ 359 private InputStream getRawInputStream(ZipArchiveEntry ze) { 360 if (!(ze instanceof Entry)) { 361 return null; 362 } 363 OffsetEntry offsetEntry = ((Entry) ze).getOffsetEntry(); 364 long start = offsetEntry.dataOffset; 365 return new BoundedInputStream(start, ze.getCompressedSize()); 366 } 367 368 369 /** 370 * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream. 371 * Compression and all other attributes will be as in this file. 372 * This method transfers entries based on the central directory of the zip file. 373 * 374 * @param target The zipArchiveOutputStream to write the entries to 375 * @param predicate A predicate that selects which entries to write 376 * @throws IOException on error 377 */ 378 public void copyRawEntries(ZipArchiveOutputStream target, ZipArchiveEntryPredicate predicate) 379 throws IOException { 380 Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder(); 381 while (src.hasMoreElements()) { 382 ZipArchiveEntry entry = src.nextElement(); 383 if (predicate.test( entry)) { 384 target.addRawArchiveEntry(entry, getRawInputStream(entry)); 385 } 386 } 387 } 388 389 /** 390 * Returns an InputStream for reading the contents of the given entry. 391 * 392 * @param ze the entry to get the stream for. 393 * @return a stream to read the entry from. 394 * @throws IOException if unable to create an input stream from the zipentry 395 * @throws ZipException if the zipentry uses an unsupported feature 396 */ 397 public InputStream getInputStream(ZipArchiveEntry ze) 398 throws IOException, ZipException { 399 if (!(ze instanceof Entry)) { 400 return null; 401 } 402 // cast valididty is checked just above 403 OffsetEntry offsetEntry = ((Entry) ze).getOffsetEntry(); 404 ZipUtil.checkRequestedFeatures(ze); 405 long start = offsetEntry.dataOffset; 406 BoundedInputStream bis = 407 new BoundedInputStream(start, ze.getCompressedSize()); 408 switch (ZipMethod.getMethodByCode(ze.getMethod())) { 409 case STORED: 410 return bis; 411 case UNSHRINKING: 412 return new UnshrinkingInputStream(bis); 413 case IMPLODING: 414 return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(), 415 ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), new BufferedInputStream(bis)); 416 case DEFLATED: 417 bis.addDummy(); 418 final Inflater inflater = new Inflater(true); 419 return new InflaterInputStream(bis, inflater) { 420 @Override 421 public void close() throws IOException { 422 super.close(); 423 inflater.end(); 424 } 425 }; 426 default: 427 throw new ZipException("Found unsupported compression method " 428 + ze.getMethod()); 429 } 430 } 431 432 /** 433 * <p> 434 * Convenience method to return the entry's content as a String if isUnixSymlink() 435 * returns true for it, otherwise returns null. 436 * </p> 437 * 438 * <p>This method assumes the symbolic link's file name uses the 439 * same encoding that as been specified for this ZipFile.</p> 440 * 441 * @param entry ZipArchiveEntry object that represents the symbolic link 442 * @return entry's content as a String 443 * @throws IOException problem with content's input stream 444 * @since 1.5 445 */ 446 public String getUnixSymlink(ZipArchiveEntry entry) throws IOException { 447 if (entry != null && entry.isUnixSymlink()) { 448 InputStream in = null; 449 try { 450 in = getInputStream(entry); 451 byte[] symlinkBytes = IOUtils.toByteArray(in); 452 return zipEncoding.decode(symlinkBytes); 453 } finally { 454 if (in != null) { 455 in.close(); 456 } 457 } 458 } else { 459 return null; 460 } 461 } 462 463 /** 464 * Ensures that the close method of this zipfile is called when 465 * there are no more references to it. 466 * @see #close() 467 */ 468 @Override 469 protected void finalize() throws Throwable { 470 try { 471 if (!closed) { 472 System.err.println("Cleaning up unclosed ZipFile for archive " 473 + archiveName); 474 close(); 475 } 476 } finally { 477 super.finalize(); 478 } 479 } 480 481 /** 482 * Length of a "central directory" entry structure without file 483 * name, extra fields or comment. 484 */ 485 private static final int CFH_LEN = 486 /* version made by */ SHORT 487 /* version needed to extract */ + SHORT 488 /* general purpose bit flag */ + SHORT 489 /* compression method */ + SHORT 490 /* last mod file time */ + SHORT 491 /* last mod file date */ + SHORT 492 /* crc-32 */ + WORD 493 /* compressed size */ + WORD 494 /* uncompressed size */ + WORD 495 /* filename length */ + SHORT 496 /* extra field length */ + SHORT 497 /* file comment length */ + SHORT 498 /* disk number start */ + SHORT 499 /* internal file attributes */ + SHORT 500 /* external file attributes */ + WORD 501 /* relative offset of local header */ + WORD; 502 503 private static final long CFH_SIG = 504 ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); 505 506 /** 507 * Reads the central directory of the given archive and populates 508 * the internal tables with ZipArchiveEntry instances. 509 * 510 * <p>The ZipArchiveEntrys will know all data that can be obtained from 511 * the central directory alone, but not the data that requires the 512 * local file header or additional data to be read.</p> 513 * 514 * @return a map of zipentries that didn't have the language 515 * encoding flag set when read. 516 */ 517 private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory() 518 throws IOException { 519 HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag = 520 new HashMap<ZipArchiveEntry, NameAndComment>(); 521 522 positionAtCentralDirectory(); 523 524 archive.readFully(WORD_BUF); 525 long sig = ZipLong.getValue(WORD_BUF); 526 527 if (sig != CFH_SIG && startsWithLocalFileHeader()) { 528 throw new IOException("central directory is empty, can't expand" 529 + " corrupt archive."); 530 } 531 532 while (sig == CFH_SIG) { 533 readCentralDirectoryEntry(noUTF8Flag); 534 archive.readFully(WORD_BUF); 535 sig = ZipLong.getValue(WORD_BUF); 536 } 537 return noUTF8Flag; 538 } 539 540 /** 541 * Reads an individual entry of the central directory, creats an 542 * ZipArchiveEntry from it and adds it to the global maps. 543 * 544 * @param noUTF8Flag map used to collect entries that don't have 545 * their UTF-8 flag set and whose name will be set by data read 546 * from the local file header later. The current entry may be 547 * added to this map. 548 */ 549 private void 550 readCentralDirectoryEntry(Map<ZipArchiveEntry, NameAndComment> noUTF8Flag) 551 throws IOException { 552 archive.readFully(CFH_BUF); 553 int off = 0; 554 OffsetEntry offset = new OffsetEntry(); 555 Entry ze = new Entry(offset); 556 557 int versionMadeBy = ZipShort.getValue(CFH_BUF, off); 558 off += SHORT; 559 ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK); 560 561 off += SHORT; // skip version info 562 563 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(CFH_BUF, off); 564 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames(); 565 final ZipEncoding entryEncoding = 566 hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding; 567 ze.setGeneralPurposeBit(gpFlag); 568 569 off += SHORT; 570 571 ze.setMethod(ZipShort.getValue(CFH_BUF, off)); 572 off += SHORT; 573 574 long time = ZipUtil.dosToJavaTime(ZipLong.getValue(CFH_BUF, off)); 575 ze.setTime(time); 576 off += WORD; 577 578 ze.setCrc(ZipLong.getValue(CFH_BUF, off)); 579 off += WORD; 580 581 ze.setCompressedSize(ZipLong.getValue(CFH_BUF, off)); 582 off += WORD; 583 584 ze.setSize(ZipLong.getValue(CFH_BUF, off)); 585 off += WORD; 586 587 int fileNameLen = ZipShort.getValue(CFH_BUF, off); 588 off += SHORT; 589 590 int extraLen = ZipShort.getValue(CFH_BUF, off); 591 off += SHORT; 592 593 int commentLen = ZipShort.getValue(CFH_BUF, off); 594 off += SHORT; 595 596 int diskStart = ZipShort.getValue(CFH_BUF, off); 597 off += SHORT; 598 599 ze.setInternalAttributes(ZipShort.getValue(CFH_BUF, off)); 600 off += SHORT; 601 602 ze.setExternalAttributes(ZipLong.getValue(CFH_BUF, off)); 603 off += WORD; 604 605 byte[] fileName = new byte[fileNameLen]; 606 archive.readFully(fileName); 607 ze.setName(entryEncoding.decode(fileName), fileName); 608 609 // LFH offset, 610 offset.headerOffset = ZipLong.getValue(CFH_BUF, off); 611 // data offset will be filled later 612 entries.add(ze); 613 614 byte[] cdExtraData = new byte[extraLen]; 615 archive.readFully(cdExtraData); 616 ze.setCentralDirectoryExtra(cdExtraData); 617 618 setSizesAndOffsetFromZip64Extra(ze, offset, diskStart); 619 620 byte[] comment = new byte[commentLen]; 621 archive.readFully(comment); 622 ze.setComment(entryEncoding.decode(comment)); 623 624 if (!hasUTF8Flag && useUnicodeExtraFields) { 625 noUTF8Flag.put(ze, new NameAndComment(fileName, comment)); 626 } 627 } 628 629 /** 630 * If the entry holds a Zip64 extended information extra field, 631 * read sizes from there if the entry's sizes are set to 632 * 0xFFFFFFFFF, do the same for the offset of the local file 633 * header. 634 * 635 * <p>Ensures the Zip64 extra either knows both compressed and 636 * uncompressed size or neither of both as the internal logic in 637 * ExtraFieldUtils forces the field to create local header data 638 * even if they are never used - and here a field with only one 639 * size would be invalid.</p> 640 */ 641 private void setSizesAndOffsetFromZip64Extra(ZipArchiveEntry ze, 642 OffsetEntry offset, 643 int diskStart) 644 throws IOException { 645 Zip64ExtendedInformationExtraField z64 = 646 (Zip64ExtendedInformationExtraField) 647 ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 648 if (z64 != null) { 649 boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC; 650 boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC; 651 boolean hasRelativeHeaderOffset = 652 offset.headerOffset == ZIP64_MAGIC; 653 z64.reparseCentralDirectoryData(hasUncompressedSize, 654 hasCompressedSize, 655 hasRelativeHeaderOffset, 656 diskStart == ZIP64_MAGIC_SHORT); 657 658 if (hasUncompressedSize) { 659 ze.setSize(z64.getSize().getLongValue()); 660 } else if (hasCompressedSize) { 661 z64.setSize(new ZipEightByteInteger(ze.getSize())); 662 } 663 664 if (hasCompressedSize) { 665 ze.setCompressedSize(z64.getCompressedSize().getLongValue()); 666 } else if (hasUncompressedSize) { 667 z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize())); 668 } 669 670 if (hasRelativeHeaderOffset) { 671 offset.headerOffset = 672 z64.getRelativeHeaderOffset().getLongValue(); 673 } 674 } 675 } 676 677 /** 678 * Length of the "End of central directory record" - which is 679 * supposed to be the last structure of the archive - without file 680 * comment. 681 */ 682 static final int MIN_EOCD_SIZE = 683 /* end of central dir signature */ WORD 684 /* number of this disk */ + SHORT 685 /* number of the disk with the */ 686 /* start of the central directory */ + SHORT 687 /* total number of entries in */ 688 /* the central dir on this disk */ + SHORT 689 /* total number of entries in */ 690 /* the central dir */ + SHORT 691 /* size of the central directory */ + WORD 692 /* offset of start of central */ 693 /* directory with respect to */ 694 /* the starting disk number */ + WORD 695 /* zipfile comment length */ + SHORT; 696 697 /** 698 * Maximum length of the "End of central directory record" with a 699 * file comment. 700 */ 701 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE 702 /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT; 703 704 /** 705 * Offset of the field that holds the location of the first 706 * central directory entry inside the "End of central directory 707 * record" relative to the start of the "End of central directory 708 * record". 709 */ 710 private static final int CFD_LOCATOR_OFFSET = 711 /* end of central dir signature */ WORD 712 /* number of this disk */ + SHORT 713 /* number of the disk with the */ 714 /* start of the central directory */ + SHORT 715 /* total number of entries in */ 716 /* the central dir on this disk */ + SHORT 717 /* total number of entries in */ 718 /* the central dir */ + SHORT 719 /* size of the central directory */ + WORD; 720 721 /** 722 * Length of the "Zip64 end of central directory locator" - which 723 * should be right in front of the "end of central directory 724 * record" if one is present at all. 725 */ 726 private static final int ZIP64_EOCDL_LENGTH = 727 /* zip64 end of central dir locator sig */ WORD 728 /* number of the disk with the start */ 729 /* start of the zip64 end of */ 730 /* central directory */ + WORD 731 /* relative offset of the zip64 */ 732 /* end of central directory record */ + DWORD 733 /* total number of disks */ + WORD; 734 735 /** 736 * Offset of the field that holds the location of the "Zip64 end 737 * of central directory record" inside the "Zip64 end of central 738 * directory locator" relative to the start of the "Zip64 end of 739 * central directory locator". 740 */ 741 private static final int ZIP64_EOCDL_LOCATOR_OFFSET = 742 /* zip64 end of central dir locator sig */ WORD 743 /* number of the disk with the start */ 744 /* start of the zip64 end of */ 745 /* central directory */ + WORD; 746 747 /** 748 * Offset of the field that holds the location of the first 749 * central directory entry inside the "Zip64 end of central 750 * directory record" relative to the start of the "Zip64 end of 751 * central directory record". 752 */ 753 private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET = 754 /* zip64 end of central dir */ 755 /* signature */ WORD 756 /* size of zip64 end of central */ 757 /* directory record */ + DWORD 758 /* version made by */ + SHORT 759 /* version needed to extract */ + SHORT 760 /* number of this disk */ + WORD 761 /* number of the disk with the */ 762 /* start of the central directory */ + WORD 763 /* total number of entries in the */ 764 /* central directory on this disk */ + DWORD 765 /* total number of entries in the */ 766 /* central directory */ + DWORD 767 /* size of the central directory */ + DWORD; 768 769 /** 770 * Searches for either the "Zip64 end of central directory 771 * locator" or the "End of central dir record", parses 772 * it and positions the stream at the first central directory 773 * record. 774 */ 775 private void positionAtCentralDirectory() 776 throws IOException { 777 positionAtEndOfCentralDirectoryRecord(); 778 boolean found = false; 779 boolean searchedForZip64EOCD = 780 archive.getFilePointer() > ZIP64_EOCDL_LENGTH; 781 if (searchedForZip64EOCD) { 782 archive.seek(archive.getFilePointer() - ZIP64_EOCDL_LENGTH); 783 archive.readFully(WORD_BUF); 784 found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG, 785 WORD_BUF); 786 } 787 if (!found) { 788 // not a ZIP64 archive 789 if (searchedForZip64EOCD) { 790 skipBytes(ZIP64_EOCDL_LENGTH - WORD); 791 } 792 positionAtCentralDirectory32(); 793 } else { 794 positionAtCentralDirectory64(); 795 } 796 } 797 798 /** 799 * Parses the "Zip64 end of central directory locator", 800 * finds the "Zip64 end of central directory record" using the 801 * parsed information, parses that and positions the stream at the 802 * first central directory record. 803 * 804 * Expects stream to be positioned right behind the "Zip64 805 * end of central directory locator"'s signature. 806 */ 807 private void positionAtCentralDirectory64() 808 throws IOException { 809 skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET 810 - WORD /* signature has already been read */); 811 archive.readFully(DWORD_BUF); 812 archive.seek(ZipEightByteInteger.getLongValue(DWORD_BUF)); 813 archive.readFully(WORD_BUF); 814 if (!Arrays.equals(WORD_BUF, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) { 815 throw new ZipException("archive's ZIP64 end of central " 816 + "directory locator is corrupt."); 817 } 818 skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET 819 - WORD /* signature has already been read */); 820 archive.readFully(DWORD_BUF); 821 archive.seek(ZipEightByteInteger.getLongValue(DWORD_BUF)); 822 } 823 824 /** 825 * Parses the "End of central dir record" and positions 826 * the stream at the first central directory record. 827 * 828 * Expects stream to be positioned at the beginning of the 829 * "End of central dir record". 830 */ 831 private void positionAtCentralDirectory32() 832 throws IOException { 833 skipBytes(CFD_LOCATOR_OFFSET); 834 archive.readFully(WORD_BUF); 835 archive.seek(ZipLong.getValue(WORD_BUF)); 836 } 837 838 /** 839 * Searches for the and positions the stream at the start of the 840 * "End of central dir record". 841 */ 842 private void positionAtEndOfCentralDirectoryRecord() 843 throws IOException { 844 boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, 845 ZipArchiveOutputStream.EOCD_SIG); 846 if (!found) { 847 throw new ZipException("archive is not a ZIP archive"); 848 } 849 } 850 851 /** 852 * Searches the archive backwards from minDistance to maxDistance 853 * for the given signature, positions the RandomaccessFile right 854 * at the signature if it has been found. 855 */ 856 private boolean tryToLocateSignature(long minDistanceFromEnd, 857 long maxDistanceFromEnd, 858 byte[] sig) throws IOException { 859 boolean found = false; 860 long off = archive.length() - minDistanceFromEnd; 861 final long stopSearching = 862 Math.max(0L, archive.length() - maxDistanceFromEnd); 863 if (off >= 0) { 864 for (; off >= stopSearching; off--) { 865 archive.seek(off); 866 int curr = archive.read(); 867 if (curr == -1) { 868 break; 869 } 870 if (curr == sig[POS_0]) { 871 curr = archive.read(); 872 if (curr == sig[POS_1]) { 873 curr = archive.read(); 874 if (curr == sig[POS_2]) { 875 curr = archive.read(); 876 if (curr == sig[POS_3]) { 877 found = true; 878 break; 879 } 880 } 881 } 882 } 883 } 884 } 885 if (found) { 886 archive.seek(off); 887 } 888 return found; 889 } 890 891 /** 892 * Skips the given number of bytes or throws an EOFException if 893 * skipping failed. 894 */ 895 private void skipBytes(final int count) throws IOException { 896 int totalSkipped = 0; 897 while (totalSkipped < count) { 898 int skippedNow = archive.skipBytes(count - totalSkipped); 899 if (skippedNow <= 0) { 900 throw new EOFException(); 901 } 902 totalSkipped += skippedNow; 903 } 904 } 905 906 /** 907 * Number of bytes in local file header up to the "length of 908 * filename" entry. 909 */ 910 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = 911 /* local file header signature */ WORD 912 /* version needed to extract */ + SHORT 913 /* general purpose bit flag */ + SHORT 914 /* compression method */ + SHORT 915 /* last mod file time */ + SHORT 916 /* last mod file date */ + SHORT 917 /* crc-32 */ + WORD 918 /* compressed size */ + WORD 919 /* uncompressed size */ + WORD; 920 921 /** 922 * Walks through all recorded entries and adds the data available 923 * from the local file header. 924 * 925 * <p>Also records the offsets for the data to read from the 926 * entries.</p> 927 */ 928 private void resolveLocalFileHeaderData(Map<ZipArchiveEntry, NameAndComment> 929 entriesWithoutUTF8Flag) 930 throws IOException { 931 for (ZipArchiveEntry zipArchiveEntry : entries) { 932 // entries is filled in populateFromCentralDirectory and 933 // never modified 934 Entry ze = (Entry) zipArchiveEntry; 935 OffsetEntry offsetEntry = ze.getOffsetEntry(); 936 long offset = offsetEntry.headerOffset; 937 archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 938 archive.readFully(SHORT_BUF); 939 int fileNameLen = ZipShort.getValue(SHORT_BUF); 940 archive.readFully(SHORT_BUF); 941 int extraFieldLen = ZipShort.getValue(SHORT_BUF); 942 int lenToSkip = fileNameLen; 943 while (lenToSkip > 0) { 944 int skipped = archive.skipBytes(lenToSkip); 945 if (skipped <= 0) { 946 throw new IOException("failed to skip file name in" 947 + " local file header"); 948 } 949 lenToSkip -= skipped; 950 } 951 byte[] localExtraData = new byte[extraFieldLen]; 952 archive.readFully(localExtraData); 953 ze.setExtra(localExtraData); 954 offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH 955 + SHORT + SHORT + fileNameLen + extraFieldLen; 956 957 if (entriesWithoutUTF8Flag.containsKey(ze)) { 958 NameAndComment nc = entriesWithoutUTF8Flag.get(ze); 959 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, 960 nc.comment); 961 } 962 963 String name = ze.getName(); 964 LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 965 if (entriesOfThatName == null) { 966 entriesOfThatName = new LinkedList<ZipArchiveEntry>(); 967 nameMap.put(name, entriesOfThatName); 968 } 969 entriesOfThatName.addLast(ze); 970 } 971 } 972 973 /** 974 * Checks whether the archive starts with a LFH. If it doesn't, 975 * it may be an empty archive. 976 */ 977 private boolean startsWithLocalFileHeader() throws IOException { 978 archive.seek(0); 979 archive.readFully(WORD_BUF); 980 return Arrays.equals(WORD_BUF, ZipArchiveOutputStream.LFH_SIG); 981 } 982 983 /** 984 * InputStream that delegates requests to the underlying 985 * RandomAccessFile, making sure that only bytes from a certain 986 * range can be read. 987 */ 988 private class BoundedInputStream extends InputStream { 989 private long remaining; 990 private long loc; 991 private boolean addDummyByte = false; 992 993 BoundedInputStream(long start, long remaining) { 994 this.remaining = remaining; 995 loc = start; 996 } 997 998 @Override 999 public int read() throws IOException { 1000 if (remaining-- <= 0) { 1001 if (addDummyByte) { 1002 addDummyByte = false; 1003 return 0; 1004 } 1005 return -1; 1006 } 1007 synchronized (archive) { 1008 archive.seek(loc++); 1009 return archive.read(); 1010 } 1011 } 1012 1013 @Override 1014 public int read(byte[] b, int off, int len) throws IOException { 1015 if (remaining <= 0) { 1016 if (addDummyByte) { 1017 addDummyByte = false; 1018 b[off] = 0; 1019 return 1; 1020 } 1021 return -1; 1022 } 1023 1024 if (len <= 0) { 1025 return 0; 1026 } 1027 1028 if (len > remaining) { 1029 len = (int) remaining; 1030 } 1031 int ret = -1; 1032 synchronized (archive) { 1033 archive.seek(loc); 1034 ret = archive.read(b, off, len); 1035 } 1036 if (ret > 0) { 1037 loc += ret; 1038 remaining -= ret; 1039 } 1040 return ret; 1041 } 1042 1043 /** 1044 * Inflater needs an extra dummy byte for nowrap - see 1045 * Inflater's javadocs. 1046 */ 1047 void addDummy() { 1048 addDummyByte = true; 1049 } 1050 } 1051 1052 private static final class NameAndComment { 1053 private final byte[] name; 1054 private final byte[] comment; 1055 private NameAndComment(byte[] name, byte[] comment) { 1056 this.name = name; 1057 this.comment = comment; 1058 } 1059 } 1060 1061 /** 1062 * Compares two ZipArchiveEntries based on their offset within the archive. 1063 * 1064 * <p>Won't return any meaningful results if one of the entries 1065 * isn't part of the archive at all.</p> 1066 * 1067 * @since 1.1 1068 */ 1069 private final Comparator<ZipArchiveEntry> OFFSET_COMPARATOR = 1070 new Comparator<ZipArchiveEntry>() { 1071 public int compare(ZipArchiveEntry e1, ZipArchiveEntry e2) { 1072 if (e1 == e2) { 1073 return 0; 1074 } 1075 1076 Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null; 1077 Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null; 1078 if (ent1 == null) { 1079 return 1; 1080 } 1081 if (ent2 == null) { 1082 return -1; 1083 } 1084 long val = (ent1.getOffsetEntry().headerOffset 1085 - ent2.getOffsetEntry().headerOffset); 1086 return val == 0 ? 0 : val < 0 ? -1 : +1; 1087 } 1088 }; 1089 1090 /** 1091 * Extends ZipArchiveEntry to store the offset within the archive. 1092 */ 1093 private static class Entry extends ZipArchiveEntry { 1094 1095 private final OffsetEntry offsetEntry; 1096 1097 Entry(OffsetEntry offset) { 1098 this.offsetEntry = offset; 1099 } 1100 1101 OffsetEntry getOffsetEntry() { 1102 return offsetEntry; 1103 } 1104 1105 @Override 1106 public int hashCode() { 1107 return 3 * super.hashCode() 1108 + (int) (offsetEntry.headerOffset % Integer.MAX_VALUE); 1109 } 1110 1111 @Override 1112 public boolean equals(Object other) { 1113 if (super.equals(other)) { 1114 // super.equals would return false if other were not an Entry 1115 Entry otherEntry = (Entry) other; 1116 return offsetEntry.headerOffset 1117 == otherEntry.offsetEntry.headerOffset 1118 && offsetEntry.dataOffset 1119 == otherEntry.offsetEntry.dataOffset; 1120 } 1121 return false; 1122 } 1123 } 1124}