1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.compress.archivers.zip;
18
19 import java.io.BufferedInputStream;
20 import java.io.ByteArrayInputStream;
21 import java.io.Closeable;
22 import java.io.EOFException;
23 import java.io.File;
24 import java.io.IOException;
25 import java.io.InputStream;
26 import java.io.SequenceInputStream;
27 import java.nio.ByteBuffer;
28 import java.nio.ByteOrder;
29 import java.nio.channels.FileChannel;
30 import java.nio.channels.SeekableByteChannel;
31 import java.nio.charset.Charset;
32 import java.nio.charset.StandardCharsets;
33 import java.nio.file.Files;
34 import java.nio.file.OpenOption;
35 import java.nio.file.Path;
36 import java.nio.file.StandardOpenOption;
37 import java.util.ArrayList;
38 import java.util.Arrays;
39 import java.util.Collections;
40 import java.util.Comparator;
41 import java.util.EnumSet;
42 import java.util.Enumeration;
43 import java.util.HashMap;
44 import java.util.LinkedList;
45 import java.util.List;
46 import java.util.Map;
47 import java.util.stream.Collectors;
48 import java.util.stream.IntStream;
49 import java.util.zip.Inflater;
50 import java.util.zip.ZipException;
51
52 import org.apache.commons.compress.archivers.EntryStreamOffsets;
53 import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
54 import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
55 import org.apache.commons.compress.utils.BoundedArchiveInputStream;
56 import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
57 import org.apache.commons.compress.utils.IOUtils;
58 import org.apache.commons.compress.utils.InputStreamStatistics;
59 import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;
60 import org.apache.commons.io.Charsets;
61 import org.apache.commons.io.FilenameUtils;
62 import org.apache.commons.io.build.AbstractOrigin.ByteArrayOrigin;
63 import org.apache.commons.io.build.AbstractStreamBuilder;
64 import org.apache.commons.io.input.CountingInputStream;
65
66 /**
67 * Replacement for {@link java.util.zip.ZipFile}.
68 * <p>
 * This class adds support for file name encodings other than UTF-8 (which is required to work on ZIP files created by native ZIP tools) and is able to skip a
 * preamble like the one found in self extracting archives. Furthermore it returns instances of
71 * {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} instead of {@link java.util.zip.ZipEntry}.
72 * </p>
73 * <p>
74 * It doesn't extend {@link java.util.zip.ZipFile} as it would have to reimplement all methods anyway. Like {@link java.util.zip.ZipFile}, it uses
75 * SeekableByteChannel under the covers and supports compressed and uncompressed entries. As of Apache Commons Compress 1.3 it also transparently supports Zip64
76 * extensions and thus individual entries and archives larger than 4 GB or with more than 65,536 entries.
77 * </p>
78 * <p>
79 * The method signatures mimic the ones of {@link java.util.zip.ZipFile}, with a couple of exceptions:
80 * </p>
81 * <ul>
82 * <li>There is no getName method.</li>
83 * <li>entries has been renamed to getEntries.</li>
84 * <li>getEntries and getEntry return {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} instances.</li>
85 * <li>close is allowed to throw IOException.</li>
86 * </ul>
87 */
88 public class ZipFile implements Closeable {
89
90 /**
91 * Lock-free implementation of BoundedInputStream. The implementation uses positioned reads on the underlying archive file channel and therefore performs
92 * significantly faster in concurrent environment.
93 */
94 private static class BoundedFileChannelInputStream extends BoundedArchiveInputStream {
95 private final FileChannel archive;
96
97 BoundedFileChannelInputStream(final long start, final long remaining, final FileChannel archive) {
98 super(start, remaining);
99 this.archive = archive;
100 }
101
102 @Override
103 protected int read(final long pos, final ByteBuffer buf) throws IOException {
104 final int read = archive.read(buf, pos);
105 buf.flip();
106 return read;
107 }
108 }
109
110 /**
111 * Builds new {@link ZipFile} instances.
112 * <p>
113 * The channel will be opened for reading, assuming the specified encoding for file names.
114 * </p>
115 * <p>
116 * See {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} to read from an in-memory archive.
117 * </p>
118 * <p>
119 * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time
120 * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory.
121 * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is
122 * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
123 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.
124 * </p>
125 *
126 * @since 1.26.0
127 */
128 public static class Builder extends AbstractStreamBuilder<ZipFile, Builder> {
129
130 static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;
131
132 private SeekableByteChannel seekableByteChannel;
133 private boolean useUnicodeExtraFields = true;
134 private boolean ignoreLocalFileHeader;
135 private long maxNumberOfDisks = 1;
136
137 public Builder() {
138 setCharset(DEFAULT_CHARSET);
139 setCharsetDefault(DEFAULT_CHARSET);
140 }
141
142 @SuppressWarnings("resource") // caller closes
143 @Override
144 public ZipFile get() throws IOException {
145 final SeekableByteChannel actualChannel;
146 final String actualDescription;
147 if (seekableByteChannel != null) {
148 actualChannel = seekableByteChannel;
149 actualDescription = actualChannel.getClass().getSimpleName();
150 } else if (checkOrigin() instanceof ByteArrayOrigin) {
151 actualChannel = new SeekableInMemoryByteChannel(checkOrigin().getByteArray());
152 actualDescription = actualChannel.getClass().getSimpleName();
153 } else {
154 OpenOption[] openOptions = getOpenOptions();
155 if (openOptions.length == 0) {
156 openOptions = new OpenOption[] { StandardOpenOption.READ };
157 }
158 final Path path = getPath();
159 actualChannel = openZipChannel(path, maxNumberOfDisks, openOptions);
160 actualDescription = path.toString();
161 }
162 final boolean closeOnError = seekableByteChannel != null;
163 return new ZipFile(actualChannel, actualDescription, getCharset(), useUnicodeExtraFields, closeOnError, ignoreLocalFileHeader);
164 }
165
166 /**
167 * Sets whether to ignore information stored inside the local file header.
168 *
169 * @param ignoreLocalFileHeader whether to ignore information stored inside.
170 * @return this.
171 */
172 public Builder setIgnoreLocalFileHeader(final boolean ignoreLocalFileHeader) {
173 this.ignoreLocalFileHeader = ignoreLocalFileHeader;
174 return this;
175 }
176
177 /**
178 * Sets max number of multi archive disks, default is 1 (no multi archive).
179 *
180 * @param maxNumberOfDisks max number of multi archive disks.
181 *
182 * @return this.
183 */
184 public Builder setMaxNumberOfDisks(final long maxNumberOfDisks) {
185 this.maxNumberOfDisks = maxNumberOfDisks;
186 return this;
187 }
188
189 /**
190 * The actual channel, overrides any other input aspects like a File, Path, and so on.
191 *
192 * @param seekableByteChannel The actual channel.
193 * @return this.
194 */
195 public Builder setSeekableByteChannel(final SeekableByteChannel seekableByteChannel) {
196 this.seekableByteChannel = seekableByteChannel;
197 return this;
198 }
199
200 /**
201 * Sets whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
202 *
203 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
204 * @return this.
205 */
206 public Builder setUseUnicodeExtraFields(final boolean useUnicodeExtraFields) {
207 this.useUnicodeExtraFields = useUnicodeExtraFields;
208 return this;
209 }
210
211 }
212
213 /**
214 * Extends ZipArchiveEntry to store the offset within the archive.
215 */
216 private static final class Entry extends ZipArchiveEntry {
217
218 @Override
219 public boolean equals(final Object other) {
220 if (super.equals(other)) {
221 // super.equals would return false if other were not an Entry
222 final Entry otherEntry = (Entry) other;
223 return getLocalHeaderOffset() == otherEntry.getLocalHeaderOffset() && super.getDataOffset() == otherEntry.getDataOffset()
224 && super.getDiskNumberStart() == otherEntry.getDiskNumberStart();
225 }
226 return false;
227 }
228
229 @Override
230 public int hashCode() {
231 return 3 * super.hashCode() + (int) getLocalHeaderOffset() + (int) (getLocalHeaderOffset() >> 32);
232 }
233 }
234
235 private static final class NameAndComment {
236 private final byte[] name;
237 private final byte[] comment;
238
239 private NameAndComment(final byte[] name, final byte[] comment) {
240 this.name = name;
241 this.comment = comment;
242 }
243 }
244
245 private static final class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics {
246 StoredStatisticsStream(final InputStream in) {
247 super(in);
248 }
249
250 @Override
251 public long getCompressedCount() {
252 return super.getByteCount();
253 }
254
255 @Override
256 public long getUncompressedCount() {
257 return getCompressedCount();
258 }
259 }
260
/** Canonical name of UTF-8; the deprecated constructors without an encoding argument pass it on as the encoding. */
private static final String DEFAULT_CHARSET_NAME = StandardCharsets.UTF_8.name();

/** Open options used by {@code newReadByteChannel}: read access only. */
private static final EnumSet<StandardOpenOption> READ = EnumSet.of(StandardOpenOption.READ);

/** Initial capacity of the name-to-entries map. */
private static final int HASH_SIZE = 509;
// NOTE(review): NIBLET_MASK and BYTE_SHIFT are not referenced in this part of the file; by value they are a low-four-bits mask and the number of bits in a byte.
static final int NIBLET_MASK = 0x0f;
static final int BYTE_SHIFT = 8;
// Indexes of the four signature bytes compared by tryToLocateSignature.
private static final int POS_0 = 0;
private static final int POS_1 = 1;
private static final int POS_2 = 2;
private static final int POS_3 = 3;
// A one-element array holding a single zero byte; NOTE(review): its use is outside this part of the file.
private static final byte[] ONE_ZERO_BYTE = new byte[1];
273
/**
 * Length of a "central directory" entry structure without file name, extra fields or comment.
 * <p>
 * Also the size of the cached buffer the class allocates for reading such a structure.
 * </p>
 */
private static final int CFH_LEN =
// @formatter:off
/* version made by                 */ ZipConstants.SHORT
/* version needed to extract       */ + ZipConstants.SHORT
/* general purpose bit flag        */ + ZipConstants.SHORT
/* compression method              */ + ZipConstants.SHORT
/* last mod file time              */ + ZipConstants.SHORT
/* last mod file date              */ + ZipConstants.SHORT
/* crc-32                          */ + ZipConstants.WORD
/* compressed size                 */ + ZipConstants.WORD
/* uncompressed size               */ + ZipConstants.WORD
/* file name length                */ + ZipConstants. SHORT
/* extra field length              */ + ZipConstants.SHORT
/* file comment length             */ + ZipConstants.SHORT
/* disk number start               */ + ZipConstants.SHORT
/* internal file attributes        */ + ZipConstants.SHORT
/* external file attributes        */ + ZipConstants.WORD
/* relative offset of local header */ + ZipConstants.WORD;
// @formatter:on

/** Numeric value of the central file header signature defined by {@code ZipArchiveOutputStream.CFH_SIG}. */
private static final long CFH_SIG = ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);

/**
 * Length of the "End of central directory record" - which is supposed to be the last structure of the archive - without file comment.
 * <p>
 * The signature search for that record starts this many bytes before the end of the archive.
 * </p>
 */
static final int MIN_EOCD_SIZE =
// @formatter:off
/* end of central dir signature    */ ZipConstants.WORD
/* number of this disk             */ + ZipConstants.SHORT
/* number of the disk with the     */
/* start of the central directory  */ + ZipConstants.SHORT
/* total number of entries in      */
/* the central dir on this disk    */ + ZipConstants.SHORT
/* total number of entries in      */
/* the central dir                 */ + ZipConstants.SHORT
/* size of the central directory   */ + ZipConstants.WORD
/* offset of start of central      */
/* directory with respect to       */
/* the starting disk number        */ + ZipConstants.WORD
/* ZIP file comment length         */ + ZipConstants.SHORT;
// @formatter:on

/**
 * Maximum length of the "End of central directory record" with a file comment.
 * <p>
 * The signature search for that record gives up once it is this many bytes away from the end of the archive.
 * </p>
 */
private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
// @formatter:off
/* maximum length of ZIP file comment */ + ZipConstants.ZIP64_MAGIC_SHORT;
// @formatter:on

/**
 * Offset of the field that holds the length (size) of the central directory inside the "End of central directory record" relative to the start of the "End
 * of central directory record".
 */
private static final int CFD_LENGTH_OFFSET =
// @formatter:off
/* end of central dir signature    */ ZipConstants.WORD
/* number of this disk             */ + ZipConstants.SHORT
/* number of the disk with the     */
/* start of the central directory  */ + ZipConstants.SHORT
/* total number of entries in      */
/* the central dir on this disk    */ + ZipConstants.SHORT
/* total number of entries in      */
/* the central dir                 */ + ZipConstants.SHORT;
// @formatter:on

/**
 * Offset of the field that holds the disk number of the first central directory entry inside the "End of central directory record" relative to the start of
 * the "End of central directory record".
 */
private static final int CFD_DISK_OFFSET =
// @formatter:off
/* end of central dir signature    */ ZipConstants.WORD
/* number of this disk             */ + ZipConstants.SHORT;
// @formatter:on

/**
 * Offset of the field that holds the location of the first central directory entry inside the "End of central directory record" relative to the "number of
 * the disk with the start of the central directory", measured from the end of that field: these are the fields that sit between the two.
 */
private static final int CFD_LOCATOR_RELATIVE_OFFSET =
// @formatter:off
/* total number of entries in      */
/* the central dir on this disk    */ + ZipConstants.SHORT
/* total number of entries in      */
/* the central dir                 */ + ZipConstants.SHORT
/* size of the central directory   */ + ZipConstants.WORD;
// @formatter:on

/**
 * Length of the "Zip64 end of central directory locator" - which should be right in front of the "end of central directory record" if one is present at
 * all.
 */
private static final int ZIP64_EOCDL_LENGTH =
// @formatter:off
/* zip64 end of central dir locator sig */ ZipConstants.WORD
/* number of the disk with the          */
/* start of the zip64 end of            */
/* central directory                    */ + ZipConstants.WORD
/* relative offset of the zip64         */
/* end of central directory record      */ + ZipConstants.DWORD
/* total number of disks                */ + ZipConstants.WORD;
// @formatter:on

/**
 * Offset of the field that holds the location of the "Zip64 end of central directory record" inside the "Zip64 end of central directory locator" relative
 * to the start of the "Zip64 end of central directory locator".
 */
private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
// @formatter:off
/* zip64 end of central dir locator sig */ ZipConstants.WORD
/* number of the disk with the          */
/* start of the zip64 end of            */
/* central directory                    */ + ZipConstants.WORD;
// @formatter:on

/**
 * Offset of the field that holds the location of the first central directory entry inside the "Zip64 end of central directory record" relative to the start
 * of the "Zip64 end of central directory record".
 */
private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
// @formatter:off
/* zip64 end of central dir        */
/* signature                       */ ZipConstants.WORD
/* size of zip64 end of central    */
/* directory record                */ + ZipConstants.DWORD
/* version made by                 */ + ZipConstants.SHORT
/* version needed to extract       */ + ZipConstants.SHORT
/* number of this disk             */ + ZipConstants.WORD
/* number of the disk with the     */
/* start of the central directory  */ + ZipConstants.WORD
/* total number of entries in the  */
/* central directory on this disk  */ + ZipConstants.DWORD
/* total number of entries in the  */
/* central directory               */ + ZipConstants.DWORD
/* size of the central directory   */ + ZipConstants.DWORD;
// @formatter:on

/**
 * Offset of the field that holds the disk number of the first central directory entry inside the "Zip64 end of central directory record" relative to the
 * start of the "Zip64 end of central directory record".
 */
private static final int ZIP64_EOCD_CFD_DISK_OFFSET =
// @formatter:off
/* zip64 end of central dir        */
/* signature                       */ ZipConstants.WORD
/* size of zip64 end of central    */
/* directory record                */ + ZipConstants.DWORD
/* version made by                 */ + ZipConstants.SHORT
/* version needed to extract       */ + ZipConstants.SHORT
/* number of this disk             */ + ZipConstants.WORD;
// @formatter:on

/**
 * Offset of the field that holds the location of the first central directory entry inside the "Zip64 end of central directory record" relative to the
 * "number of the disk with the start of the central directory", measured from the end of that field: these are the fields that sit between the two.
 */
private static final int ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET =
// @formatter:off
/* total number of entries in the  */
/* central directory on this disk  */ ZipConstants.DWORD
/* total number of entries in the  */
/* central directory               */ + ZipConstants.DWORD
/* size of the central directory   */ + ZipConstants.DWORD;
// @formatter:on

/**
 * Number of bytes in local file header up to the "length of file name" entry.
 */
private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
// @formatter:off
/* local file header signature     */ ZipConstants.WORD
/* version needed to extract       */ + ZipConstants.SHORT
/* general purpose bit flag        */ + ZipConstants.SHORT
/* compression method              */ + ZipConstants.SHORT
/* last mod file time              */ + ZipConstants.SHORT
/* last mod file date              */ + ZipConstants.SHORT
/* crc-32                          */ + ZipConstants.WORD
/* compressed size                 */ + ZipConstants.WORD
/* uncompressed size               */ + (long) ZipConstants.WORD;
// @formatter:on
458
/**
 * Compares two ZipArchiveEntries based on their offset within the archive: first by the number of the disk the entry starts on, then by the offset of its
 * local file header.
 * <p>
 * Won't return any meaningful results if one of the entries isn't part of the archive at all.
 * </p>
 *
 * @since 1.1
 */
private static final Comparator<ZipArchiveEntry> offsetComparator = Comparator.comparingLong(ZipArchiveEntry::getDiskNumberStart)
.thenComparingLong(ZipArchiveEntry::getLocalHeaderOffset);
469
/**
 * Creates a new Builder; each call returns a fresh instance.
 *
 * @return a new Builder.
 * @since 1.26.0
 */
public static Builder builder() {
return new Builder();
}
479
/**
 * Closes a ZIP file quietly; throwing no IOException, does nothing on null input.
 * <p>
 * Delegates to Commons IO's {@code IOUtils.closeQuietly}.
 * </p>
 *
 * @param zipFile file to close, can be null
 */
public static void closeQuietly(final ZipFile zipFile) {
org.apache.commons.io.IOUtils.closeQuietly(zipFile);
}
488
/**
 * Creates a new SeekableByteChannel for reading.
 * <p>
 * The channel is opened with {@link StandardOpenOption#READ} only, so the file is never created or written to.
 * </p>
 *
 * @param path the path to the file to open
 * @return a new seekable byte channel
 * @throws IOException if an I/O error occurs
 */
private static SeekableByteChannel newReadByteChannel(final Path path) throws IOException {
return Files.newByteChannel(path, READ);
}
499
500 private static SeekableByteChannel openZipChannel(final Path path, final long maxNumberOfDisks, final OpenOption[] openOptions) throws IOException {
501 final FileChannel channel = FileChannel.open(path, StandardOpenOption.READ);
502 final List<FileChannel> channels = new ArrayList<>();
503 try {
504 final boolean is64 = positionAtEndOfCentralDirectoryRecord(channel);
505 long numberOfDisks;
506 if (is64) {
507 channel.position(channel.position() + ZipConstants.WORD + ZipConstants.WORD + ZipConstants.DWORD);
508 final ByteBuffer buf = ByteBuffer.allocate(ZipConstants.WORD);
509 buf.order(ByteOrder.LITTLE_ENDIAN);
510 IOUtils.readFully(channel, buf);
511 buf.flip();
512 numberOfDisks = buf.getInt() & 0xffffffffL;
513 } else {
514 channel.position(channel.position() + ZipConstants.WORD);
515 final ByteBuffer buf = ByteBuffer.allocate(ZipConstants.SHORT);
516 buf.order(ByteOrder.LITTLE_ENDIAN);
517 IOUtils.readFully(channel, buf);
518 buf.flip();
519 numberOfDisks = (buf.getShort() & 0xffff) + 1;
520 }
521 if (numberOfDisks > Math.min(maxNumberOfDisks, Integer.MAX_VALUE)) {
522 throw new IOException("Too many disks for zip archive, max=" + Math.min(maxNumberOfDisks, Integer.MAX_VALUE) + " actual=" + numberOfDisks);
523 }
524
525 if (numberOfDisks <= 1) {
526 return channel;
527 }
528 channel.close();
529
530 final Path parent = path.getParent();
531 final String basename = FilenameUtils.removeExtension(path.getFileName().toString());
532
533 return ZipSplitReadOnlySeekableByteChannel.forPaths(IntStream.range(0, (int) numberOfDisks).mapToObj(i -> {
534 if (i == numberOfDisks - 1) {
535 return path;
536 }
537 final Path lowercase = parent.resolve(String.format("%s.z%02d", basename, i + 1));
538 if (Files.exists(lowercase)) {
539 return lowercase;
540 }
541 final Path uppercase = parent.resolve(String.format("%s.Z%02d", basename, i + 1));
542 if (Files.exists(uppercase)) {
543 return uppercase;
544 }
545 return lowercase;
546 }).collect(Collectors.toList()), openOptions);
547 } catch (final Throwable ex) {
548 org.apache.commons.io.IOUtils.closeQuietly(channel);
549 channels.forEach(org.apache.commons.io.IOUtils::closeQuietly);
550 throw ex;
551 }
552 }
553
554 /**
555 * Searches for the and positions the stream at the start of the "End of central dir record".
556 *
557 * @return true if it's Zip64 end of central directory or false if it's Zip32
558 */
559 private static boolean positionAtEndOfCentralDirectoryRecord(final SeekableByteChannel channel) throws IOException {
560 final boolean found = tryToLocateSignature(channel, MIN_EOCD_SIZE, MAX_EOCD_SIZE, ZipArchiveOutputStream.EOCD_SIG);
561 if (!found) {
562 throw new ZipException("Archive is not a ZIP archive");
563 }
564 boolean found64 = false;
565 final long position = channel.position();
566 if (position > ZIP64_EOCDL_LENGTH) {
567 final ByteBuffer wordBuf = ByteBuffer.allocate(4);
568 channel.position(channel.position() - ZIP64_EOCDL_LENGTH);
569 wordBuf.rewind();
570 IOUtils.readFully(channel, wordBuf);
571 wordBuf.flip();
572 found64 = wordBuf.equals(ByteBuffer.wrap(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG));
573 if (!found64) {
574 channel.position(position);
575 } else {
576 channel.position(channel.position() - ZipConstants.WORD);
577 }
578 }
579
580 return found64;
581 }
582
583 /**
584 * Searches the archive backwards from minDistance to maxDistance for the given signature, positions the RandomaccessFile right at the signature if it has
585 * been found.
586 */
587 private static boolean tryToLocateSignature(final SeekableByteChannel channel, final long minDistanceFromEnd, final long maxDistanceFromEnd,
588 final byte[] sig) throws IOException {
589 final ByteBuffer wordBuf = ByteBuffer.allocate(ZipConstants.WORD);
590 boolean found = false;
591 long off = channel.size() - minDistanceFromEnd;
592 final long stopSearching = Math.max(0L, channel.size() - maxDistanceFromEnd);
593 if (off >= 0) {
594 for (; off >= stopSearching; off--) {
595 channel.position(off);
596 try {
597 wordBuf.rewind();
598 IOUtils.readFully(channel, wordBuf);
599 wordBuf.flip();
600 } catch (final EOFException ex) { // NOSONAR
601 break;
602 }
603 int curr = wordBuf.get();
604 if (curr == sig[POS_0]) {
605 curr = wordBuf.get();
606 if (curr == sig[POS_1]) {
607 curr = wordBuf.get();
608 if (curr == sig[POS_2]) {
609 curr = wordBuf.get();
610 if (curr == sig[POS_3]) {
611 found = true;
612 break;
613 }
614 }
615 }
616 }
617 }
618 }
619 if (found) {
620 channel.position(off);
621 }
622 return found;
623 }
624
/**
 * List of entries in the order they appear inside the central directory.
 */
private final List<ZipArchiveEntry> entries = new LinkedList<>();

/**
 * Maps String to list of ZipArchiveEntrys, name -> actual entries. Created with an initial capacity of {@code HASH_SIZE}.
 */
private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = new HashMap<>(HASH_SIZE);

/**
 * The encoding to use for file names and the file comment.
 * <p>
 * For a list of possible values see <a href="https://docs.oracle.com/javase/8/docs/technotes/guides/intl/encoding.doc.html">Supported Encodings</a>.
 * Defaults to UTF-8.
 * </p>
 */
private final Charset encoding;

/**
 * The ZIP encoding to use for file names and the file comment.
 */
private final ZipEncoding zipEncoding;

/**
 * The actual data source.
 */
private final SeekableByteChannel archive;

/**
 * Whether to look for and use Unicode extra fields.
 */
private final boolean useUnicodeExtraFields;

/**
 * Whether the file is closed. Starts out as {@code true}.
 */
// NOTE(review): presumably flipped to false once the archive has been opened successfully; the code doing so is outside this part of the file.
private volatile boolean closed = true;

/**
 * Whether the ZIP archive is a split ZIP archive
 */
private final boolean isSplitZipArchive;

// cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
// Each byte array below is paired with a ByteBuffer wrapping that same array, so both views share their content.
private final byte[] dwordBuf = new byte[ZipConstants.DWORD];

private final byte[] wordBuf = new byte[ZipConstants.WORD];

private final byte[] cfhBuf = new byte[CFH_LEN];

private final byte[] shortBuf = new byte[ZipConstants.SHORT];

private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);

private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);

private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);

private final ByteBuffer shortBbuf = ByteBuffer.wrap(shortBuf);

// NOTE(review): the four values below are not assigned in this part of the file; going by their names they describe where the
// central directory and the first local file header start - confirm against the code that populates them.
private long centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset;

private long centralDirectoryStartOffset;

private long firstLocalFileHeaderOffset;
691
/**
 * Opens the given file for reading, assuming "UTF-8" for file names.
 * <p>
 * Delegates to {@link #ZipFile(File, String)} with the default charset name.
 * </p>
 *
 * @param file the archive.
 *
 * @throws IOException if an error occurs while reading the file.
 * @deprecated Use {@link Builder#get()}.
 */
@Deprecated
public ZipFile(final File file) throws IOException {
this(file, DEFAULT_CHARSET_NAME);
}
704
/**
 * Opens the given file for reading, assuming the specified encoding for file names and scanning for Unicode extra fields.
 * <p>
 * Converts the file to a {@link Path} and delegates to {@link #ZipFile(Path, String, boolean)} with {@code useUnicodeExtraFields} set to {@code true}.
 * </p>
 *
 * @param file     the archive.
 * @param encoding the encoding to use for file names, use null for the platform's default encoding
 * @throws IOException if an error occurs while reading the file.
 * @deprecated Use {@link Builder#get()}.
 */
@Deprecated
public ZipFile(final File file, final String encoding) throws IOException {
this(file.toPath(), encoding, true);
}
717
/**
 * Opens the given file for reading, assuming the specified encoding for file names.
 * <p>
 * Converts the file to a {@link Path} and delegates to {@link #ZipFile(Path, String, boolean, boolean)} with {@code ignoreLocalFileHeader} set to
 * {@code false}.
 * </p>
 *
 * @param file                  the archive.
 * @param encoding              the encoding to use for file names, use null for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
 * @throws IOException if an error occurs while reading the file.
 * @deprecated Use {@link Builder#get()}.
 */
@Deprecated
public ZipFile(final File file, final String encoding, final boolean useUnicodeExtraFields) throws IOException {
this(file.toPath(), encoding, useUnicodeExtraFields, false);
}
731
/**
 * Opens the given file for reading, assuming the specified encoding for file names.
 * <p>
 * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time
 * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory.
 * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is
 * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.
 * </p>
 * <p>
 * The file is opened through a new read-only channel and its absolute path is passed along as the archive's description.
 * </p>
 *
 * @param file                  the archive.
 * @param encoding              the encoding to use for file names, use null for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
 * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's Javadoc)
 * @throws IOException if an error occurs while reading the file.
 * @since 1.19
 * @deprecated Use {@link Builder#get()}.
 */
@Deprecated
@SuppressWarnings("resource") // Caller closes
public ZipFile(final File file, final String encoding, final boolean useUnicodeExtraFields, final boolean ignoreLocalFileHeader) throws IOException {
// NOTE(review): the literal true is presumably the close-on-error flag of the target constructor, which is outside this part of the file.
this(newReadByteChannel(file.toPath()), file.getAbsolutePath(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader);
}
755
/**
 * Opens the given path for reading, assuming "UTF-8" for file names.
 * <p>
 * Delegates to {@link #ZipFile(Path, String)} with the default charset name.
 * </p>
 *
 * @param path path to the archive.
 * @throws IOException if an error occurs while reading the file.
 * @since 1.22
 * @deprecated Use {@link Builder#get()}.
 */
@Deprecated
public ZipFile(final Path path) throws IOException {
this(path, DEFAULT_CHARSET_NAME);
}
768
/**
 * Opens the given path for reading, assuming the specified encoding for file names and scanning for Unicode extra fields.
 * <p>
 * Delegates to {@link #ZipFile(Path, String, boolean)} with {@code useUnicodeExtraFields} set to {@code true}.
 * </p>
 *
 * @param path     path to the archive.
 * @param encoding the encoding to use for file names, use null for the platform's default encoding
 * @throws IOException if an error occurs while reading the file.
 * @since 1.22
 * @deprecated Use {@link Builder#get()}.
 */
@Deprecated
public ZipFile(final Path path, final String encoding) throws IOException {
this(path, encoding, true);
}
782
/**
 * Opens the given path for reading, assuming the specified encoding for file names.
 * <p>
 * Delegates to {@link #ZipFile(Path, String, boolean, boolean)} with {@code ignoreLocalFileHeader} set to {@code false}.
 * </p>
 *
 * @param path                  path to the archive.
 * @param encoding              the encoding to use for file names, use null for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
 * @throws IOException if an error occurs while reading the file.
 * @since 1.22
 * @deprecated Use {@link Builder#get()}.
 */
@Deprecated
public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields) throws IOException {
this(path, encoding, useUnicodeExtraFields, false);
}
797
/**
 * Opens the given path for reading, assuming the specified encoding for file names.
 * <p>
 * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time
 * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory.
 * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is
 * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.
 * </p>
 * <p>
 * The path is opened through a new read-only channel and its absolute form is passed along as the archive's description.
 * </p>
 *
 * @param path                  path to the archive.
 * @param encoding              the encoding to use for file names, use null for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
 * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's Javadoc)
 * @throws IOException if an error occurs while reading the file.
 * @since 1.22
 * @deprecated Use {@link Builder#get()}.
 */
@SuppressWarnings("resource") // Caller closes
@Deprecated
public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields, final boolean ignoreLocalFileHeader) throws IOException {
// NOTE(review): the literal true is presumably the close-on-error flag of the target constructor, which is outside this part of the file.
this(newReadByteChannel(path), path.toAbsolutePath().toString(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader);
}
821
/**
 * Opens the given channel for reading, assuming "UTF-8" for file names.
 * <p>
 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
 * </p>
 * <p>
 * Equivalent to calling {@link #ZipFile(SeekableByteChannel, String, String, boolean)} with the description {@code "a SeekableByteChannel"},
 * {@code DEFAULT_CHARSET_NAME} as the encoding and {@code useUnicodeExtraFields} set to {@code true}.
 * </p>
 *
 * @param channel the archive.
 * @throws IOException if an error occurs while reading the file.
 * @since 1.13
 * @deprecated Use {@link Builder#get()}.
 */
@Deprecated
public ZipFile(final SeekableByteChannel channel) throws IOException {
    this(channel, "a SeekableByteChannel", DEFAULT_CHARSET_NAME, true);
}
838
/**
 * Opens the given channel for reading, assuming the specified encoding for file names.
 * <p>
 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
 * </p>
 * <p>
 * Equivalent to calling {@link #ZipFile(SeekableByteChannel, String, String, boolean)} with the description {@code "a SeekableByteChannel"} and
 * {@code useUnicodeExtraFields} set to {@code true}.
 * </p>
 *
 * @param channel  the archive.
 * @param encoding the encoding to use for file names, use {@code null} for the platform's default encoding
 * @throws IOException if an error occurs while reading the file.
 * @since 1.13
 * @deprecated Use {@link Builder#get()}.
 */
@Deprecated
public ZipFile(final SeekableByteChannel channel, final String encoding) throws IOException {
    this(channel, "a SeekableByteChannel", encoding, true);
}
855
/**
 * Initializes this instance from an already opened channel: reads the central directory, optionally the local file headers, and builds the name lookup map.
 *
 * @param channel               the archive.
 * @param channelDescription    description of the archive, used for error messages only.
 * @param encoding              the charset to use for file names; {@code Builder.DEFAULT_CHARSET} is the fallback handed to {@code Charsets.toCharset}.
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
 * @param closeOnError          whether {@code channel} is closed (quietly) when reading the archive fails.
 * @param ignoreLocalFileHeader whether to skip reading the local file headers.
 * @throws IOException if reading the archive fails; the original exception is attached as the cause and the message names {@code channelDescription}.
 */
private ZipFile(final SeekableByteChannel channel, final String channelDescription, final Charset encoding, final boolean useUnicodeExtraFields,
        final boolean closeOnError, final boolean ignoreLocalFileHeader) throws IOException {
    // split archives are positioned by disk number plus relative offset, see the isSplitZipArchive branches elsewhere
    this.isSplitZipArchive = channel instanceof ZipSplitReadOnlySeekableByteChannel;
    this.encoding = Charsets.toCharset(encoding, Builder.DEFAULT_CHARSET);
    // NOTE(review): this receives the raw argument rather than the defaulted this.encoding - confirm both agree when encoding is null
    this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
    this.useUnicodeExtraFields = useUnicodeExtraFields;
    this.archive = channel;
    boolean success = false;
    try {
        final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = populateFromCentralDirectory();
        if (!ignoreLocalFileHeader) {
            resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
        }
        fillNameMap();
        success = true;
    } catch (final IOException e) {
        // add the archive description to the message while keeping the original failure as the cause
        throw new IOException("Error reading Zip content from " + channelDescription, e);
    } finally {
        // a failed construction is flagged as closed, so finalize() will not call close() on it
        this.closed = !success;
        if (!success && closeOnError) {
            org.apache.commons.io.IOUtils.closeQuietly(archive);
        }
    }
}
880
/**
 * Opens the given channel for reading, assuming the specified encoding for file names.
 * <p>
 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
 * </p>
 * <p>
 * Local file headers are read, and the channel is not closed by this constructor when reading fails (both flags are passed on as {@code false}).
 * </p>
 *
 * @param channel               the archive.
 * @param channelDescription    description of the archive, used for error messages only.
 * @param encoding              the encoding to use for file names, use {@code null} for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
 * @throws IOException if an error occurs while reading the file.
 * @since 1.13
 * @deprecated Use {@link Builder#get()}.
 */
@Deprecated
public ZipFile(final SeekableByteChannel channel, final String channelDescription, final String encoding, final boolean useUnicodeExtraFields)
        throws IOException {
    this(channel, channelDescription, encoding, useUnicodeExtraFields, false, false);
}
900
/**
 * Opens the given channel for reading, assuming the specified encoding for file names.
 * <p>
 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
 * </p>
 * <p>
 * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time
 * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory.
 * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is
 * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code ignoreLocalFileHeader} to {@code true} means
 * {@code useUnicodeExtraFields} will be ignored effectively.
 * </p>
 * <p>
 * The channel is not closed by this constructor when reading fails (the close-on-error flag is passed on as {@code false}).
 * </p>
 *
 * @param channel               the archive.
 * @param channelDescription    description of the archive, used for error messages only.
 * @param encoding              the encoding to use for file names, use {@code null} for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
 * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's Javadoc)
 * @throws IOException if an error occurs while reading the file.
 * @since 1.19
 * @deprecated Use {@link Builder#get()}.
 */
@Deprecated
public ZipFile(final SeekableByteChannel channel, final String channelDescription, final String encoding, final boolean useUnicodeExtraFields,
        final boolean ignoreLocalFileHeader) throws IOException {
    this(channel, channelDescription, encoding, useUnicodeExtraFields, false, ignoreLocalFileHeader);
}
928
/**
 * Converts the encoding name with {@code Charsets.toCharset(String)} and forwards all arguments to the {@link Charset}-based private constructor.
 *
 * @param channel               the archive.
 * @param channelDescription    description of the archive, used for error messages only.
 * @param encoding              name of the encoding to use for file names.
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
 * @param closeOnError          whether the channel is closed when reading the archive fails.
 * @param ignoreLocalFileHeader whether to skip reading the local file headers.
 * @throws IOException if an error occurs while reading the archive.
 */
private ZipFile(final SeekableByteChannel channel, final String channelDescription, final String encoding, final boolean useUnicodeExtraFields,
        final boolean closeOnError, final boolean ignoreLocalFileHeader) throws IOException {
    this(channel, channelDescription, Charsets.toCharset(encoding), useUnicodeExtraFields, closeOnError, ignoreLocalFileHeader);
}
933
/**
 * Opens the given file for reading, assuming "UTF-8".
 * <p>
 * The name is converted to a {@link Path} and handed to {@link #ZipFile(Path, String)} together with {@code DEFAULT_CHARSET_NAME}.
 * </p>
 *
 * @param name name of the archive.
 * @throws IOException if an error occurs while reading the file.
 * @deprecated Use {@link Builder#get()}.
 */
@Deprecated
public ZipFile(final String name) throws IOException {
    this(new File(name).toPath(), DEFAULT_CHARSET_NAME);
}
945
/**
 * Opens the given file for reading, assuming the specified encoding for file names, scanning Unicode extra fields.
 * <p>
 * The name is converted to a {@link Path} and handed to {@link #ZipFile(Path, String, boolean)} with {@code useUnicodeExtraFields} set to {@code true}.
 * </p>
 *
 * @param name     name of the archive.
 * @param encoding the encoding to use for file names, use {@code null} for the platform's default encoding
 * @throws IOException if an error occurs while reading the file.
 * @deprecated Use {@link Builder#get()}.
 */
@Deprecated
public ZipFile(final String name, final String encoding) throws IOException {
    this(new File(name).toPath(), encoding, true);
}
958
/**
 * Whether this class is able to read the given entry.
 * <p>
 * May return false if it is set up to use encryption or a compression method that hasn't been implemented yet.
 * </p>
 * <p>
 * The decision is delegated to {@code ZipUtil.canHandleEntryData(ZipArchiveEntry)}.
 * </p>
 *
 * @param entry the entry
 * @return whether this class is able to read the given entry.
 * @since 1.1
 */
public boolean canReadEntryData(final ZipArchiveEntry entry) {
    return ZipUtil.canHandleEntryData(entry);
}
972
/**
 * Closes the archive.
 * <p>
 * Marks this instance as closed and then closes the underlying channel.
 * </p>
 *
 * @throws IOException if an error occurs closing the archive.
 */
@Override
public void close() throws IOException {
    // apart from the constructor, this flag is only written here and
    // read in finalize(), which can never be run in parallel.
    // no synchronization needed.
    closed = true;
    archive.close();
}
986
987 /**
988 * Transfer selected entries from this ZIP file to a given #ZipArchiveOutputStream. Compression and all other attributes will be as in this file.
989 * <p>
990 * This method transfers entries based on the central directory of the ZIP file.
991 * </p>
992 *
993 * @param target The zipArchiveOutputStream to write the entries to
994 * @param predicate A predicate that selects which entries to write
995 * @throws IOException on error
996 */
997 public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate) throws IOException {
998 final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder();
999 while (src.hasMoreElements()) {
1000 final ZipArchiveEntry entry = src.nextElement();
1001 if (predicate.test(entry)) {
1002 target.addRawArchiveEntry(entry, getRawInputStream(entry));
1003 }
1004 }
1005 }
1006
1007 /**
1008 * Creates new BoundedInputStream, according to implementation of underlying archive channel.
1009 */
1010 private BoundedArchiveInputStream createBoundedInputStream(final long start, final long remaining) {
1011 if (start < 0 || remaining < 0 || start + remaining < start) {
1012 throw new IllegalArgumentException("Corrupted archive, stream boundaries" + " are out of range");
1013 }
1014 return archive instanceof FileChannel ? new BoundedFileChannelInputStream(start, remaining, (FileChannel) archive)
1015 : new BoundedSeekableByteChannelInputStream(start, remaining, archive);
1016 }
1017
1018 private void fillNameMap() {
1019 entries.forEach(ze -> {
1020 // entries are filled in populateFromCentralDirectory and
1021 // never modified
1022 final String name = ze.getName();
1023 final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.computeIfAbsent(name, k -> new LinkedList<>());
1024 entriesOfThatName.addLast(ze);
1025 });
1026 }
1027
/**
 * Ensures that the close method of this ZIP file is called when there are no more references to it.
 * <p>
 * {@link #close()} is only invoked if the {@code closed} flag is not set; {@code super.finalize()} runs in either case.
 * </p>
 *
 * @throws Throwable whatever {@link #close()} or {@code super.finalize()} throws
 * @see #close()
 */
@Override
protected void finalize() throws Throwable {
    try {
        if (!closed) {
            close();
        }
    } finally {
        // always chain to the superclass, even when close() failed
        super.finalize();
    }
}
1043
1044 /**
1045 * Gets an InputStream for reading the content before the first local file header.
1046 *
1047 * @return null if there is no content before the first local file header. Otherwise, returns a stream to read the content before the first local file
1048 * header.
1049 * @since 1.23
1050 */
1051 public InputStream getContentBeforeFirstLocalFileHeader() {
1052 return firstLocalFileHeaderOffset == 0 ? null : createBoundedInputStream(0, firstLocalFileHeaderOffset);
1053 }
1054
1055 private long getDataOffset(final ZipArchiveEntry ze) throws IOException {
1056 final long s = ze.getDataOffset();
1057 if (s == EntryStreamOffsets.OFFSET_UNKNOWN) {
1058 setDataOffset(ze);
1059 return ze.getDataOffset();
1060 }
1061 return s;
1062 }
1063
/**
 * Gets the encoding to use for file names and the file comment.
 *
 * @return the name of the charset used by this instance, as reported by {@link Charset#name()}; this method does not return {@code null}.
 */
public String getEncoding() {
    return encoding.name();
}
1072
/**
 * Gets all entries.
 * <p>
 * Entries will be returned in the same order they appear within the archive's central directory.
 * </p>
 *
 * @return all entries as {@link ZipArchiveEntry} instances, as an enumeration over the internal entry collection
 */
public Enumeration<ZipArchiveEntry> getEntries() {
    return Collections.enumeration(entries);
}
1084
/**
 * Gets all named entries in the same order they appear within the archive's central directory.
 *
 * @param name name of the entry.
 * @return the {@code Iterable<ZipArchiveEntry>} corresponding to the given name; {@code ZipArchiveEntry.EMPTY_LINKED_LIST} if no entry is registered
 *         under that name
 * @since 1.6
 */
public Iterable<ZipArchiveEntry> getEntries(final String name) {
    return nameMap.getOrDefault(name, ZipArchiveEntry.EMPTY_LINKED_LIST);
}
1095
1096 /**
1097 * Gets all entries in physical order.
1098 * <p>
1099 * Entries will be returned in the same order their contents appear within the archive.
1100 * </p>
1101 *
1102 * @return all entries as {@link ZipArchiveEntry} instances
1103 *
1104 * @since 1.1
1105 */
1106 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
1107 final ZipArchiveEntry[] allEntries = entries.toArray(ZipArchiveEntry.EMPTY_ARRAY);
1108 return Collections.enumeration(Arrays.asList(sortByOffset(allEntries)));
1109 }
1110
1111 /**
1112 * Gets all named entries in the same order their contents appear within the archive.
1113 *
1114 * @param name name of the entry.
1115 * @return the Iterable<ZipArchiveEntry> corresponding to the given name
1116 * @since 1.6
1117 */
1118 public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) {
1119 final LinkedList<ZipArchiveEntry> linkedList = nameMap.getOrDefault(name, ZipArchiveEntry.EMPTY_LINKED_LIST);
1120 return Arrays.asList(sortByOffset(linkedList.toArray(ZipArchiveEntry.EMPTY_ARRAY)));
1121 }
1122
1123 /**
1124 * Gets a named entry or {@code null} if no entry by that name exists.
1125 * <p>
1126 * If multiple entries with the same name exist the first entry in the archive's central directory by that name is returned.
1127 * </p>
1128 *
1129 * @param name name of the entry.
1130 * @return the ZipArchiveEntry corresponding to the given name - or {@code null} if not present.
1131 */
1132 public ZipArchiveEntry getEntry(final String name) {
1133 final LinkedList<ZipArchiveEntry> entries = nameMap.get(name);
1134 return entries != null ? entries.getFirst() : null;
1135 }
1136
/**
 * Gets the offset of the first local file header in the file.
 *
 * @return the length of the content before the first local file header; {@code 0} means no such content is exposed by
 *         {@link #getContentBeforeFirstLocalFileHeader()}
 * @since 1.23
 */
public long getFirstLocalFileHeaderOffset() {
    return firstLocalFileHeaderOffset;
}
1146
1147 /**
1148 * Gets an InputStream for reading the contents of the given entry.
1149 *
1150 * @param entry the entry to get the stream for.
1151 * @return a stream to read the entry from. The returned stream implements {@link InputStreamStatistics}.
1152 * @throws IOException if unable to create an input stream from the zipEntry.
1153 */
1154 public InputStream getInputStream(final ZipArchiveEntry entry) throws IOException {
1155 if (!(entry instanceof Entry)) {
1156 return null;
1157 }
1158 // cast validity is checked just above
1159 ZipUtil.checkRequestedFeatures(entry);
1160
1161 // doesn't get closed if the method is not supported - which
1162 // should never happen because of the checkRequestedFeatures
1163 // call above
1164 final InputStream is = new BufferedInputStream(getRawInputStream(entry)); // NOSONAR
1165 switch (ZipMethod.getMethodByCode(entry.getMethod())) {
1166 case STORED:
1167 return new StoredStatisticsStream(is);
1168 case UNSHRINKING:
1169 return new UnshrinkingInputStream(is);
1170 case IMPLODING:
1171 try {
1172 return new ExplodingInputStream(entry.getGeneralPurposeBit().getSlidingDictionarySize(),
1173 entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is);
1174 } catch (final IllegalArgumentException ex) {
1175 throw new IOException("bad IMPLODE data", ex);
1176 }
1177 case DEFLATED:
1178 final Inflater inflater = new Inflater(true);
1179 // Inflater with nowrap=true has this odd contract for a zero padding
1180 // byte following the data stream; this used to be zlib's requirement
1181 // and has been fixed a long time ago, but the contract persists so
1182 // we comply.
1183 // https://docs.oracle.com/javase/8/docs/api/java/util/zip/Inflater.html#Inflater(boolean)
1184 return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)), inflater) {
1185 @Override
1186 public void close() throws IOException {
1187 try {
1188 super.close();
1189 } finally {
1190 inflater.end();
1191 }
1192 }
1193 };
1194 case BZIP2:
1195 return new BZip2CompressorInputStream(is);
1196 case ENHANCED_DEFLATED:
1197 return new Deflate64CompressorInputStream(is);
1198 case AES_ENCRYPTED:
1199 case EXPANDING_LEVEL_1:
1200 case EXPANDING_LEVEL_2:
1201 case EXPANDING_LEVEL_3:
1202 case EXPANDING_LEVEL_4:
1203 case JPEG:
1204 case LZMA:
1205 case PKWARE_IMPLODING:
1206 case PPMD:
1207 case TOKENIZATION:
1208 case UNKNOWN:
1209 case WAVPACK:
1210 case XZ:
1211 default:
1212 throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(entry.getMethod()), entry);
1213 }
1214 }
1215
1216 /**
1217 * Gets the raw stream of the archive entry (compressed form).
1218 * <p>
1219 * This method does not relate to how/if we understand the payload in the stream, since we really only intend to move it on to somewhere else.
1220 * </p>
1221 * <p>
1222 * Since version 1.22, this method will make an attempt to read the entry's data stream offset, even if the {@code ignoreLocalFileHeader} parameter was
1223 * {@code true} in the constructor. An IOException can also be thrown from the body of the method if this lookup fails for some reason.
1224 * </p>
1225 *
1226 * @param entry The entry to get the stream for
1227 * @return The raw input stream containing (possibly) compressed data.
1228 * @since 1.11
1229 * @throws IOException if there is a problem reading data offset (added in version 1.22).
1230 */
1231 public InputStream getRawInputStream(final ZipArchiveEntry entry) throws IOException {
1232 if (!(entry instanceof Entry)) {
1233 return null;
1234 }
1235 final long start = getDataOffset(entry);
1236 if (start == EntryStreamOffsets.OFFSET_UNKNOWN) {
1237 return null;
1238 }
1239 return createBoundedInputStream(start, entry.getCompressedSize());
1240 }
1241
1242 /**
1243 * Gets the entry's content as a String if isUnixSymlink() returns true for it, otherwise returns null.
1244 * <p>
1245 * This method assumes the symbolic link's file name uses the same encoding that as been specified for this ZipFile.
1246 * </p>
1247 *
1248 * @param entry ZipArchiveEntry object that represents the symbolic link
1249 * @return entry's content as a String
1250 * @throws IOException problem with content's input stream
1251 * @since 1.5
1252 */
1253 public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException {
1254 if (entry != null && entry.isUnixSymlink()) {
1255 try (InputStream in = getInputStream(entry)) {
1256 return zipEncoding.decode(org.apache.commons.io.IOUtils.toByteArray(in));
1257 }
1258 }
1259 return null;
1260 }
1261
1262 /**
1263 * Reads the central directory of the given archive and populates the internal tables with ZipArchiveEntry instances.
1264 * <p>
1265 * The ZipArchiveEntrys will know all data that can be obtained from the central directory alone, but not the data that requires the local file header or
1266 * additional data to be read.
1267 * </p>
1268 *
1269 * @return a map of zip entries that didn't have the language encoding flag set when read.
1270 */
1271 private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory() throws IOException {
1272 final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag = new HashMap<>();
1273
1274 positionAtCentralDirectory();
1275 centralDirectoryStartOffset = archive.position();
1276
1277 wordBbuf.rewind();
1278 IOUtils.readFully(archive, wordBbuf);
1279 long sig = ZipLong.getValue(wordBuf);
1280
1281 if (sig != CFH_SIG && startsWithLocalFileHeader()) {
1282 throw new IOException("Central directory is empty, can't expand" + " corrupt archive.");
1283 }
1284
1285 while (sig == CFH_SIG) {
1286 readCentralDirectoryEntry(noUTF8Flag);
1287 wordBbuf.rewind();
1288 IOUtils.readFully(archive, wordBbuf);
1289 sig = ZipLong.getValue(wordBuf);
1290 }
1291 return noUTF8Flag;
1292 }
1293
1294 /**
1295 * Searches for either the "Zip64 end of central directory locator" or the "End of central dir record", parses it and positions the
1296 * stream at the first central directory record.
1297 */
1298 private void positionAtCentralDirectory() throws IOException {
1299 final boolean is64 = positionAtEndOfCentralDirectoryRecord(archive);
1300 if (!is64) {
1301 positionAtCentralDirectory32();
1302 } else {
1303 positionAtCentralDirectory64();
1304 }
1305 }
1306
/**
 * Parses the "End of central dir record" and positions the stream at the first central directory record.
 * <p>
 * Expects stream to be positioned at the beginning of the "End of central dir record".
 * </p>
 * <p>
 * Sets {@code centralDirectoryStartDiskNumber} and {@code centralDirectoryStartRelativeOffset}; for archives that are not split it also sets
 * {@code firstLocalFileHeaderOffset}.
 * </p>
 *
 * @throws IOException if reading from or positioning the archive fails
 */
private void positionAtCentralDirectory32() throws IOException {
    // remembered so the amount of leading content can be derived further down
    final long endOfCentralDirectoryRecordOffset = archive.position();
    if (isSplitZipArchive) {
        // split archive: read the disk number (a short) on which the central directory starts
        skipBytes(CFD_DISK_OFFSET);
        shortBbuf.rewind();
        IOUtils.readFully(archive, shortBbuf);
        centralDirectoryStartDiskNumber = ZipShort.getValue(shortBuf);

        skipBytes(CFD_LOCATOR_RELATIVE_OFFSET);

        // then the offset of the central directory relative to that disk
        wordBbuf.rewind();
        IOUtils.readFully(archive, wordBbuf);
        centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf);
        ((ZipSplitReadOnlySeekableByteChannel) archive).position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
    } else {
        // single file: two consecutive words are read, first the length of the central directory ...
        skipBytes(CFD_LENGTH_OFFSET);
        wordBbuf.rewind();
        IOUtils.readFully(archive, wordBbuf);
        final long centralDirectoryLength = ZipLong.getValue(wordBuf);

        // ... then its recorded start offset
        wordBbuf.rewind();
        IOUtils.readFully(archive, wordBbuf);
        centralDirectoryStartDiskNumber = 0;
        centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf);

        // bytes in front of this record that are not covered by recorded offset + length are treated as
        // content preceding the first local file header; negative results are clamped to 0
        firstLocalFileHeaderOffset = Long.max(endOfCentralDirectoryRecordOffset - centralDirectoryLength - centralDirectoryStartRelativeOffset, 0L);
        archive.position(centralDirectoryStartRelativeOffset + firstLocalFileHeaderOffset);
    }
}
1341
/**
 * Parses the "Zip64 end of central directory locator", finds the "Zip64 end of central directory record" using the parsed information,
 * parses that and positions the stream at the first central directory record.
 * <p>
 * Expects stream to be positioned right behind the "Zip64 end of central directory locator"'s signature.
 * </p>
 * <p>
 * Sets {@code centralDirectoryStartDiskNumber} and {@code centralDirectoryStartRelativeOffset}.
 * </p>
 *
 * @throws ZipException if the record the locator points at does not start with {@code ZipArchiveOutputStream.ZIP64_EOCD_SIG}
 * @throws IOException  if reading from or positioning the archive fails
 */
private void positionAtCentralDirectory64() throws IOException {
    skipBytes(ZipConstants.WORD);
    // step 1: follow the locator to the ZIP64 end of central directory record
    if (isSplitZipArchive) {
        // split archive: the locator yields a disk number (word) followed by an offset on that disk (double word)
        wordBbuf.rewind();
        IOUtils.readFully(archive, wordBbuf);
        final long diskNumberOfEOCD = ZipLong.getValue(wordBuf);

        dwordBbuf.rewind();
        IOUtils.readFully(archive, dwordBbuf);
        final long relativeOffsetOfEOCD = ZipEightByteInteger.getLongValue(dwordBuf);
        ((ZipSplitReadOnlySeekableByteChannel) archive).position(diskNumberOfEOCD, relativeOffsetOfEOCD);
    } else {
        // single file: only the eight byte offset of the record is needed
        skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET - ZipConstants.WORD /* signature has already been read */);
        dwordBbuf.rewind();
        IOUtils.readFully(archive, dwordBbuf);
        archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
    }

    // step 2: the position reached must carry the ZIP64 end of central directory signature
    wordBbuf.rewind();
    IOUtils.readFully(archive, wordBbuf);
    if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
        throw new ZipException("Archive's ZIP64 end of central directory locator is corrupt.");
    }

    // step 3: read where the central directory starts and move there
    if (isSplitZipArchive) {
        skipBytes(ZIP64_EOCD_CFD_DISK_OFFSET - ZipConstants.WORD /* signature has already been read */);
        wordBbuf.rewind();
        IOUtils.readFully(archive, wordBbuf);
        centralDirectoryStartDiskNumber = ZipLong.getValue(wordBuf);

        skipBytes(ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET);

        dwordBbuf.rewind();
        IOUtils.readFully(archive, dwordBbuf);
        centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
        ((ZipSplitReadOnlySeekableByteChannel) archive).position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
    } else {
        skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET - ZipConstants.WORD /* signature has already been read */);
        dwordBbuf.rewind();
        IOUtils.readFully(archive, dwordBbuf);
        centralDirectoryStartDiskNumber = 0;
        centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
        archive.position(centralDirectoryStartRelativeOffset);
    }
}
1393
/**
 * Reads an individual entry of the central directory, creates an ZipArchiveEntry from it and adds it to the global maps.
 * <p>
 * The fixed-size part of the record is read into {@code cfhBuf} in one go and decoded field by field; the variable-length file name, extra data and
 * comment are then read from the archive in that order.
 * </p>
 *
 * @param noUTF8Flag map used to collect entries that don't have their UTF-8 flag set and whose name will be set by data read from the local file header
 *                   later. The current entry may be added to this map.
 * @throws IOException if reading fails, the record is truncated, or it contains invalid values
 */
private void readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag) throws IOException {
    cfhBbuf.rewind();
    IOUtils.readFully(archive, cfhBbuf);
    // off is the read cursor inside cfhBuf; it is advanced by the width of every field consumed
    int off = 0;
    final Entry ze = new Entry();

    final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
    off += ZipConstants.SHORT;
    ze.setVersionMadeBy(versionMadeBy);
    // the platform is derived from versionMadeBy: shifted right by BYTE_SHIFT, then masked with NIBLET_MASK
    ze.setPlatform(versionMadeBy >> BYTE_SHIFT & NIBLET_MASK);

    ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
    off += ZipConstants.SHORT; // version required

    // general purpose flags; the UTF-8 flag decides which encoding decodes name and comment
    final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
    final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
    final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.ZIP_ENCODING_UTF_8 : zipEncoding;
    if (hasUTF8Flag) {
        ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
    }
    ze.setGeneralPurposeBit(gpFlag);
    // the same two bytes are additionally kept in their raw form
    ze.setRawFlag(ZipShort.getValue(cfhBuf, off));

    off += ZipConstants.SHORT;

    // noinspection MagicConstant
    ze.setMethod(ZipShort.getValue(cfhBuf, off));
    off += ZipConstants.SHORT;

    // the stored DOS time is converted to Java time before being set
    final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
    ze.setTime(time);
    off += ZipConstants.WORD;

    ze.setCrc(ZipLong.getValue(cfhBuf, off));
    off += ZipConstants.WORD;

    // compressed size
    long size = ZipLong.getValue(cfhBuf, off);
    if (size < 0) {
        throw new IOException("broken archive, entry with negative compressed size");
    }
    ze.setCompressedSize(size);
    off += ZipConstants.WORD;

    // uncompressed size
    size = ZipLong.getValue(cfhBuf, off);
    if (size < 0) {
        throw new IOException("broken archive, entry with negative size");
    }
    ze.setSize(size);
    off += ZipConstants.WORD;

    // lengths of the three variable-length parts that follow the fixed-size part
    final int fileNameLen = ZipShort.getValue(cfhBuf, off);
    off += ZipConstants.SHORT;
    if (fileNameLen < 0) {
        throw new IOException("broken archive, entry with negative fileNameLen");
    }

    final int extraLen = ZipShort.getValue(cfhBuf, off);
    off += ZipConstants.SHORT;
    if (extraLen < 0) {
        throw new IOException("broken archive, entry with negative extraLen");
    }

    final int commentLen = ZipShort.getValue(cfhBuf, off);
    off += ZipConstants.SHORT;
    if (commentLen < 0) {
        throw new IOException("broken archive, entry with negative commentLen");
    }

    ze.setDiskNumberStart(ZipShort.getValue(cfhBuf, off));
    off += ZipConstants.SHORT;

    ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
    off += ZipConstants.SHORT;

    ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
    off += ZipConstants.WORD;

    // a short read means the archive ended in the middle of the file name
    final byte[] fileName = IOUtils.readRange(archive, fileNameLen);
    if (fileName.length < fileNameLen) {
        throw new EOFException();
    }
    ze.setName(entryEncoding.decode(fileName), fileName);

    // LFH offset, shifted by the amount of content found before the first local file header
    ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off) + firstLocalFileHeaderOffset);
    // data offset will be filled later
    entries.add(ze);

    final byte[] cdExtraData = IOUtils.readRange(archive, extraLen);
    if (cdExtraData.length < extraLen) {
        throw new EOFException();
    }
    try {
        ze.setCentralDirectoryExtra(cdExtraData);
    } catch (final RuntimeException e) {
        // report unparseable extra data as a ZipException that keeps the original failure as its cause
        final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
        z.initCause(e);
        throw z;
    }

    // needs the extra fields parsed just above; the offset check runs on the possibly updated values
    setSizesAndOffsetFromZip64Extra(ze);
    sanityCheckLFHOffset(ze);

    final byte[] comment = IOUtils.readRange(archive, commentLen);
    if (comment.length < commentLen) {
        throw new EOFException();
    }
    ze.setComment(entryEncoding.decode(comment));

    // keep the raw bytes so name and comment can be revisited once the local file header has been read
    if (!hasUTF8Flag && useUnicodeExtraFields) {
        noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
    }

    ze.setStreamContiguous(true);
}
1515
1516 /**
1517 * Walks through all recorded entries and adds the data available from the local file header.
1518 * <p>
1519 * Also records the offsets for the data to read from the entries.
1520 * </p>
1521 */
1522 private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag) throws IOException {
1523 for (final ZipArchiveEntry zipArchiveEntry : entries) {
1524 // entries are filled in populateFromCentralDirectory and never modified
1525 final Entry ze = (Entry) zipArchiveEntry;
1526 final int[] lens = setDataOffset(ze);
1527 final int fileNameLen = lens[0];
1528 final int extraFieldLen = lens[1];
1529 skipBytes(fileNameLen);
1530 final byte[] localExtraData = IOUtils.readRange(archive, extraFieldLen);
1531 if (localExtraData.length < extraFieldLen) {
1532 throw new EOFException();
1533 }
1534 try {
1535 ze.setExtra(localExtraData);
1536 } catch (final RuntimeException e) {
1537 final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
1538 z.initCause(e);
1539 throw z;
1540 }
1541
1542 if (entriesWithoutUTF8Flag.containsKey(ze)) {
1543 final NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
1544 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, nc.comment);
1545 }
1546 }
1547 }
1548
1549 private void sanityCheckLFHOffset(final ZipArchiveEntry entry) throws IOException {
1550 if (entry.getDiskNumberStart() < 0) {
1551 throw new IOException("broken archive, entry with negative disk number");
1552 }
1553 if (entry.getLocalHeaderOffset() < 0) {
1554 throw new IOException("broken archive, entry with negative local file header offset");
1555 }
1556 if (isSplitZipArchive) {
1557 if (entry.getDiskNumberStart() > centralDirectoryStartDiskNumber) {
1558 throw new IOException("local file header for " + entry.getName() + " starts on a later disk than central directory");
1559 }
1560 if (entry.getDiskNumberStart() == centralDirectoryStartDiskNumber && entry.getLocalHeaderOffset() > centralDirectoryStartRelativeOffset) {
1561 throw new IOException("local file header for " + entry.getName() + " starts after central directory");
1562 }
1563 } else if (entry.getLocalHeaderOffset() > centralDirectoryStartOffset) {
1564 throw new IOException("local file header for " + entry.getName() + " starts after central directory");
1565 }
1566 }
1567
/**
 * Reads the file name length and extra field length from the entry's local file header and stores the resulting data offset in the entry.
 * <p>
 * On return the archive is positioned right behind the two length fields.
 * </p>
 *
 * @param entry the entry whose data offset is to be set
 * @return a two element array holding the file name length and the extra field length, in that order
 * @throws IOException if reading fails or the entry's data would extend beyond {@code centralDirectoryStartOffset}
 */
private int[] setDataOffset(final ZipArchiveEntry entry) throws IOException {
    long offset = entry.getLocalHeaderOffset();
    if (isSplitZipArchive) {
        ((ZipSplitReadOnlySeekableByteChannel) archive).position(entry.getDiskNumberStart(), offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
        // the offset should be updated to the global offset
        offset = archive.position() - LFH_OFFSET_FOR_FILENAME_LENGTH;
    } else {
        archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
    }
    // both length fields (two shorts) are fetched with a single word-sized read
    wordBbuf.rewind();
    IOUtils.readFully(archive, wordBbuf);
    // flip() moves the position back to 0 so the bytes just read can be taken out again
    wordBbuf.flip();
    wordBbuf.get(shortBuf);
    final int fileNameLen = ZipShort.getValue(shortBuf);
    wordBbuf.get(shortBuf);
    final int extraFieldLen = ZipShort.getValue(shortBuf);
    // the data starts behind the two length fields, the file name and the extra field
    entry.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH + ZipConstants.SHORT + ZipConstants.SHORT + fileNameLen + extraFieldLen);
    if (entry.getDataOffset() + entry.getCompressedSize() > centralDirectoryStartOffset) {
        throw new IOException("data for " + entry.getName() + " overlaps with central directory.");
    }
    return new int[] { fileNameLen, extraFieldLen };
}
1590
1591 /**
1592 * If the entry holds a Zip64 extended information extra field, read sizes from there if the entry's sizes are set to 0xFFFFFFFFF, do the same for the
1593 * offset of the local file header.
1594 * <p>
1595 * Ensures the Zip64 extra either knows both compressed and uncompressed size or neither of both as the internal logic in ExtraFieldUtils forces the field
1596 * to create local header data even if they are never used - and here a field with only one size would be invalid.
1597 * </p>
1598 */
1599 private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry entry) throws IOException {
1600 final ZipExtraField extra = entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
1601 if (extra != null && !(extra instanceof Zip64ExtendedInformationExtraField)) {
1602 throw new ZipException("archive contains unparseable zip64 extra field");
1603 }
1604 final Zip64ExtendedInformationExtraField z64 = (Zip64ExtendedInformationExtraField) extra;
1605 if (z64 != null) {
1606 final boolean hasUncompressedSize = entry.getSize() == ZipConstants.ZIP64_MAGIC;
1607 final boolean hasCompressedSize = entry.getCompressedSize() == ZipConstants.ZIP64_MAGIC;
1608 final boolean hasRelativeHeaderOffset = entry.getLocalHeaderOffset() == ZipConstants.ZIP64_MAGIC;
1609 final boolean hasDiskStart = entry.getDiskNumberStart() == ZipConstants.ZIP64_MAGIC_SHORT;
1610 z64.reparseCentralDirectoryData(hasUncompressedSize, hasCompressedSize, hasRelativeHeaderOffset, hasDiskStart);
1611
1612 if (hasUncompressedSize) {
1613 final long size = z64.getSize().getLongValue();
1614 if (size < 0) {
1615 throw new IOException("broken archive, entry with negative size");
1616 }
1617 entry.setSize(size);
1618 } else if (hasCompressedSize) {
1619 z64.setSize(new ZipEightByteInteger(entry.getSize()));
1620 }
1621
1622 if (hasCompressedSize) {
1623 final long size = z64.getCompressedSize().getLongValue();
1624 if (size < 0) {
1625 throw new IOException("broken archive, entry with negative compressed size");
1626 }
1627 entry.setCompressedSize(size);
1628 } else if (hasUncompressedSize) {
1629 z64.setCompressedSize(new ZipEightByteInteger(entry.getCompressedSize()));
1630 }
1631
1632 if (hasRelativeHeaderOffset) {
1633 entry.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue());
1634 }
1635
1636 if (hasDiskStart) {
1637 entry.setDiskNumberStart(z64.getDiskStartNumber().getValue());
1638 }
1639 }
1640 }
1641
1642 /**
1643 * Skips the given number of bytes or throws an EOFException if skipping failed.
1644 */
1645 private void skipBytes(final int count) throws IOException {
1646 final long currentPosition = archive.position();
1647 final long newPosition = currentPosition + count;
1648 if (newPosition > archive.size()) {
1649 throw new EOFException();
1650 }
1651 archive.position(newPosition);
1652 }
1653
1654 /**
1655 * Sorts entries in place by offset.
1656 *
1657 * @param allEntries entries to sort
1658 * @return the given entries, sorted.
1659 */
1660 private ZipArchiveEntry[] sortByOffset(final ZipArchiveEntry[] allEntries) {
1661 Arrays.sort(allEntries, offsetComparator);
1662 return allEntries;
1663 }
1664
1665 /**
1666 * Checks whether the archive starts with an LFH. If it doesn't, it may be an empty archive.
1667 */
1668 private boolean startsWithLocalFileHeader() throws IOException {
1669 archive.position(firstLocalFileHeaderOffset);
1670 wordBbuf.rewind();
1671 IOUtils.readFully(archive, wordBbuf);
1672 return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG);
1673 }
1674 }
1675