001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 */
017package org.apache.commons.compress.archivers.zip;
018
019import java.io.BufferedInputStream;
020import java.io.ByteArrayInputStream;
021import java.io.Closeable;
022import java.io.EOFException;
023import java.io.File;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.SequenceInputStream;
027import java.nio.ByteBuffer;
028import java.nio.channels.FileChannel;
029import java.nio.channels.SeekableByteChannel;
030import java.nio.file.Files;
031import java.nio.file.Path;
032import java.nio.file.StandardOpenOption;
033import java.util.Arrays;
034import java.util.Collections;
035import java.util.Comparator;
036import java.util.EnumSet;
037import java.util.Enumeration;
038import java.util.HashMap;
039import java.util.LinkedList;
040import java.util.List;
041import java.util.Map;
042import java.util.zip.Inflater;
043import java.util.zip.ZipException;
044
045import org.apache.commons.compress.archivers.EntryStreamOffsets;
046import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
047import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
048import org.apache.commons.compress.utils.BoundedArchiveInputStream;
049import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
050import org.apache.commons.compress.utils.CharsetNames;
051import org.apache.commons.compress.utils.CountingInputStream;
052import org.apache.commons.compress.utils.IOUtils;
053import org.apache.commons.compress.utils.InputStreamStatistics;
054
055/**
 * Replacement for {@code java.util.zip.ZipFile}.
057 *
058 * <p>
 * This class adds support for file name encodings other than UTF-8 (which is required to work on ZIP files created by native ZIP tools) and is able to skip a
 * preamble like the one found in self extracting archives. Furthermore it returns instances of
061 * {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} instead of {@code java.util.zip.ZipEntry}.
062 * </p>
063 *
064 * <p>
 * It doesn't extend {@code java.util.zip.ZipFile} as it would have to reimplement all methods anyway. Like {@code java.util.zip.ZipFile}, it uses
066 * SeekableByteChannel under the covers and supports compressed and uncompressed entries. As of Apache Commons Compress 1.3 it also transparently supports Zip64
067 * extensions and thus individual entries and archives larger than 4 GB or with more than 65536 entries.
068 * </p>
069 *
070 * <p>
071 * The method signatures mimic the ones of {@code java.util.zip.ZipFile}, with a couple of exceptions:
072 *
073 * <ul>
074 * <li>There is no getName method.</li>
075 * <li>entries has been renamed to getEntries.</li>
076 * <li>getEntries and getEntry return {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} instances.</li>
077 * <li>close is allowed to throw IOException.</li>
078 * </ul>
079 */
080public class ZipFile implements Closeable {
081
082    /**
083     * Lock-free implementation of BoundedInputStream. The implementation uses positioned reads on the underlying archive file channel and therefore performs
084     * significantly faster in concurrent environment.
085     */
086    private static class BoundedFileChannelInputStream extends BoundedArchiveInputStream {
087        private final FileChannel archive;
088
089        BoundedFileChannelInputStream(final long start, final long remaining, final FileChannel archive) {
090            super(start, remaining);
091            this.archive = archive;
092        }
093
094        @Override
095        protected int read(final long pos, final ByteBuffer buf) throws IOException {
096            final int read = archive.read(buf, pos);
097            buf.flip();
098            return read;
099        }
100    }
101
102    /**
103     * Extends ZipArchiveEntry to store the offset within the archive.
104     */
105    private static final class Entry extends ZipArchiveEntry {
106
107        @Override
108        public boolean equals(final Object other) {
109            if (super.equals(other)) {
110                // super.equals would return false if other were not an Entry
111                final Entry otherEntry = (Entry) other;
112                return getLocalHeaderOffset() == otherEntry.getLocalHeaderOffset() && super.getDataOffset() == otherEntry.getDataOffset()
113                        && super.getDiskNumberStart() == otherEntry.getDiskNumberStart();
114            }
115            return false;
116        }
117
118        @Override
119        public int hashCode() {
120            return 3 * super.hashCode() + (int) getLocalHeaderOffset() + (int) (getLocalHeaderOffset() >> 32);
121        }
122    }
123
124    private static final class NameAndComment {
125        private final byte[] name;
126        private final byte[] comment;
127
128        private NameAndComment(final byte[] name, final byte[] comment) {
129            this.name = name;
130            this.comment = comment;
131        }
132    }
133
134    private static final class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics {
135        StoredStatisticsStream(final InputStream in) {
136            super(in);
137        }
138
139        @Override
140        public long getCompressedCount() {
141            return super.getBytesRead();
142        }
143
144        @Override
145        public long getUncompressedCount() {
146            return getCompressedCount();
147        }
148    }
149
150    private static final int HASH_SIZE = 509;
151    static final int NIBLET_MASK = 0x0f;
152    static final int BYTE_SHIFT = 8;
153    private static final int POS_0 = 0;
154    private static final int POS_1 = 1;
155    private static final int POS_2 = 2;
156    private static final int POS_3 = 3;
157    private static final byte[] ONE_ZERO_BYTE = new byte[1];
158
159    /**
160     * Length of a "central directory" entry structure without file name, extra fields or comment.
161     */
162    private static final int CFH_LEN =
163    // @formatter:off
164        /* version made by                 */ ZipConstants.SHORT
165        /* version needed to extract       */ + ZipConstants.SHORT
166        /* general purpose bit flag        */ + ZipConstants.SHORT
167        /* compression method              */ + ZipConstants.SHORT
168        /* last mod file time              */ + ZipConstants.SHORT
169        /* last mod file date              */ + ZipConstants.SHORT
170        /* crc-32                          */ + ZipConstants.WORD
171        /* compressed size                 */ + ZipConstants.WORD
172        /* uncompressed size               */ + ZipConstants.WORD
173        /* file name length                */ + ZipConstants. SHORT
174        /* extra field length              */ + ZipConstants.SHORT
175        /* file comment length             */ + ZipConstants.SHORT
176        /* disk number start               */ + ZipConstants.SHORT
177        /* internal file attributes        */ + ZipConstants.SHORT
178        /* external file attributes        */ + ZipConstants.WORD
179        /* relative offset of local header */ + ZipConstants.WORD;
180    // @formatter:on
181
182    private static final long CFH_SIG = ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
183
184    /**
185     * Length of the "End of central directory record" - which is supposed to be the last structure of the archive - without file comment.
186     */
187    static final int MIN_EOCD_SIZE =
188    // @formatter:off
189        /* end of central dir signature    */ ZipConstants.WORD
190        /* number of this disk             */ + ZipConstants.SHORT
191        /* number of the disk with the     */
192        /* start of the central directory  */ + ZipConstants.SHORT
193        /* total number of entries in      */
194        /* the central dir on this disk    */ + ZipConstants.SHORT
195        /* total number of entries in      */
196        /* the central dir                 */ + ZipConstants.SHORT
197        /* size of the central directory   */ + ZipConstants.WORD
198        /* offset of start of central      */
199        /* directory with respect to       */
200        /* the starting disk number        */ + ZipConstants.WORD
201        /* ZIP file comment length         */ + ZipConstants.SHORT;
202    // @formatter:on
203
204    /**
205     * Maximum length of the "End of central directory record" with a file comment.
206     */
207    private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
208    // @formatter:off
209        /* maximum length of ZIP file comment */ + ZipConstants.ZIP64_MAGIC_SHORT;
210    // @formatter:on
211
212    /**
213     * Offset of the field that holds the location of the length of the central directory inside the "End of central directory record" relative to the start of
214     * the "End of central directory record".
215     */
216    private static final int CFD_LENGTH_OFFSET =
217    // @formatter:off
218        /* end of central dir signature    */ ZipConstants.WORD
219        /* number of this disk             */ + ZipConstants.SHORT
220        /* number of the disk with the     */
221        /* start of the central directory  */ + ZipConstants.SHORT
222        /* total number of entries in      */
223        /* the central dir on this disk    */ + ZipConstants.SHORT
224        /* total number of entries in      */
225        /* the central dir                 */ + ZipConstants.SHORT;
226    // @formatter:on
227
228    /**
229     * Offset of the field that holds the disk number of the first central directory entry inside the "End of central directory record" relative to the start of
230     * the "End of central directory record".
231     */
232    private static final int CFD_DISK_OFFSET =
233    // @formatter:off
234            /* end of central dir signature    */ ZipConstants.WORD
235            /* number of this disk             */ + ZipConstants.SHORT;
236    // @formatter:on
237
238    /**
239     * Offset of the field that holds the location of the first central directory entry inside the "End of central directory record" relative to the "number of
240     * the disk with the start of the central directory".
241     */
242    private static final int CFD_LOCATOR_RELATIVE_OFFSET =
243    // @formatter:off
244            /* total number of entries in      */
245            /* the central dir on this disk    */ + ZipConstants.SHORT
246            /* total number of entries in      */
247            /* the central dir                 */ + ZipConstants.SHORT
248            /* size of the central directory   */ + ZipConstants.WORD;
249    // @formatter:on
250
251    /**
252     * Length of the "Zip64 end of central directory locator" - which should be right in front of the "end of central directory record" if one is present at
253     * all.
254     */
255    private static final int ZIP64_EOCDL_LENGTH =
256    // @formatter:off
257        /* zip64 end of central dir locator sig */ ZipConstants.WORD
258        /* number of the disk with the start    */
259        /* start of the zip64 end of            */
260        /* central directory                    */ + ZipConstants.WORD
261        /* relative offset of the zip64         */
262        /* end of central directory record      */ + ZipConstants.DWORD
263        /* total number of disks                */ + ZipConstants.WORD;
264    // @formatter:on
265
266    /**
267     * Offset of the field that holds the location of the "Zip64 end of central directory record" inside the "Zip64 end of central directory locator" relative
268     * to the start of the "Zip64 end of central directory locator".
269     */
270    private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
271    // @formatter:off
272        /* zip64 end of central dir locator sig */ ZipConstants.WORD
273        /* number of the disk with the start    */
274        /* start of the zip64 end of            */
275        /* central directory                    */ + ZipConstants.WORD;
276    // @formatter:on
277
278    /**
279     * Offset of the field that holds the location of the first central directory entry inside the "Zip64 end of central directory record" relative to the start
280     * of the "Zip64 end of central directory record".
281     */
282    private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
283    // @formatter:off
284        /* zip64 end of central dir        */
285        /* signature                       */ ZipConstants.WORD
286        /* size of zip64 end of central    */
287        /* directory record                */ + ZipConstants.DWORD
288        /* version made by                 */ + ZipConstants.SHORT
289        /* version needed to extract       */ + ZipConstants.SHORT
290        /* number of this disk             */ + ZipConstants.WORD
291        /* number of the disk with the     */
292        /* start of the central directory  */ + ZipConstants.WORD
293        /* total number of entries in the  */
294        /* central directory on this disk  */ + ZipConstants.DWORD
295        /* total number of entries in the  */
296        /* central directory               */ + ZipConstants.DWORD
297        /* size of the central directory   */ + ZipConstants.DWORD;
298    // @formatter:on
299
300    /**
301     * Offset of the field that holds the disk number of the first central directory entry inside the "Zip64 end of central directory record" relative to the
302     * start of the "Zip64 end of central directory record".
303     */
304    private static final int ZIP64_EOCD_CFD_DISK_OFFSET =
305    // @formatter:off
306            /* zip64 end of central dir        */
307            /* signature                       */ ZipConstants.WORD
308            /* size of zip64 end of central    */
309            /* directory record                */ + ZipConstants.DWORD
310            /* version made by                 */ + ZipConstants.SHORT
311            /* version needed to extract       */ + ZipConstants.SHORT
312            /* number of this disk             */ + ZipConstants.WORD;
313    // @formatter:on
314
315    /**
316     * Offset of the field that holds the location of the first central directory entry inside the "Zip64 end of central directory record" relative to the
317     * "number of the disk with the start of the central directory".
318     */
319    private static final int ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET =
320    // @formatter:off
321            /* total number of entries in the  */
322            /* central directory on this disk  */ ZipConstants.DWORD
323            /* total number of entries in the  */
324            /* central directory               */ + ZipConstants.DWORD
325            /* size of the central directory   */ + ZipConstants.DWORD;
326    // @formatter:on
327
328    /**
329     * Number of bytes in local file header up to the &quot;length of file name&quot; entry.
330     */
331    private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
332    // @formatter:off
333        /* local file header signature     */ ZipConstants.WORD
334        /* version needed to extract       */ + ZipConstants.SHORT
335        /* general purpose bit flag        */ + ZipConstants.SHORT
336        /* compression method              */ + ZipConstants.SHORT
337        /* last mod file time              */ + ZipConstants.SHORT
338        /* last mod file date              */ + ZipConstants.SHORT
339        /* crc-32                          */ + ZipConstants.WORD
340        /* compressed size                 */ + ZipConstants.WORD
341        /* uncompressed size               */ + (long) ZipConstants.WORD;
342    // @formatter:on
343
344    /**
345     * Compares two ZipArchiveEntries based on their offset within the archive.
346     *
347     * <p>
348     * Won't return any meaningful results if one of the entries isn't part of the archive at all.
349     * </p>
350     *
351     * @since 1.1
352     */
353    private static final Comparator<ZipArchiveEntry> offsetComparator = Comparator.comparingLong(ZipArchiveEntry::getDiskNumberStart)
354            .thenComparingLong(ZipArchiveEntry::getLocalHeaderOffset);
355
356    /**
357     * Closes a ZIP file quietly; throwing no IOException, does nothing on null input.
358     *
359     * @param zipFile file to close, can be null
360     */
361    public static void closeQuietly(final ZipFile zipFile) {
362        IOUtils.closeQuietly(zipFile);
363    }
364
365    /**
366     * List of entries in the order they appear inside the central directory.
367     */
368    private final List<ZipArchiveEntry> entries = new LinkedList<>();
369
370    /**
371     * Maps String to list of ZipArchiveEntrys, name -> actual entries.
372     */
373    private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = new HashMap<>(HASH_SIZE);
374
375    /**
376     * The encoding to use for file names and the file comment.
377     *
378     * <p>
379     * For a list of possible values see <a href="Supported Encodings">https://docs.oracle.com/javase/8/docs/technotes/guides/intl/encoding.doc.html</a>.
380     * Defaults to UTF-8.
381     * </p>
382     */
383    private final String encoding;
384
385    /**
386     * The ZIP encoding to use for file names and the file comment.
387     */
388    private final ZipEncoding zipEncoding;
389
390    /**
391     * File name of actual source.
392     */
393    private final String archiveName;
394
395    /**
396     * The actual data source.
397     */
398    private final SeekableByteChannel archive;
399
400    /**
401     * Whether to look for and use Unicode extra fields.
402     */
403    private final boolean useUnicodeExtraFields;
404
405    /**
406     * Whether the file is closed.
407     */
408    private volatile boolean closed = true;
409
410    /**
411     * Whether the ZIP archive is a split ZIP archive
412     */
413    private final boolean isSplitZipArchive;
414
415    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
416    private final byte[] dwordBuf = new byte[ZipConstants.DWORD];
417
418    private final byte[] wordBuf = new byte[ZipConstants.WORD];
419
420    private final byte[] cfhBuf = new byte[CFH_LEN];
421
422    private final byte[] shortBuf = new byte[ZipConstants.SHORT];
423
424    private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);
425
426    private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);
427
428    private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);
429
430    private final ByteBuffer shortBbuf = ByteBuffer.wrap(shortBuf);
431
432    private long centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset;
433
434    private long centralDirectoryStartOffset;
435
436    private long firstLocalFileHeaderOffset;
437
438    /**
439     * Opens the given file for reading, assuming "UTF8" for file names.
440     *
441     * @param f the archive.
442     *
443     * @throws IOException if an error occurs while reading the file.
444     */
445    public ZipFile(final File f) throws IOException {
446        this(f, CharsetNames.UTF_8);
447    }
448
449    /**
450     * Opens the given file for reading, assuming the specified encoding for file names and scanning for Unicode extra fields.
451     *
452     * @param f        the archive.
453     * @param encoding the encoding to use for file names, use null for the platform's default encoding
454     *
455     * @throws IOException if an error occurs while reading the file.
456     */
457    public ZipFile(final File f, final String encoding) throws IOException {
458        this(f.toPath(), encoding, true);
459    }
460
461    /**
462     * Opens the given file for reading, assuming the specified encoding for file names.
463     *
464     * @param f                     the archive.
465     * @param encoding              the encoding to use for file names, use null for the platform's default encoding
466     * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
467     *
468     * @throws IOException if an error occurs while reading the file.
469     */
470    public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields) throws IOException {
471        this(f.toPath(), encoding, useUnicodeExtraFields, false);
472    }
473
474    /**
475     * Opens the given file for reading, assuming the specified encoding for file names.
476     *
477     * <p>
478     * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time
479     * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory.
480     * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is
481     * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
482     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.
483     * </p>
484     *
485     * @param f                     the archive.
486     * @param encoding              the encoding to use for file names, use null for the platform's default encoding
487     * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
488     * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's javadoc)
489     *
490     * @throws IOException if an error occurs while reading the file.
491     * @since 1.19
492     */
493    public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields, final boolean ignoreLocalFileHeader) throws IOException {
494        this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)), f.getAbsolutePath(), encoding, useUnicodeExtraFields, true,
495                ignoreLocalFileHeader);
496    }
497
498    /**
499     * Opens the given path for reading, assuming "UTF8" for file names.
500     *
501     * @param path path to the archive.
502     * @throws IOException if an error occurs while reading the file.
503     * @since 1.22
504     */
505    public ZipFile(final Path path) throws IOException {
506        this(path, CharsetNames.UTF_8);
507    }
508
509    /**
510     * Opens the given path for reading, assuming the specified encoding for file names and scanning for Unicode extra fields.
511     *
512     * @param path     path to the archive.
513     * @param encoding the encoding to use for file names, use null for the platform's default encoding
514     * @throws IOException if an error occurs while reading the file.
515     * @since 1.22
516     */
517    public ZipFile(final Path path, final String encoding) throws IOException {
518        this(path, encoding, true);
519    }
520
521    /**
522     * Opens the given path for reading, assuming the specified encoding for file names.
523     *
524     * @param path                  path to the archive.
525     * @param encoding              the encoding to use for file names, use null for the platform's default encoding
526     * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
527     * @throws IOException if an error occurs while reading the file.
528     * @since 1.22
529     */
530    public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields) throws IOException {
531        this(path, encoding, useUnicodeExtraFields, false);
532    }
533
534    /**
535     * Opens the given path for reading, assuming the specified encoding for file names.
536     * <p>
537     * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time
538     * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory.
539     * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is
540     * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
541     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.
542     * </p>
543     *
544     * @param path                  path to the archive.
545     * @param encoding              the encoding to use for file names, use null for the platform's default encoding
546     * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
547     * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's javadoc)
548     * @throws IOException if an error occurs while reading the file.
549     * @since 1.22
550     */
551    public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields, final boolean ignoreLocalFileHeader) throws IOException {
552        this(Files.newByteChannel(path, EnumSet.of(StandardOpenOption.READ)), path.toAbsolutePath().toString(), encoding, useUnicodeExtraFields, true,
553                ignoreLocalFileHeader);
554    }
555
556    /**
557     * Opens the given channel for reading, assuming "UTF8" for file names.
558     *
559     * <p>
560     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
561     * </p>
562     *
563     * @param channel the archive.
564     *
565     * @throws IOException if an error occurs while reading the file.
566     * @since 1.13
567     */
568    public ZipFile(final SeekableByteChannel channel) throws IOException {
569        this(channel, "unknown archive", CharsetNames.UTF_8, true);
570    }
571
572    /**
573     * Opens the given channel for reading, assuming the specified encoding for file names.
574     *
575     * <p>
576     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
577     * </p>
578     *
579     * @param channel  the archive.
580     * @param encoding the encoding to use for file names, use null for the platform's default encoding
581     *
582     * @throws IOException if an error occurs while reading the file.
583     * @since 1.13
584     */
585    public ZipFile(final SeekableByteChannel channel, final String encoding) throws IOException {
586        this(channel, "unknown archive", encoding, true);
587    }
588
589    /**
590     * Opens the given channel for reading, assuming the specified encoding for file names.
591     *
592     * <p>
593     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
594     * </p>
595     *
596     * @param channel               the archive.
597     * @param archiveName           name of the archive, used for error messages only.
598     * @param encoding              the encoding to use for file names, use null for the platform's default encoding
599     * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
600     *
601     * @throws IOException if an error occurs while reading the file.
602     * @since 1.13
603     */
604    public ZipFile(final SeekableByteChannel channel, final String archiveName, final String encoding, final boolean useUnicodeExtraFields) throws IOException {
605        this(channel, archiveName, encoding, useUnicodeExtraFields, false, false);
606    }
607
608    /**
609     * Opens the given channel for reading, assuming the specified encoding for file names.
610     *
611     * <p>
612     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
613     * </p>
614     *
615     * <p>
616     * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time
617     * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory.
618     * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is
619     * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
620     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.
621     * </p>
622     *
623     * @param channel               the archive.
624     * @param archiveName           name of the archive, used for error messages only.
625     * @param encoding              the encoding to use for file names, use null for the platform's default encoding
626     * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
627     * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's javadoc)
628     *
629     * @throws IOException if an error occurs while reading the file.
630     * @since 1.19
631     */
632    public ZipFile(final SeekableByteChannel channel, final String archiveName, final String encoding, final boolean useUnicodeExtraFields,
633            final boolean ignoreLocalFileHeader) throws IOException {
634        this(channel, archiveName, encoding, useUnicodeExtraFields, false, ignoreLocalFileHeader);
635    }
636
637    private ZipFile(final SeekableByteChannel channel, final String archiveName, final String encoding, final boolean useUnicodeExtraFields,
638            final boolean closeOnError, final boolean ignoreLocalFileHeader) throws IOException {
639        isSplitZipArchive = channel instanceof ZipSplitReadOnlySeekableByteChannel;
640
641        this.archiveName = archiveName;
642        this.encoding = encoding;
643        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
644        this.useUnicodeExtraFields = useUnicodeExtraFields;
645        archive = channel;
646        boolean success = false;
647        try {
648            final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = populateFromCentralDirectory();
649            if (!ignoreLocalFileHeader) {
650                resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
651            }
652            fillNameMap();
653            success = true;
654        } catch (final IOException e) {
655            throw new IOException("Error on ZipFile " + archiveName, e);
656        } finally {
657            closed = !success;
658            if (!success && closeOnError) {
659                IOUtils.closeQuietly(archive);
660            }
661        }
662    }
663
    /**
     * Opens the given file for reading, assuming the encoding named by {@link CharsetNames#UTF_8} for file names.
     *
     * <p>
     * The name is converted to a {@link Path} via {@code new File(name).toPath()} and handed to the path-based constructor.
     * </p>
     *
     * @param name name of the archive.
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(final String name) throws IOException {
        this(new File(name).toPath(), CharsetNames.UTF_8);
    }
674
    /**
     * Opens the given file for reading, assuming the specified encoding for file names, scanning unicode extra fields.
     *
     * <p>
     * The name is converted to a {@link Path} via {@code new File(name).toPath()} and handed to the path-based constructor with the third argument
     * fixed to {@code true}.
     * </p>
     *
     * @param name     name of the archive.
     * @param encoding the encoding to use for file names, use null for the platform's default encoding
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(final String name, final String encoding) throws IOException {
        this(new File(name).toPath(), encoding, true);
    }
686
    /**
     * Whether this class is able to read the given entry.
     *
     * <p>
     * May return false if it is set up to use encryption or a compression method that hasn't been implemented yet.
     * </p>
     *
     * <p>
     * The decision is delegated entirely to {@code ZipUtil.canHandleEntryData(entry)}; no state of this ZipFile is consulted.
     * </p>
     *
     * @since 1.1
     * @param entry the entry
     * @return whether this class is able to read the given entry.
     */
    public boolean canReadEntryData(final ZipArchiveEntry entry) {
        return ZipUtil.canHandleEntryData(entry);
    }
701
    /**
     * Closes the archive by marking this ZipFile as closed and closing the underlying channel.
     *
     * @throws IOException if an error occurs closing the archive.
     */
    @Override
    public void close() throws IOException {
        // The flag is written here (and once at the end of the constructor) and read in finalize().
        // It is set before the channel is closed, so finalize() will not call close() again even
        // if archive.close() throws.
        // The original author's note: finalize() can never run in parallel with this method, so
        // no synchronization is needed.
        closed = true;
        archive.close();
    }
715
716    /**
717     * Transfer selected entries from this ZIP file to a given #ZipArchiveOutputStream. Compression and all other attributes will be as in this file.
718     * <p>
719     * This method transfers entries based on the central directory of the ZIP file.
720     * </p>
721     *
722     * @param target    The zipArchiveOutputStream to write the entries to
723     * @param predicate A predicate that selects which entries to write
724     * @throws IOException on error
725     */
726    public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate) throws IOException {
727        final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder();
728        while (src.hasMoreElements()) {
729            final ZipArchiveEntry entry = src.nextElement();
730            if (predicate.test(entry)) {
731                target.addRawArchiveEntry(entry, getRawInputStream(entry));
732            }
733        }
734    }
735
736    /**
737     * Creates new BoundedInputStream, according to implementation of underlying archive channel.
738     */
739    private BoundedArchiveInputStream createBoundedInputStream(final long start, final long remaining) {
740        if (start < 0 || remaining < 0 || start + remaining < start) {
741            throw new IllegalArgumentException("Corrupted archive, stream boundaries" + " are out of range");
742        }
743        return archive instanceof FileChannel ? new BoundedFileChannelInputStream(start, remaining, (FileChannel) archive)
744                : new BoundedSeekableByteChannelInputStream(start, remaining, archive);
745    }
746
747    private void fillNameMap() {
748        entries.forEach(ze -> {
749            // entries are filled in populateFromCentralDirectory and
750            // never modified
751            final String name = ze.getName();
752            final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.computeIfAbsent(name, k -> new LinkedList<>());
753            entriesOfThatName.addLast(ze);
754        });
755    }
756
    /**
     * Ensures that the close method of this ZIP file is called when there are no more references to it.
     *
     * <p>
     * {@link #close()} is only invoked when the {@code closed} flag is not set; {@code super.finalize()} runs in either case, even if closing throws.
     * </p>
     *
     * @throws Throwable whatever {@link #close()} or {@code super.finalize()} throws.
     * @see #close()
     */
    @Override
    protected void finalize() throws Throwable {
        try {
            if (!closed) {
                close();
            }
        } finally {
            super.finalize();
        }
    }
772
773    /**
774     * Gets an InputStream for reading the content before the first local file header.
775     *
776     * @return null if there is no content before the first local file header. Otherwise, returns a stream to read the content before the first local file
777     *         header.
778     * @since 1.23
779     */
780    public InputStream getContentBeforeFirstLocalFileHeader() {
781        return firstLocalFileHeaderOffset == 0 ? null : createBoundedInputStream(0, firstLocalFileHeaderOffset);
782    }
783
784    private long getDataOffset(final ZipArchiveEntry ze) throws IOException {
785        final long s = ze.getDataOffset();
786        if (s == EntryStreamOffsets.OFFSET_UNKNOWN) {
787            setDataOffset(ze);
788            return ze.getDataOffset();
789        }
790        return s;
791    }
792
    /**
     * Gets the encoding to use for file names and the file comment.
     *
     * <p>
     * This is the encoding name exactly as it was passed to the constructor.
     * </p>
     *
     * @return null if using the platform's default character encoding.
     */
    public String getEncoding() {
        return encoding;
    }
801
    /**
     * Gets all entries.
     *
     * <p>
     * Entries will be returned in the same order they appear within the archive's central directory.
     * </p>
     *
     * <p>
     * The enumeration is created with {@link Collections#enumeration(java.util.Collection)} directly over the internal entry collection.
     * </p>
     *
     * @return all entries as {@link ZipArchiveEntry} instances
     */
    public Enumeration<ZipArchiveEntry> getEntries() {
        return Collections.enumeration(entries);
    }
814
    /**
     * Gets all named entries in the same order they appear within the archive's central directory.
     *
     * <p>
     * When no entry of that name exists, {@code ZipArchiveEntry.EMPTY_LINKED_LIST} is returned rather than {@code null}. Otherwise the returned object
     * is the list held in the internal name map, not a copy.
     * </p>
     *
     * @param name name of the entry.
     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the given name
     * @since 1.6
     */
    public Iterable<ZipArchiveEntry> getEntries(final String name) {
        return nameMap.getOrDefault(name, ZipArchiveEntry.EMPTY_LINKED_LIST);
    }
825
826    /**
827     * Gets all entries in physical order.
828     *
829     * <p>
830     * Entries will be returned in the same order their contents appear within the archive.
831     * </p>
832     *
833     * @return all entries as {@link ZipArchiveEntry} instances
834     *
835     * @since 1.1
836     */
837    public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
838        final ZipArchiveEntry[] allEntries = entries.toArray(ZipArchiveEntry.EMPTY_ARRAY);
839        return Collections.enumeration(Arrays.asList(sortByOffset(allEntries)));
840    }
841
842    /**
843     * Gets all named entries in the same order their contents appear within the archive.
844     *
845     * @param name name of the entry.
846     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the given name
847     * @since 1.6
848     */
849    public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) {
850        final LinkedList<ZipArchiveEntry> linkedList = nameMap.getOrDefault(name, ZipArchiveEntry.EMPTY_LINKED_LIST);
851        return Arrays.asList(sortByOffset(linkedList.toArray(ZipArchiveEntry.EMPTY_ARRAY)));
852    }
853
854    /**
855     * Gets a named entry or {@code null} if no entry by that name exists.
856     *
857     * <p>
858     * If multiple entries with the same name exist the first entry in the archive's central directory by that name is returned.
859     * </p>
860     *
861     * @param name name of the entry.
862     * @return the ZipArchiveEntry corresponding to the given name - or {@code null} if not present.
863     */
864    public ZipArchiveEntry getEntry(final String name) {
865        final LinkedList<ZipArchiveEntry> entries = nameMap.get(name);
866        return entries != null ? entries.getFirst() : null;
867    }
868
    /**
     * Gets the offset of the first local file header in the file.
     *
     * <p>
     * A value of 0 means there is no content before the first local file header, see {@link #getContentBeforeFirstLocalFileHeader()}.
     * </p>
     *
     * @return the length of the content before the first local file header
     * @since 1.23
     */
    public long getFirstLocalFileHeaderOffset() {
        return firstLocalFileHeaderOffset;
    }
878
879    /**
880     * Gets an InputStream for reading the contents of the given entry.
881     *
882     * @param entry the entry to get the stream for.
883     * @return a stream to read the entry from. The returned stream implements {@link InputStreamStatistics}.
884     * @throws IOException if unable to create an input stream from the zipEntry.
885     */
886    public InputStream getInputStream(final ZipArchiveEntry entry) throws IOException {
887        if (!(entry instanceof Entry)) {
888            return null;
889        }
890        // cast validity is checked just above
891        ZipUtil.checkRequestedFeatures(entry);
892
893        // doesn't get closed if the method is not supported - which
894        // should never happen because of the checkRequestedFeatures
895        // call above
896        final InputStream is = new BufferedInputStream(getRawInputStream(entry)); // NOSONAR
897        switch (ZipMethod.getMethodByCode(entry.getMethod())) {
898        case STORED:
899            return new StoredStatisticsStream(is);
900        case UNSHRINKING:
901            return new UnshrinkingInputStream(is);
902        case IMPLODING:
903            try {
904                return new ExplodingInputStream(entry.getGeneralPurposeBit().getSlidingDictionarySize(),
905                        entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is);
906            } catch (final IllegalArgumentException ex) {
907                throw new IOException("bad IMPLODE data", ex);
908            }
909        case DEFLATED:
910            final Inflater inflater = new Inflater(true);
911            // Inflater with nowrap=true has this odd contract for a zero padding
912            // byte following the data stream; this used to be zlib's requirement
913            // and has been fixed a long time ago, but the contract persists so
914            // we comply.
915            // https://docs.oracle.com/javase/8/docs/api/java/util/zip/Inflater.html#Inflater(boolean)
916            return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)), inflater) {
917                @Override
918                public void close() throws IOException {
919                    try {
920                        super.close();
921                    } finally {
922                        inflater.end();
923                    }
924                }
925            };
926        case BZIP2:
927            return new BZip2CompressorInputStream(is);
928        case ENHANCED_DEFLATED:
929            return new Deflate64CompressorInputStream(is);
930        case AES_ENCRYPTED:
931        case EXPANDING_LEVEL_1:
932        case EXPANDING_LEVEL_2:
933        case EXPANDING_LEVEL_3:
934        case EXPANDING_LEVEL_4:
935        case JPEG:
936        case LZMA:
937        case PKWARE_IMPLODING:
938        case PPMD:
939        case TOKENIZATION:
940        case UNKNOWN:
941        case WAVPACK:
942        case XZ:
943        default:
944            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(entry.getMethod()), entry);
945        }
946    }
947
948    /**
949     * Gets the raw stream of the archive entry (compressed form).
950     *
951     * <p>
952     * This method does not relate to how/if we understand the payload in the stream, since we really only intend to move it on to somewhere else.
953     * </p>
954     *
955     * <p>
956     * Since version 1.22, this method will make an attempt to read the entry's data stream offset, even if the {@code ignoreLocalFileHeader} parameter was
957     * {@code true} in the constructor. An IOException can also be thrown from the body of the method if this lookup fails for some reason.
958     * </p>
959     *
960     * @param entry The entry to get the stream for
961     * @return The raw input stream containing (possibly) compressed data.
962     * @since 1.11
963     * @throws IOException if there is a problem reading data offset (added in version 1.22).
964     */
965    public InputStream getRawInputStream(final ZipArchiveEntry entry) throws IOException {
966        if (!(entry instanceof Entry)) {
967            return null;
968        }
969        final long start = getDataOffset(entry);
970        if (start == EntryStreamOffsets.OFFSET_UNKNOWN) {
971            return null;
972        }
973        return createBoundedInputStream(start, entry.getCompressedSize());
974    }
975
976    /**
977     * Gets the entry's content as a String if isUnixSymlink() returns true for it, otherwise returns null.
978     * <p>
979     * This method assumes the symbolic link's file name uses the same encoding that as been specified for this ZipFile.
980     * </p>
981     *
982     * @param entry ZipArchiveEntry object that represents the symbolic link
983     * @return entry's content as a String
984     * @throws IOException problem with content's input stream
985     * @since 1.5
986     */
987    public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException {
988        if (entry != null && entry.isUnixSymlink()) {
989            try (InputStream in = getInputStream(entry)) {
990                return zipEncoding.decode(IOUtils.toByteArray(in));
991            }
992        }
993        return null;
994    }
995
996    /**
997     * Reads the central directory of the given archive and populates the internal tables with ZipArchiveEntry instances.
998     *
999     * <p>
1000     * The ZipArchiveEntrys will know all data that can be obtained from the central directory alone, but not the data that requires the local file header or
1001     * additional data to be read.
1002     * </p>
1003     *
1004     * @return a map of zip entries that didn't have the language encoding flag set when read.
1005     */
1006    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory() throws IOException {
1007        final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag = new HashMap<>();
1008
1009        positionAtCentralDirectory();
1010        centralDirectoryStartOffset = archive.position();
1011
1012        wordBbuf.rewind();
1013        IOUtils.readFully(archive, wordBbuf);
1014        long sig = ZipLong.getValue(wordBuf);
1015
1016        if (sig != CFH_SIG && startsWithLocalFileHeader()) {
1017            throw new IOException("Central directory is empty, can't expand" + " corrupt archive.");
1018        }
1019
1020        while (sig == CFH_SIG) {
1021            readCentralDirectoryEntry(noUTF8Flag);
1022            wordBbuf.rewind();
1023            IOUtils.readFully(archive, wordBbuf);
1024            sig = ZipLong.getValue(wordBuf);
1025        }
1026        return noUTF8Flag;
1027    }
1028
1029    /**
1030     * Searches for either the &quot;Zip64 end of central directory locator&quot; or the &quot;End of central dir record&quot;, parses it and positions the
1031     * stream at the first central directory record.
1032     */
1033    private void positionAtCentralDirectory() throws IOException {
1034        positionAtEndOfCentralDirectoryRecord();
1035        boolean found = false;
1036        final boolean searchedForZip64EOCD = archive.position() > ZIP64_EOCDL_LENGTH;
1037        if (searchedForZip64EOCD) {
1038            archive.position(archive.position() - ZIP64_EOCDL_LENGTH);
1039            wordBbuf.rewind();
1040            IOUtils.readFully(archive, wordBbuf);
1041            found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG, wordBuf);
1042        }
1043        if (!found) {
1044            // not a ZIP64 archive
1045            if (searchedForZip64EOCD) {
1046                skipBytes(ZIP64_EOCDL_LENGTH - ZipConstants.WORD);
1047            }
1048            positionAtCentralDirectory32();
1049        } else {
1050            positionAtCentralDirectory64();
1051        }
1052    }
1053
    /**
     * Parses the &quot;End of central dir record&quot; and positions the stream at the first central directory record.
     *
     * <p>
     * Expects stream to be positioned at the beginning of the &quot;End of central dir record&quot;.
     * </p>
     *
     * <p>
     * Side effects: sets {@code centralDirectoryStartDiskNumber} and {@code centralDirectoryStartRelativeOffset}; for archives that are not split it
     * also sets {@code firstLocalFileHeaderOffset}.
     * </p>
     *
     * @throws IOException if reading from or repositioning the archive fails.
     */
    private void positionAtCentralDirectory32() throws IOException {
        // remembered before any skipping; needed below to work out how much data precedes the archive
        final long endOfCentralDirectoryRecordOffset = archive.position();
        if (isSplitZipArchive) {
            // split archive: read the disk number the central directory starts on ...
            skipBytes(CFD_DISK_OFFSET);
            shortBbuf.rewind();
            IOUtils.readFully(archive, shortBbuf);
            centralDirectoryStartDiskNumber = ZipShort.getValue(shortBuf);

            skipBytes(CFD_LOCATOR_RELATIVE_OFFSET);

            // ... and its offset relative to that disk, then seek using (disk, offset)
            wordBbuf.rewind();
            IOUtils.readFully(archive, wordBbuf);
            centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf);
            ((ZipSplitReadOnlySeekableByteChannel) archive).position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
        } else {
            // single file: read the central directory's length and, directly after it, its start offset
            skipBytes(CFD_LENGTH_OFFSET);
            wordBbuf.rewind();
            IOUtils.readFully(archive, wordBbuf);
            final long centralDirectoryLength = ZipLong.getValue(wordBuf);

            wordBbuf.rewind();
            IOUtils.readFully(archive, wordBbuf);
            centralDirectoryStartDiskNumber = 0;
            centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf);

            // Whatever lies between the recorded (offset + length) and the actual position of the end
            // record is treated as content preceding the first local file header; never negative.
            firstLocalFileHeaderOffset = Long.max(endOfCentralDirectoryRecordOffset - centralDirectoryLength - centralDirectoryStartRelativeOffset, 0L);
            // the recorded offset is shifted by that amount to find the real start of the central directory
            archive.position(centralDirectoryStartRelativeOffset + firstLocalFileHeaderOffset);
        }
    }
1088
    /**
     * Parses the &quot;Zip64 end of central directory locator&quot;, finds the &quot;Zip64 end of central directory record&quot; using the parsed information,
     * parses that and positions the stream at the first central directory record.
     *
     * <p>
     * Expects stream to be positioned right behind the &quot;Zip64 end of central directory locator&quot;'s signature.
     * </p>
     *
     * <p>
     * Side effects: sets {@code centralDirectoryStartDiskNumber} and {@code centralDirectoryStartRelativeOffset}.
     * </p>
     *
     * @throws ZipException if the position the locator points to does not start with the Zip64 end of central directory signature.
     * @throws IOException  if reading from or repositioning the archive fails.
     */
    private void positionAtCentralDirectory64() throws IOException {
        // Step 1: follow the locator to the Zip64 end of central directory record
        if (isSplitZipArchive) {
            // split archive: the locator is read as a disk number followed by an offset on that disk
            wordBbuf.rewind();
            IOUtils.readFully(archive, wordBbuf);
            final long diskNumberOfEOCD = ZipLong.getValue(wordBuf);

            dwordBbuf.rewind();
            IOUtils.readFully(archive, dwordBbuf);
            final long relativeOffsetOfEOCD = ZipEightByteInteger.getLongValue(dwordBuf);
            ((ZipSplitReadOnlySeekableByteChannel) archive).position(diskNumberOfEOCD, relativeOffsetOfEOCD);
        } else {
            // single file: only the eight byte offset of the record is read and used
            skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET - ZipConstants.WORD /* signature has already been read */);
            dwordBbuf.rewind();
            IOUtils.readFully(archive, dwordBbuf);
            archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
        }

        // Step 2: verify that the locator really pointed at a Zip64 end of central directory record
        wordBbuf.rewind();
        IOUtils.readFully(archive, wordBbuf);
        if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
            throw new ZipException("Archive's ZIP64 end of central directory locator is corrupt.");
        }

        // Step 3: read where the central directory starts and seek there
        if (isSplitZipArchive) {
            skipBytes(ZIP64_EOCD_CFD_DISK_OFFSET - ZipConstants.WORD /* signature has already been read */);
            wordBbuf.rewind();
            IOUtils.readFully(archive, wordBbuf);
            centralDirectoryStartDiskNumber = ZipLong.getValue(wordBuf);

            skipBytes(ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET);

            dwordBbuf.rewind();
            IOUtils.readFully(archive, dwordBbuf);
            centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
            ((ZipSplitReadOnlySeekableByteChannel) archive).position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
        } else {
            skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET - ZipConstants.WORD /* signature has already been read */);
            dwordBbuf.rewind();
            IOUtils.readFully(archive, dwordBbuf);
            centralDirectoryStartDiskNumber = 0;
            centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
            archive.position(centralDirectoryStartRelativeOffset);
        }
    }
1139
1140    /**
1141     * Searches for the and positions the stream at the start of the &quot;End of central dir record&quot;.
1142     */
1143    private void positionAtEndOfCentralDirectoryRecord() throws IOException {
1144        final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, ZipArchiveOutputStream.EOCD_SIG);
1145        if (!found) {
1146            throw new ZipException("Archive is not a ZIP archive");
1147        }
1148    }
1149
    /**
     * Reads an individual entry of the central directory, creates an ZipArchiveEntry from it and adds it to the global maps.
     *
     * <p>
     * The fixed-size part of the record is read into {@code cfhBuf} in one go and decoded field by field, with {@code off} tracking the position of the
     * next field. The variable-length parts (file name, extra data, comment) are then read from the archive in that order.
     * </p>
     *
     * @param noUTF8Flag map used to collect entries that don't have their UTF-8 flag set and whose name will be set by data read from the local file header
     *                   later. The current entry may be added to this map.
     * @throws EOFException if the archive ends before the file name, extra data or comment has been read completely.
     * @throws ZipException if the extra data of the entry cannot be parsed.
     * @throws IOException  if reading fails or the record contains invalid values.
     */
    private void readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag) throws IOException {
        cfhBbuf.rewind();
        IOUtils.readFully(archive, cfhBbuf);
        int off = 0;
        final Entry ze = new Entry();

        final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
        off += ZipConstants.SHORT;
        ze.setVersionMadeBy(versionMadeBy);
        // the platform is derived from versionMadeBy by shifting and masking
        ze.setPlatform(versionMadeBy >> BYTE_SHIFT & NIBLET_MASK);

        ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
        off += ZipConstants.SHORT; // version required

        // general purpose flag: decides which encoding is used for this entry's name and comment
        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.ZIP_ENCODING_UTF_8 : zipEncoding;
        if (hasUTF8Flag) {
            ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
        }
        ze.setGeneralPurposeBit(gpFlag);
        // the same two bytes are stored a second time in their raw numeric form
        ze.setRawFlag(ZipShort.getValue(cfhBuf, off));

        off += ZipConstants.SHORT;

        // noinspection MagicConstant
        ze.setMethod(ZipShort.getValue(cfhBuf, off));
        off += ZipConstants.SHORT;

        // the stored DOS timestamp is converted to Java time before it is set
        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
        ze.setTime(time);
        off += ZipConstants.WORD;

        ze.setCrc(ZipLong.getValue(cfhBuf, off));
        off += ZipConstants.WORD;

        // compressed size first, uncompressed size second; both are rejected when negative
        long size = ZipLong.getValue(cfhBuf, off);
        if (size < 0) {
            throw new IOException("broken archive, entry with negative compressed size");
        }
        ze.setCompressedSize(size);
        off += ZipConstants.WORD;

        size = ZipLong.getValue(cfhBuf, off);
        if (size < 0) {
            throw new IOException("broken archive, entry with negative size");
        }
        ze.setSize(size);
        off += ZipConstants.WORD;

        // lengths of the three variable-length parts that follow the fixed-size part of the record
        final int fileNameLen = ZipShort.getValue(cfhBuf, off);
        off += ZipConstants.SHORT;
        if (fileNameLen < 0) {
            throw new IOException("broken archive, entry with negative fileNameLen");
        }

        final int extraLen = ZipShort.getValue(cfhBuf, off);
        off += ZipConstants.SHORT;
        if (extraLen < 0) {
            throw new IOException("broken archive, entry with negative extraLen");
        }

        final int commentLen = ZipShort.getValue(cfhBuf, off);
        off += ZipConstants.SHORT;
        if (commentLen < 0) {
            throw new IOException("broken archive, entry with negative commentLen");
        }

        ze.setDiskNumberStart(ZipShort.getValue(cfhBuf, off));
        off += ZipConstants.SHORT;

        ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
        off += ZipConstants.SHORT;

        ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
        off += ZipConstants.WORD;

        // variable-length part 1: the file name; a short read means the archive is truncated
        final byte[] fileName = IOUtils.readRange(archive, fileNameLen);
        if (fileName.length < fileNameLen) {
            throw new EOFException();
        }
        ze.setName(entryEncoding.decode(fileName), fileName);

        // LFH offset,
        // taken from the last field of the fixed-size part and shifted by the amount of content
        // that precedes the first local file header
        ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off) + firstLocalFileHeaderOffset);
        // data offset will be filled later
        entries.add(ze);

        // variable-length part 2: the central directory extra data
        final byte[] cdExtraData = IOUtils.readRange(archive, extraLen);
        if (cdExtraData.length < extraLen) {
            throw new EOFException();
        }
        try {
            ze.setCentralDirectoryExtra(cdExtraData);
        } catch (final RuntimeException ex) {
            // report unparseable extra data as a ZipException that keeps the original cause
            final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
            z.initCause(ex);
            throw z;
        }

        // must run after the extra data has been set: the Zip64 extra field may replace sizes,
        // header offset and disk number, and the sanity check looks at the final values
        setSizesAndOffsetFromZip64Extra(ze);
        sanityCheckLFHOffset(ze);

        // variable-length part 3: the entry comment
        final byte[] comment = IOUtils.readRange(archive, commentLen);
        if (comment.length < commentLen) {
            throw new EOFException();
        }
        ze.setComment(entryEncoding.decode(comment));

        // keep the raw name and comment bytes of entries without the UTF-8 flag so that
        // resolveLocalFileHeaderData can revisit them
        if (!hasUTF8Flag && useUnicodeExtraFields) {
            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
        }

        ze.setStreamContiguous(true);
    }
1271
1272    /**
1273     * Walks through all recorded entries and adds the data available from the local file header.
1274     *
1275     * <p>
1276     * Also records the offsets for the data to read from the entries.
1277     * </p>
1278     */
1279    private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag) throws IOException {
1280        for (final ZipArchiveEntry zipArchiveEntry : entries) {
1281            // entries are filled in populateFromCentralDirectory and never modified
1282            final Entry ze = (Entry) zipArchiveEntry;
1283            final int[] lens = setDataOffset(ze);
1284            final int fileNameLen = lens[0];
1285            final int extraFieldLen = lens[1];
1286            skipBytes(fileNameLen);
1287            final byte[] localExtraData = IOUtils.readRange(archive, extraFieldLen);
1288            if (localExtraData.length < extraFieldLen) {
1289                throw new EOFException();
1290            }
1291            try {
1292                ze.setExtra(localExtraData);
1293            } catch (final RuntimeException ex) {
1294                final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
1295                z.initCause(ex);
1296                throw z;
1297            }
1298
1299            if (entriesWithoutUTF8Flag.containsKey(ze)) {
1300                final NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
1301                ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, nc.comment);
1302            }
1303        }
1304    }
1305
1306    private void sanityCheckLFHOffset(final ZipArchiveEntry entry) throws IOException {
1307        if (entry.getDiskNumberStart() < 0) {
1308            throw new IOException("broken archive, entry with negative disk number");
1309        }
1310        if (entry.getLocalHeaderOffset() < 0) {
1311            throw new IOException("broken archive, entry with negative local file header offset");
1312        }
1313        if (isSplitZipArchive) {
1314            if (entry.getDiskNumberStart() > centralDirectoryStartDiskNumber) {
1315                throw new IOException("local file header for " + entry.getName() + " starts on a later disk than central directory");
1316            }
1317            if (entry.getDiskNumberStart() == centralDirectoryStartDiskNumber && entry.getLocalHeaderOffset() > centralDirectoryStartRelativeOffset) {
1318                throw new IOException("local file header for " + entry.getName() + " starts after central directory");
1319            }
1320        } else if (entry.getLocalHeaderOffset() > centralDirectoryStartOffset) {
1321            throw new IOException("local file header for " + entry.getName() + " starts after central directory");
1322        }
1323    }
1324
1325    private int[] setDataOffset(final ZipArchiveEntry entry) throws IOException {
1326        long offset = entry.getLocalHeaderOffset();
1327        if (isSplitZipArchive) {
1328            ((ZipSplitReadOnlySeekableByteChannel) archive).position(entry.getDiskNumberStart(), offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1329            // the offset should be updated to the global offset
1330            offset = archive.position() - LFH_OFFSET_FOR_FILENAME_LENGTH;
1331        } else {
1332            archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1333        }
1334        wordBbuf.rewind();
1335        IOUtils.readFully(archive, wordBbuf);
1336        wordBbuf.flip();
1337        wordBbuf.get(shortBuf);
1338        final int fileNameLen = ZipShort.getValue(shortBuf);
1339        wordBbuf.get(shortBuf);
1340        final int extraFieldLen = ZipShort.getValue(shortBuf);
1341        entry.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH + ZipConstants.SHORT + ZipConstants.SHORT + fileNameLen + extraFieldLen);
1342        if (entry.getDataOffset() + entry.getCompressedSize() > centralDirectoryStartOffset) {
1343            throw new IOException("data for " + entry.getName() + " overlaps with central directory.");
1344        }
1345        return new int[] { fileNameLen, extraFieldLen };
1346    }
1347
1348    /**
1349     * If the entry holds a Zip64 extended information extra field, read sizes from there if the entry's sizes are set to 0xFFFFFFFFF, do the same for the
1350     * offset of the local file header.
1351     *
1352     * <p>
1353     * Ensures the Zip64 extra either knows both compressed and uncompressed size or neither of both as the internal logic in ExtraFieldUtils forces the field
1354     * to create local header data even if they are never used - and here a field with only one size would be invalid.
1355     * </p>
1356     */
1357    private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry entry) throws IOException {
1358        final ZipExtraField extra = entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
1359        if (extra != null && !(extra instanceof Zip64ExtendedInformationExtraField)) {
1360            throw new ZipException("archive contains unparseable zip64 extra field");
1361        }
1362        final Zip64ExtendedInformationExtraField z64 = (Zip64ExtendedInformationExtraField) extra;
1363        if (z64 != null) {
1364            final boolean hasUncompressedSize = entry.getSize() == ZipConstants.ZIP64_MAGIC;
1365            final boolean hasCompressedSize = entry.getCompressedSize() == ZipConstants.ZIP64_MAGIC;
1366            final boolean hasRelativeHeaderOffset = entry.getLocalHeaderOffset() == ZipConstants.ZIP64_MAGIC;
1367            final boolean hasDiskStart = entry.getDiskNumberStart() == ZipConstants.ZIP64_MAGIC_SHORT;
1368            z64.reparseCentralDirectoryData(hasUncompressedSize, hasCompressedSize, hasRelativeHeaderOffset, hasDiskStart);
1369
1370            if (hasUncompressedSize) {
1371                final long size = z64.getSize().getLongValue();
1372                if (size < 0) {
1373                    throw new IOException("broken archive, entry with negative size");
1374                }
1375                entry.setSize(size);
1376            } else if (hasCompressedSize) {
1377                z64.setSize(new ZipEightByteInteger(entry.getSize()));
1378            }
1379
1380            if (hasCompressedSize) {
1381                final long size = z64.getCompressedSize().getLongValue();
1382                if (size < 0) {
1383                    throw new IOException("broken archive, entry with negative compressed size");
1384                }
1385                entry.setCompressedSize(size);
1386            } else if (hasUncompressedSize) {
1387                z64.setCompressedSize(new ZipEightByteInteger(entry.getCompressedSize()));
1388            }
1389
1390            if (hasRelativeHeaderOffset) {
1391                entry.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue());
1392            }
1393
1394            if (hasDiskStart) {
1395                entry.setDiskNumberStart(z64.getDiskStartNumber().getValue());
1396            }
1397        }
1398    }
1399
1400    /**
1401     * Skips the given number of bytes or throws an EOFException if skipping failed.
1402     */
1403    private void skipBytes(final int count) throws IOException {
1404        final long currentPosition = archive.position();
1405        final long newPosition = currentPosition + count;
1406        if (newPosition > archive.size()) {
1407            throw new EOFException();
1408        }
1409        archive.position(newPosition);
1410    }
1411
1412    /**
1413     * Sorts entries in place by offset.
1414     *
1415     * @param allEntries entries to sort
1416     * @return the given entries, sorted.
1417     */
1418    private ZipArchiveEntry[] sortByOffset(final ZipArchiveEntry[] allEntries) {
1419        Arrays.sort(allEntries, offsetComparator);
1420        return allEntries;
1421    }
1422
1423    /**
1424     * Checks whether the archive starts with an LFH. If it doesn't, it may be an empty archive.
1425     */
1426    private boolean startsWithLocalFileHeader() throws IOException {
1427        archive.position(firstLocalFileHeaderOffset);
1428        wordBbuf.rewind();
1429        IOUtils.readFully(archive, wordBbuf);
1430        return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG);
1431    }
1432
1433    /**
1434     * Searches the archive backwards from minDistance to maxDistance for the given signature, positions the RandomaccessFile right at the signature if it has
1435     * been found.
1436     */
1437    private boolean tryToLocateSignature(final long minDistanceFromEnd, final long maxDistanceFromEnd, final byte[] sig) throws IOException {
1438        boolean found = false;
1439        long off = archive.size() - minDistanceFromEnd;
1440        final long stopSearching = Math.max(0L, archive.size() - maxDistanceFromEnd);
1441        if (off >= 0) {
1442            for (; off >= stopSearching; off--) {
1443                archive.position(off);
1444                try {
1445                    wordBbuf.rewind();
1446                    IOUtils.readFully(archive, wordBbuf);
1447                    wordBbuf.flip();
1448                } catch (final EOFException ex) { // NOSONAR
1449                    break;
1450                }
1451                int curr = wordBbuf.get();
1452                if (curr == sig[POS_0]) {
1453                    curr = wordBbuf.get();
1454                    if (curr == sig[POS_1]) {
1455                        curr = wordBbuf.get();
1456                        if (curr == sig[POS_2]) {
1457                            curr = wordBbuf.get();
1458                            if (curr == sig[POS_3]) {
1459                                found = true;
1460                                break;
1461                            }
1462                        }
1463                    }
1464                }
1465            }
1466        }
1467        if (found) {
1468            archive.position(off);
1469        }
1470        return found;
1471    }
1472}