/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

/*
 * This package is based on the work done by Timothy Gerard Endres
 * (time@ice.com) to whom the Ant project is very grateful for his great code.
 */

package org.apache.commons.compress.archivers.tar;

import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.BoundedInputStream;
import org.apache.commons.compress.utils.IOUtils;

/**
 * The TarArchiveInputStream reads a UNIX tar archive as an InputStream.
 * Methods are provided to position the stream at each successive entry in
 * the archive, and then to read each entry as a normal input stream
 * using read().
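 *
 * <p>A minimal usage sketch (the file name and buffer size here are
 * illustrative, not part of the API):</p>
 * <pre>{@code
 * try (TarArchiveInputStream in = new TarArchiveInputStream(
 *         Files.newInputStream(Paths.get("archive.tar")))) {
 *     TarArchiveEntry entry;
 *     while ((entry = in.getNextEntry()) != null) {
 *         byte[] buf = new byte[8192];
 *         int n;
 *         while ((n = in.read(buf)) != -1) {
 *             // process n bytes of the current entry's data
 *         }
 *     }
 * }
 * }</pre>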
 * @NotThreadSafe
 */
public class TarArchiveInputStream extends ArchiveInputStream<TarArchiveEntry> {

    private static final int SMALL_BUFFER_SIZE = 256;

    /**
     * Checks if the signature matches what is expected for a tar file.
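     *
     * <p>For reference: a POSIX tar header stores the NUL-terminated magic
     * {@code ustar} at offset 257, followed by the two-byte version {@code 00};
     * GNU tar instead uses the magic {@code ustar } (with a trailing space) and a
     * space- or zero-valued version field.</p>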
     *
     * @param signature
     *            the bytes to check
     * @param length
     *            the number of bytes to check
     * @return true, if this stream is a tar archive stream, false otherwise
     */
    public static boolean matches(final byte[] signature, final int length) {
        if (length < TarConstants.VERSION_OFFSET + TarConstants.VERSIONLEN) {
            return false;
        }

        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            && ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)) {
            return true;
        }
        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            && (ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE,
                    signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
                || ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO,
                    signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN))) {
            return true;
        }
        // COMPRESS-107 - recognize Ant tar files
        return ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            && ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN);
    }

    private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE];

    /** The size of the TAR header */
    private final int recordSize;

    /** The buffer used to store the TAR header **/
    private final byte[] recordBuffer;

    /** The size of a block */
    private final int blockSize;

    /** True if the file has hit EOF */
    private boolean hasHitEOF;

    /** Size of the current entry */
    private long entrySize;

    /** How far into the current entry the stream has read */
    private long entryOffset;

    /** An input stream to read from */
    private final InputStream inputStream;

    /** Input streams for reading sparse entries **/
    private List<InputStream> sparseInputStreams;

    /** The index of the current input stream being read when reading sparse entries */
    private int currentSparseInputStreamIndex;

    /** The meta-data about the current entry */
    private TarArchiveEntry currEntry;

    /** The encoding of the file */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    // the global PAX header
    private Map<String, String> globalPaxHeaders = new HashMap<>();

    // the global sparse headers, only used in PAX Format 0.X
    private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();

    private final boolean lenient;

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     */
    public TarArchiveInputStream(final InputStream is) {
        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
     * exception instead.
     * @since 1.19
     */
    public TarArchiveInputStream(final InputStream is, final boolean lenient) {
        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize) {
        this(is, blockSize, TarConstants.DEFAULT_RCDSIZE);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize) {
        this(is, blockSize, recordSize, null);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     * @param encoding name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize,
                                 final String encoding) {
        this(is, blockSize, recordSize, encoding, false);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     * @param encoding name of the encoding to use for file names
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
     * exception instead.
     * @since 1.19
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize,
                                 final String encoding, final boolean lenient) {
        this.inputStream = is;
        this.hasHitEOF = false;
        this.encoding = encoding;
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.recordSize = recordSize;
        this.recordBuffer = new byte[recordSize];
        this.blockSize = blockSize;
        this.lenient = lenient;
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param encoding name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize,
                                 final String encoding) {
        this(is, blockSize, TarConstants.DEFAULT_RCDSIZE, encoding);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param encoding name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveInputStream(final InputStream is, final String encoding) {
        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE,
             encoding);
    }

    private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders)
        throws IOException {
        currEntry.updateEntryFromPaxHeaders(headers);
        currEntry.setSparseHeaders(sparseHeaders);
    }

    /**
     * Gets the available data that can be read from the current
     * entry in the archive. This does not indicate how much data
     * is left in the entire archive, only in the current entry.
     * This value is determined from the entry's size header field
     * and the amount of data already read from the current entry.
     * Integer.MAX_VALUE is returned in case more than Integer.MAX_VALUE
     * bytes are left in the current entry in the archive.
     *
     * @return The number of available bytes for the current entry.
     * @throws IOException declared to match the {@link InputStream#available} signature; not actually thrown here
     */
    @Override
    public int available() throws IOException {
        if (isDirectory()) {
            return 0;
        }

        if (currEntry.getRealSize() - entryOffset > Integer.MAX_VALUE) {
            return Integer.MAX_VALUE;
        }
        return (int) (currEntry.getRealSize() - entryOffset);
    }


    /**
     * Builds the input streams consisting of all-zero input streams and non-zero input streams.
     * When reading from the non-zero input streams, the data is actually read from the original input stream.
     * The size of each input stream is introduced by the sparse headers.
     *
     * NOTE: Some of the all-zero and non-zero input streams have a size of 0. We DO NOT store
     *       zero-size input streams because they contribute no data.
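     *
     * <p>For example (a hypothetical layout, for illustration only): sparse headers
     * [(offset=0, numbytes=100), (offset=1000, numbytes=50)] produce the stream
     * sequence [data(100), zeros(900), data(50)], because the gap between offset
     * 100 and offset 1000 is a hole that reads back as zeros.</p>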
     */
    private void buildSparseInputStreams() throws IOException {
        currentSparseInputStreamIndex = -1;
        sparseInputStreams = new ArrayList<>();

        final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();

        // Stream doesn't need to be closed at all as it doesn't use any resources
        final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); //NOSONAR
        // logical offset into the extracted entry
        long offset = 0;
        for (final TarArchiveStructSparse sparseHeader : sparseHeaders) {
            final long zeroBlockSize = sparseHeader.getOffset() - offset;
            if (zeroBlockSize < 0) {
                // sparse header says to move backwards inside the extracted entry
                throw new IOException("Corrupted struct sparse detected");
            }

            // only store the zero block if it is not empty
            if (zeroBlockSize > 0) {
                sparseInputStreams.add(new BoundedInputStream(zeroInputStream, sparseHeader.getOffset() - offset));
            }

            // only store the input streams with non-zero size
            if (sparseHeader.getNumbytes() > 0) {
                sparseInputStreams.add(new BoundedInputStream(inputStream, sparseHeader.getNumbytes()));
            }

            offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
        }

        if (!sparseInputStreams.isEmpty()) {
            currentSparseInputStreamIndex = 0;
        }
    }

    /**
     * Whether this class is able to read the given entry.
     *
     * @return The implementation will return true if the {@link ArchiveEntry} is an instance of {@link TarArchiveEntry}
     */
    @Override
    public boolean canReadEntryData(final ArchiveEntry ae) {
        return ae instanceof TarArchiveEntry;
    }

    /**
     * Closes this stream, the underlying input stream and any input streams built for sparse entries.
     * @throws IOException on error
     */
    @Override
    public void close() throws IOException {
        // Close all the input streams in sparseInputStreams
        if (sparseInputStreams != null) {
            for (final InputStream inputStream : sparseInputStreams) {
                inputStream.close();
            }
        }

        inputStream.close();
    }

    /**
     * This method is invoked once the end of the archive is hit; it
     * tries to consume the remaining bytes under the assumption that
     * the tool creating this archive has padded the last block.
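     *
     * <p>For example, with the default 10240-byte block size, if 1536 bytes of
     * the final block have been read, the remaining 8704 bytes are skipped here.</p>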
     */
    private void consumeRemainderOfLastBlock() throws IOException {
        final long bytesReadOfLastBlock = getBytesRead() % blockSize;
        if (bytesReadOfLastBlock > 0) {
            final long skipped = IOUtils.skip(inputStream, blockSize - bytesReadOfLastBlock);
            count(skipped);
        }
    }

    /**
     * For FileInputStream, skip() always returns the requested number of bytes, so we
     * need the available bytes to determine how many bytes were actually skipped.
     *
     * @param available available bytes returned by inputStream.available()
     * @param skipped   skipped bytes returned by inputStream.skip()
     * @param expected  bytes expected to skip
     * @return number of bytes actually skipped
     * @throws IOException if a truncated tar archive is detected
     */
    private long getActuallySkipped(final long available, final long skipped, final long expected) throws IOException {
        long actuallySkipped = skipped;
        if (inputStream instanceof FileInputStream) {
            actuallySkipped = Math.min(skipped, available);
        }

        if (actuallySkipped != expected) {
            throw new IOException("Truncated TAR archive");
        }

        return actuallySkipped;
    }

    /**
     * Gets the current TAR Archive Entry that this input stream is processing
     *
     * @return The current Archive Entry
     */
    public TarArchiveEntry getCurrentEntry() {
        return currEntry;
    }

    /**
     * Gets the next entry in this tar archive as long name data.
     *
     * @return The next entry in the archive as long name data, or null.
     * @throws IOException on error
     */
    protected byte[] getLongNameData() throws IOException {
        // read in the name
        final ByteArrayOutputStream longName = new ByteArrayOutputStream();
        int length = 0;
        while ((length = read(smallBuf)) >= 0) {
            longName.write(smallBuf, 0, length);
        }
        getNextEntry();
        if (currEntry == null) {
            // Bugzilla: 40334
            // Malformed tar file - long entry name not followed by entry
            return null;
        }
        byte[] longNameData = longName.toByteArray();
        // remove trailing null terminator(s)
        length = longNameData.length;
        while (length > 0 && longNameData[length - 1] == 0) {
            --length;
        }
        if (length != longNameData.length) {
            longNameData = Arrays.copyOf(longNameData, length);
        }
        return longNameData;
    }

    /**
     * Returns the next Archive Entry in this Stream.
     *
     * @return the next entry,
     *         or {@code null} if there are no more entries
     * @throws IOException if the next entry could not be read
     */
    @Override
    public TarArchiveEntry getNextEntry() throws IOException {
        return getNextTarEntry();
    }

    /**
     * Gets the next entry in this tar archive. This will skip
     * over any remaining data in the current entry, if there
     * is one, place the input stream at the header of the
     * next entry, read the header, instantiate a new
     * TarEntry from the header bytes and return that entry.
     * If there are no more entries in the archive, null will
     * be returned to indicate that the end of the archive has
     * been reached.
     *
     * @return The next TarEntry in the archive, or null.
     * @throws IOException on error
     * @deprecated Use {@link #getNextEntry()}.
     */
    @Deprecated
    public TarArchiveEntry getNextTarEntry() throws IOException {
        if (isAtEOF()) {
            return null;
        }

        if (currEntry != null) {
            /* Skip will only go to the end of the current entry */
            IOUtils.skip(this, Long.MAX_VALUE);

            /* skip to the end of the last record */
            skipRecordPadding();
        }

        final byte[] headerBuf = getRecord();

        if (headerBuf == null) {
            /* hit EOF */
            currEntry = null;
            return null;
        }

        try {
            currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf, zipEncoding, lenient);
        } catch (final IllegalArgumentException e) {
            throw new IOException("Error detected parsing the header", e);
        }

        entryOffset = 0;
        entrySize = currEntry.getSize();

        if (currEntry.isGNULongLinkEntry()) {
            final byte[] longLinkData = getLongNameData();
            if (longLinkData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long link entry name not followed by
                // entry
                return null;
            }
            currEntry.setLinkName(zipEncoding.decode(longLinkData));
        }

        if (currEntry.isGNULongNameEntry()) {
            final byte[] longNameData = getLongNameData();
            if (longNameData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long entry name not followed by
                // entry
                return null;
            }

            // COMPRESS-509 : the name of directories should end with '/'
            final String name = zipEncoding.decode(longNameData);
            currEntry.setName(name);
            if (currEntry.isDirectory() && !name.endsWith("/")) {
                currEntry.setName(name + "/");
            }
        }

        if (currEntry.isGlobalPaxHeader()) { // Process Global Pax headers
            readGlobalPaxHeaders();
        }

        try {
            if (currEntry.isPaxHeader()) { // Process Pax headers
                paxHeaders();
            } else if (!globalPaxHeaders.isEmpty()) {
                applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders);
            }
        } catch (final NumberFormatException e) {
            throw new IOException("Error detected parsing the pax header", e);
        }

        if (currEntry.isOldGNUSparse()) { // Process sparse files
            readOldGNUSparse();
        }

        // If the size of the next element in the archive has changed
        // due to a new size being reported in the posix header
        // information, we update entrySize here so that it contains
        // the correct value.
        entrySize = currEntry.getSize();

        return currEntry;
    }

    /**
     * Gets the next record in this tar archive. This will skip
     * over any remaining data in the current entry, if there
     * is one, and place the input stream at the header of the
     * next entry.
     *
     * <p>If there are no more entries in the archive, null will be
     * returned to indicate that the end of the archive has been
     * reached.  At the same time the {@code hasHitEOF} marker will be
     * set to true.</p>
     *
     * @return The next header in the archive, or null.
     * @throws IOException on error
     */
    private byte[] getRecord() throws IOException {
        byte[] headerBuf = readRecord();
        setAtEOF(isEOFRecord(headerBuf));
        if (isAtEOF() && headerBuf != null) {
            tryToConsumeSecondEOFRecord();
            consumeRemainderOfLastBlock();
            headerBuf = null;
        }
        return headerBuf;
    }

    /**
     * Gets the record size being used by this stream's buffer.
     *
     * @return The record size in bytes.
     */
    public int getRecordSize() {
        return recordSize;
    }

    protected final boolean isAtEOF() {
        return hasHitEOF;
    }

    private boolean isDirectory() {
        return currEntry != null && currEntry.isDirectory();
    }

    /**
     * Determines if an archive record indicates End of Archive. End of
     * archive is indicated by a record that consists entirely of null bytes.
     *
     * @param record The record data to check.
     * @return true if the record data is an End of Archive
     */
    protected boolean isEOFRecord(final byte[] record) {
        return record == null || ArchiveUtils.isArrayZero(record, recordSize);
    }

    /**
     * Since we do not support marking just yet, we do nothing.
     *
     * @param markLimit The limit to mark.
     */
    @Override
    public synchronized void mark(final int markLimit) {
    }

    /**
     * Since we do not support marking just yet, we return false.
     *
     * @return False.
     */
    @Override
    public boolean markSupported() {
        return false;
    }

    /**
     * For PAX Format 0.0, the sparse headers (GNU.sparse.offset and GNU.sparse.numbytes)
     * may appear multiple times, and they look like:
     *
     * <pre>
     * GNU.sparse.size=size
     * GNU.sparse.numblocks=numblocks
     * repeat numblocks times
     *   GNU.sparse.offset=offset
     *   GNU.sparse.numbytes=numbytes
     * end repeat
     * </pre>
     *
     * For PAX Format 0.1, the sparse headers are stored in a single variable: GNU.sparse.map
     *
     * <pre>
     * GNU.sparse.map
     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
     * </pre>
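     *
     * <p>For example, a (hypothetical) 0.1-format header carrying
     * {@code GNU.sparse.map=0,100,1000,50} describes two data chunks: 100 bytes
     * at offset 0 and 50 bytes at offset 1000.</p>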
     *
     * For PAX Format 1.X:
     * The sparse map itself is stored in the file data block, preceding the actual file data.
     * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary.
     * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers
     * giving the offset and size of the data block it describes.
     * @throws IOException if the PAX headers cannot be parsed or no entry follows them
     */
    private void paxHeaders() throws IOException {
        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
        final Map<String, String> headers = TarUtils.parsePaxHeaders(this, sparseHeaders, globalPaxHeaders, entrySize);

        // for 0.1 PAX Headers
        if (headers.containsKey(TarGnuSparseKeys.MAP)) {
            sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get(TarGnuSparseKeys.MAP)));
        }
        getNextEntry(); // Get the actual file entry
        if (currEntry == null) {
            throw new IOException("premature end of tar archive. Didn't find any entry after PAX header.");
        }
        applyPaxHeadersToCurrentEntry(headers, sparseHeaders);

        // for 1.0 PAX Format, the sparse map is stored in the file data block
        if (currEntry.isPaxGNU1XSparse()) {
            sparseHeaders = TarUtils.parsePAX1XSparseHeaders(inputStream, recordSize);
            currEntry.setSparseHeaders(sparseHeaders);
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }

    /**
     * Reads bytes from the current tar archive entry.
     *
     * This method is aware of the boundaries of the current
     * entry in the archive and will deal with them as if they
     * were this stream's start and EOF.
     *
     * @param buf The buffer into which to place bytes read.
     * @param offset The offset at which to place bytes read.
     * @param numToRead The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(final byte[] buf, final int offset, int numToRead) throws IOException {
        if (numToRead == 0) {
            return 0;
        }
        int totalRead = 0;

        if (isAtEOF() || isDirectory()) {
            return -1;
        }

        if (currEntry == null) {
            throw new IllegalStateException("No current tar entry");
        }

        if (entryOffset >= currEntry.getRealSize()) {
            return -1;
        }

        numToRead = Math.min(numToRead, available());

        if (currEntry.isSparse()) {
            // for sparse entries, we need to read them in another way
            totalRead = readSparse(buf, offset, numToRead);
        } else {
            totalRead = inputStream.read(buf, offset, numToRead);
        }

        if (totalRead == -1) {
            if (numToRead > 0) {
                throw new IOException("Truncated TAR archive");
            }
            setAtEOF(true);
        } else {
            count(totalRead);
            entryOffset += totalRead;
        }

        return totalRead;
    }

    private void readGlobalPaxHeaders() throws IOException {
        globalPaxHeaders = TarUtils.parsePaxHeaders(this, globalSparseHeaders, globalPaxHeaders, entrySize);
        getNextEntry(); // Get the actual file entry

        if (currEntry == null) {
            throw new IOException("Error detected parsing the pax header");
        }
    }

    /**
     * Adds the sparse chunks from the current entry to the sparse chunks,
     * including any additional sparse entries following the current entry.
     *
     * @throws IOException on error
     */
    private void readOldGNUSparse() throws IOException {
        if (currEntry.isExtended()) {
            TarArchiveSparseEntry entry;
            do {
                final byte[] headerBuf = getRecord();
                if (headerBuf == null) {
                    throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag.");
                }
                entry = new TarArchiveSparseEntry(headerBuf);
                currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
            } while (entry.isExtended());
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }

    /**
     * Reads a record from the input stream and returns the data.
     *
     * @return The record data or null if EOF has been hit.
     * @throws IOException on error
     */
    protected byte[] readRecord() throws IOException {
        final int readNow = IOUtils.readFully(inputStream, recordBuffer);
        count(readNow);
        if (readNow != recordSize) {
            return null;
        }

        return recordBuffer;
    }

    /**
     * For sparse tar entries, there are many "holes" (consisting of all zeros) in the file. Only the non-zero data is
     * stored in tar files, and it is stored separately. The structure of the non-zero data is described by the
     * sparse headers: the offset, where a block of non-zero data starts, and numbytes, the length of the
     * non-zero data block.
     * When reading sparse entries, the actual data is reconstructed by combining the "holes" and the non-zero data
     * according to the sparse headers.
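     *
     * <p>For example, a chunk described by offset=100, numbytes=50 contributes 50
     * bytes of stored data that belong at offset 100 of the extracted entry; the
     * 100 bytes before it are a hole and read back as zeros. (The numbers are
     * illustrative only.)</p>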
     *
     * @param buf The buffer into which to place bytes read.
     * @param offset The offset at which to place bytes read.
     * @param numToRead The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    private int readSparse(final byte[] buf, final int offset, final int numToRead) throws IOException {
        // if there are no actual input streams, just read from the original input stream
        if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
            return inputStream.read(buf, offset, numToRead);
        }

        if (currentSparseInputStreamIndex >= sparseInputStreams.size()) {
            return -1;
        }

        final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
        final int readLen = currentInputStream.read(buf, offset, numToRead);

        // if the current input stream is the last input stream,
        // just return the number of bytes read from current input stream
        if (currentSparseInputStreamIndex == sparseInputStreams.size() - 1) {
            return readLen;
        }

        // if EOF of the current input stream has been reached, move to the next input stream
        // and recursively call read
        if (readLen == -1) {
            currentSparseInputStreamIndex++;
            return readSparse(buf, offset, numToRead);
        }

        // if the remaining data of the current input stream is not long enough, move to the
        // next input stream and recursively call read
        if (readLen < numToRead) {
            currentSparseInputStreamIndex++;
            final int readLenOfNext = readSparse(buf, offset + readLen, numToRead - readLen);
            if (readLenOfNext == -1) {
                return readLen;
            }

            return readLen + readLenOfNext;
        }

        // if the remaining data of the current input stream is enough (readLen == numToRead), just return readLen
        return readLen;
    }

    /**
     * Since we do not support marking just yet, we do nothing.
     */
    @Override
    public synchronized void reset() {
    }

    protected final void setAtEOF(final boolean b) {
        hasHitEOF = b;
    }

    protected final void setCurrentEntry(final TarArchiveEntry e) {
        currEntry = e;
    }

    /**
     * Skips over and discards {@code n} bytes of data from this input
     * stream. The {@code skip} method may, for a variety of reasons, end
     * up skipping over some smaller number of bytes, possibly {@code 0}.
     * This may result from any of a number of conditions; reaching end of file
     * or end of entry before {@code n} bytes have been skipped are only
     * two possibilities. The actual number of bytes skipped is returned. If
     * {@code n} is negative, no bytes are skipped.
     *
     * @param n
     *            the number of bytes to be skipped.
     * @return the actual number of bytes skipped.
     * @throws IOException if a truncated tar archive is detected
     *                     or some other I/O error occurs
     */
    @Override
    public long skip(final long n) throws IOException {
        if (n <= 0 || isDirectory()) {
            return 0;
        }

        final long availableOfInputStream = inputStream.available();
        final long available = currEntry.getRealSize() - entryOffset;
        final long numToSkip = Math.min(n, available);
        long skipped;

        if (!currEntry.isSparse()) {
            skipped = IOUtils.skip(inputStream, numToSkip);
            // for a non-sparse entry, determine the bytes actually skipped with the help of
            // inputStream.available() if inputStream is an instance of FileInputStream
            skipped = getActuallySkipped(availableOfInputStream, skipped, numToSkip);
        } else {
            skipped = skipSparse(numToSkip);
        }

        count(skipped);
        entryOffset += skipped;
        return skipped;
    }

    /**
     * The last record block should be written at the full size, so skip any
     * additional space used to fill a record after an entry.
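     *
     * <p>For example, with 512-byte records, an entry of 1100 bytes occupies three
     * records (1536 bytes), so 436 bytes of padding are skipped here.</p>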
     *
     * @throws IOException if a truncated tar archive is detected
     */
    private void skipRecordPadding() throws IOException {
        if (!isDirectory() && this.entrySize > 0 && this.entrySize % this.recordSize != 0) {
            final long available = inputStream.available();
            final long numRecords = this.entrySize / this.recordSize + 1;
            final long padding = numRecords * this.recordSize - this.entrySize;
            long skipped = IOUtils.skip(inputStream, padding);

            skipped = getActuallySkipped(available, skipped, padding);

            count(skipped);
        }
    }

    /**
     * Skips n bytes of data from the current input stream. If the current input stream doesn't
     * have enough data to skip, jumps to the next input stream and skips the remaining bytes;
     * this repeats until n bytes in total have been skipped or all the input streams have been
     * exhausted.
     *
     * @param n bytes of data to skip
     * @return actual bytes of data skipped
     * @throws IOException if an I/O error occurs while skipping
     */
    private long skipSparse(final long n) throws IOException {
        if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
            return inputStream.skip(n);
        }

        long bytesSkipped = 0;

        while (bytesSkipped < n && currentSparseInputStreamIndex < sparseInputStreams.size()) {
            final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
            bytesSkipped += currentInputStream.skip(n - bytesSkipped);

            if (bytesSkipped < n) {
                currentSparseInputStreamIndex++;
            }
        }

        return bytesSkipped;
    }

    /**
     * Tries to read the next record, rewinding the stream if it is not an EOF record.
     *
     * <p>This is meant to protect against cases where a tar
     * implementation has written only one EOF record when two are
     * expected.  Actually this won't help much since a non-conforming
     * implementation likely won't fill full blocks consisting of - by
     * default - twenty records either, so we have probably already read
     * beyond the archive anyway.</p>
     */
    private void tryToConsumeSecondEOFRecord() throws IOException {
        boolean shouldReset = true;
        final boolean marked = inputStream.markSupported();
        if (marked) {
            inputStream.mark(recordSize);
        }
        try {
            shouldReset = !isEOFRecord(readRecord());
        } finally {
            if (shouldReset && marked) {
                pushedBackBytes(recordSize);
                inputStream.reset();
            }
        }
    }
}