001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.ar;
020
021import java.io.EOFException;
022import java.io.IOException;
023import java.io.InputStream;
024import java.util.Arrays;
025import java.util.regex.Pattern;
026
027import org.apache.commons.compress.archivers.ArchiveInputStream;
028import org.apache.commons.compress.utils.ArchiveUtils;
029import org.apache.commons.compress.utils.IOUtils;
030
031/**
032 * Implements the "ar" archive format as an input stream.
033 *
034 * @NotThreadSafe
035 */
036public class ArArchiveInputStream extends ArchiveInputStream<ArArchiveEntry> {
037
038    // offsets and length of meta data parts
039    private static final int NAME_OFFSET = 0;
040    private static final int NAME_LEN = 16;
041    private static final int LAST_MODIFIED_OFFSET = NAME_LEN;
042
043    private static final int LAST_MODIFIED_LEN = 12;
044
045    private static final int USER_ID_OFFSET = LAST_MODIFIED_OFFSET + LAST_MODIFIED_LEN;
046
047    private static final int USER_ID_LEN = 6;
048
049    private static final int GROUP_ID_OFFSET = USER_ID_OFFSET + USER_ID_LEN;
050    private static final int GROUP_ID_LEN = 6;
051    private static final int FILE_MODE_OFFSET = GROUP_ID_OFFSET + GROUP_ID_LEN;
052    private static final int FILE_MODE_LEN = 8;
053    private static final int LENGTH_OFFSET = FILE_MODE_OFFSET + FILE_MODE_LEN;
054    private static final int LENGTH_LEN = 10;
055    static final String BSD_LONGNAME_PREFIX = "#1/";
056    private static final int BSD_LONGNAME_PREFIX_LEN =
057        BSD_LONGNAME_PREFIX.length();
058    private static final Pattern BSD_LONGNAME_PATTERN = Pattern.compile("^" + BSD_LONGNAME_PREFIX + "\\d+");
059    private static final String GNU_STRING_TABLE_NAME = "//";
060    private static final Pattern GNU_LONGNAME_PATTERN = Pattern.compile("^/\\d+");
061    /**
062     * Does the name look like it is a long name (or a name containing
063     * spaces) as encoded by BSD ar?
064     *
065     * <p>From the FreeBSD ar(5) man page:</p>
066     * <pre>
067     * BSD   In the BSD variant, names that are shorter than 16
068     *       characters and without embedded spaces are stored
069     *       directly in this field.  If a name has an embedded
070     *       space, or if it is longer than 16 characters, then
071     *       the string "#1/" followed by the decimal represen-
072     *       tation of the length of the file name is placed in
073     *       this field. The actual file name is stored immedi-
074     *       ately after the archive header.  The content of the
075     *       archive member follows the file name.  The ar_size
076     *       field of the header (see below) will then hold the
077     *       sum of the size of the file name and the size of
078     *       the member.
079     * </pre>
080     *
081     * @since 1.3
082     */
083    private static boolean isBSDLongName(final String name) {
084        return name != null && BSD_LONGNAME_PATTERN.matcher(name).matches();
085    }
086
087    /**
088     * Is this the name of the "Archive String Table" as used by
089     * SVR4/GNU to store long file names?
090     *
091     * <p>GNU ar stores multiple extended file names in the data section
092     * of a file with the name "//", this record is referred to by
093     * future headers.</p>
094     *
095     * <p>A header references an extended file name by storing a "/"
096     * followed by a decimal offset to the start of the file name in
097     * the extended file name data section.</p>
098     *
099     * <p>The format of the "//" file itself is simply a list of the
100     * long file names, each separated by one or more LF
101     * characters. Note that the decimal offsets are number of
102     * characters, not line or string number within the "//" file.</p>
103     */
104    private static boolean isGNUStringTable(final String name) {
105        return GNU_STRING_TABLE_NAME.equals(name);
106    }
107
108    /**
109     * Checks if the signature matches ASCII "!&lt;arch&gt;" followed by a single LF
110     * control character
111     *
112     * @param signature
113     *            the bytes to check
114     * @param length
115     *            the number of bytes to check
116     * @return true, if this stream is an Ar archive stream, false otherwise
117     */
118    public static boolean matches(final byte[] signature, final int length) {
119        // 3c21 7261 6863 0a3e
120
121        return length >= 8 && signature[0] == 0x21 &&
122                signature[1] == 0x3c && signature[2] == 0x61 &&
123                signature[3] == 0x72 && signature[4] == 0x63 &&
124                signature[5] == 0x68 && signature[6] == 0x3e &&
125                signature[7] == 0x0a;
126    }
127
128    private final InputStream input;
129
130    private long offset;
131
132    private boolean closed;
133
134    /*
135     * If getNextEntry has been called, the entry metadata is stored in
136     * currentEntry.
137     */
138    private ArArchiveEntry currentEntry;
139
140    // Storage area for extra long names (GNU ar)
141    private byte[] namebuffer;
142
143    /*
144     * The offset where the current entry started. -1 if no entry has been
145     * called
146     */
147    private long entryOffset = -1;
148
149    // cached buffer for meta data - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
150    private final byte[] metaData =
151        new byte[NAME_LEN + LAST_MODIFIED_LEN + USER_ID_LEN + GROUP_ID_LEN + FILE_MODE_LEN + LENGTH_LEN];
152
153    /**
154     * Constructs an Ar input stream with the referenced stream
155     *
156     * @param inputStream
157     *            the ar input stream
158     */
159    public ArArchiveInputStream(final InputStream inputStream) {
160        this.input = inputStream;
161    }
162
163    private int asInt(final byte[] byteArray, final int offset, final int len) {
164        return asInt(byteArray, offset, len, 10, false);
165    }
166
167    private int asInt(final byte[] byteArray, final int offset, final int len, final boolean treatBlankAsZero) {
168        return asInt(byteArray, offset, len, 10, treatBlankAsZero);
169    }
170
171    private int asInt(final byte[] byteArray, final int offset, final int len, final int base) {
172        return asInt(byteArray, offset, len, base, false);
173    }
174
175    private int asInt(final byte[] byteArray, final int offset, final int len, final int base, final boolean treatBlankAsZero) {
176        final String string = ArchiveUtils.toAsciiString(byteArray, offset, len).trim();
177        if (string.isEmpty() && treatBlankAsZero) {
178            return 0;
179        }
180        return Integer.parseInt(string, base);
181    }
182    private long asLong(final byte[] byteArray, final int offset, final int len) {
183        return Long.parseLong(ArchiveUtils.toAsciiString(byteArray, offset, len).trim());
184    }
185    /*
186     * (non-Javadoc)
187     *
188     * @see java.io.InputStream#close()
189     */
190    @Override
191    public void close() throws IOException {
192        if (!closed) {
193            closed = true;
194            input.close();
195        }
196        currentEntry = null;
197    }
198
199    /**
200     * Reads the real name from the current stream assuming the very
201     * first bytes to be read are the real file name.
202     *
203     * @see #isBSDLongName
204     *
205     * @since 1.3
206     */
207    private String getBSDLongName(final String bsdLongName) throws IOException {
208        final int nameLen =
209            Integer.parseInt(bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN));
210        final byte[] name = IOUtils.readRange(input, nameLen);
211        final int read = name.length;
212        trackReadBytes(read);
213        if (read != nameLen) {
214            throw new EOFException();
215        }
216        return ArchiveUtils.toAsciiString(name);
217    }
218
219    /**
220     * Gets an extended name from the GNU extended name buffer.
221     *
222     * @param offset pointer to entry within the buffer
223     * @return the extended file name; without trailing "/" if present.
224     * @throws IOException if name not found or buffer not set up
225     */
226    private String getExtendedName(final int offset) throws IOException {
227        if (namebuffer == null) {
228            throw new IOException("Cannot process GNU long file name as no // record was found");
229        }
230        for (int i = offset; i < namebuffer.length; i++) {
231            if (namebuffer[i] == '\012' || namebuffer[i] == 0) {
232                if (namebuffer[i - 1] == '/') {
233                    i--; // drop trailing /
234                }
235                return ArchiveUtils.toAsciiString(namebuffer, offset, i - offset);
236            }
237        }
238        throw new IOException("Failed to read entry: " + offset);
239    }
240
241    /**
242     * Returns the next AR entry in this stream.
243     *
244     * @return the next AR entry.
245     * @throws IOException
246     *             if the entry could not be read
247     * @deprecated Use {@link #getNextEntry()}.
248     */
249    @Deprecated
250    public ArArchiveEntry getNextArEntry() throws IOException {
251        if (currentEntry != null) {
252            final long entryEnd = entryOffset + currentEntry.getLength();
253            final long skipped = IOUtils.skip(input, entryEnd - offset);
254            trackReadBytes(skipped);
255            currentEntry = null;
256        }
257
258        if (offset == 0) {
259            final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER);
260            final byte[] realized = IOUtils.readRange(input, expected.length);
261            final int read = realized.length;
262            trackReadBytes(read);
263            if (read != expected.length) {
264                throw new IOException("Failed to read header. Occurred at byte: " + getBytesRead());
265            }
266            if (!Arrays.equals(expected, realized)) {
267                throw new IOException("Invalid header " + ArchiveUtils.toAsciiString(realized));
268            }
269        }
270
271        if (offset % 2 != 0) {
272            if (input.read() < 0) {
273                // hit eof
274                return null;
275            }
276            trackReadBytes(1);
277        }
278
279        {
280            final int read = IOUtils.readFully(input, metaData);
281            trackReadBytes(read);
282            if (read == 0) {
283                return null;
284            }
285            if (read < metaData.length) {
286                throw new IOException("Truncated ar archive");
287            }
288        }
289
290        {
291            final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER);
292            final byte[] realized = IOUtils.readRange(input, expected.length);
293            final int read = realized.length;
294            trackReadBytes(read);
295            if (read != expected.length) {
296                throw new IOException("Failed to read entry trailer. Occurred at byte: " + getBytesRead());
297            }
298            if (!Arrays.equals(expected, realized)) {
299                throw new IOException("Invalid entry trailer. not read the content? Occurred at byte: " + getBytesRead());
300            }
301        }
302
303        entryOffset = offset;
304
305//        GNU ar uses a '/' to mark the end of the file name; this allows for the use of spaces without the use of an extended file name.
306
307        // entry name is stored as ASCII string
308        String temp = ArchiveUtils.toAsciiString(metaData, NAME_OFFSET, NAME_LEN).trim();
309        if (isGNUStringTable(temp)) { // GNU extended file names entry
310            currentEntry = readGNUStringTable(metaData, LENGTH_OFFSET, LENGTH_LEN);
311            return getNextArEntry();
312        }
313
314        long len = asLong(metaData, LENGTH_OFFSET, LENGTH_LEN);
315        if (temp.endsWith("/")) { // GNU terminator
316            temp = temp.substring(0, temp.length() - 1);
317        } else if (isGNULongName(temp)) {
318            final int off = Integer.parseInt(temp.substring(1));// get the offset
319            temp = getExtendedName(off); // convert to the long name
320        } else if (isBSDLongName(temp)) {
321            temp = getBSDLongName(temp);
322            // entry length contained the length of the file name in
323            // addition to the real length of the entry.
324            // assume file name was ASCII, there is no "standard" otherwise
325            final int nameLen = temp.length();
326            len -= nameLen;
327            entryOffset += nameLen;
328        }
329
330        if (len < 0) {
331            throw new IOException("broken archive, entry with negative size");
332        }
333
334        currentEntry = new ArArchiveEntry(temp, len,
335                                          asInt(metaData, USER_ID_OFFSET, USER_ID_LEN, true),
336                                          asInt(metaData, GROUP_ID_OFFSET, GROUP_ID_LEN, true),
337                                          asInt(metaData, FILE_MODE_OFFSET, FILE_MODE_LEN, 8),
338                                          asLong(metaData, LAST_MODIFIED_OFFSET, LAST_MODIFIED_LEN));
339        return currentEntry;
340    }
341
342    /*
343     * (non-Javadoc)
344     *
345     * @see
346     * org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry()
347     */
348    @Override
349    public ArArchiveEntry getNextEntry() throws IOException {
350        return getNextArEntry();
351    }
352
353    /**
354     * Does the name look like it is a long name (or a name containing
355     * spaces) as encoded by SVR4/GNU ar?
356     *
357     * @see #isGNUStringTable
358     */
359    private boolean isGNULongName(final String name) {
360        return name != null && GNU_LONGNAME_PATTERN.matcher(name).matches();
361    }
362
363    /*
364     * (non-Javadoc)
365     *
366     * @see java.io.InputStream#read(byte[], int, int)
367     */
368    @Override
369    public int read(final byte[] b, final int off, final int len) throws IOException {
370        if (len == 0) {
371            return 0;
372        }
373        if (currentEntry == null) {
374            throw new IllegalStateException("No current ar entry");
375        }
376        final long entryEnd = entryOffset + currentEntry.getLength();
377        if (len < 0 || offset >= entryEnd) {
378            return -1;
379        }
380        final int toRead = (int) Math.min(len, entryEnd - offset);
381        final int ret = this.input.read(b, off, toRead);
382        trackReadBytes(ret);
383        return ret;
384    }
385
386    /**
387     * Reads the GNU archive String Table.
388     *
389     * @see #isGNUStringTable
390     */
391    private ArArchiveEntry readGNUStringTable(final byte[] length, final int offset, final int len) throws IOException {
392        final int bufflen = asInt(length, offset, len); // Assume length will fit in an int
393        namebuffer = IOUtils.readRange(input, bufflen);
394        final int read = namebuffer.length;
395        trackReadBytes(read);
396        if (read != bufflen){
397            throw new IOException("Failed to read complete // record: expected="
398                                  + bufflen + " read=" + read);
399        }
400        return new ArArchiveEntry(GNU_STRING_TABLE_NAME, bufflen);
401    }
402
403    private void trackReadBytes(final long read) {
404        count(read);
405        if (read > 0) {
406            offset += read;
407        }
408    }
409}