001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     * http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    package org.apache.commons.compress.archivers.ar;
020    
021    import java.io.EOFException;
022    import java.io.IOException;
023    import java.io.InputStream;
024    
025    import org.apache.commons.compress.archivers.ArchiveEntry;
026    import org.apache.commons.compress.archivers.ArchiveInputStream;
027    import org.apache.commons.compress.utils.ArchiveUtils;
028    
029    /**
030     * Implements the "ar" archive format as an input stream.
031     * 
032     * @NotThreadSafe
033     * 
034     */
035    public class ArArchiveInputStream extends ArchiveInputStream {
036    
037        private final InputStream input;
038        private long offset = 0;
039        private boolean closed;
040    
041        /*
042         * If getNextEnxtry has been called, the entry metadata is stored in
043         * currentEntry.
044         */
045        private ArArchiveEntry currentEntry = null;
046    
047        // Storage area for extra long names (GNU ar)
048        private byte[] namebuffer = null;
049    
050        /*
051         * The offset where the current entry started. -1 if no entry has been
052         * called
053         */
054        private long entryOffset = -1;
055    
056        // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
057        private final byte[] NAME_BUF = new byte[16];
058        private final byte[] LAST_MODIFIED_BUF = new byte[12];
059        private final byte[] ID_BUF = new byte[6];
060        private final byte[] FILE_MODE_BUF = new byte[8];
061        private final byte[] LENGTH_BUF = new byte[10];
062    
063        /**
064         * Constructs an Ar input stream with the referenced stream
065         * 
066         * @param pInput
067         *            the ar input stream
068         */
069        public ArArchiveInputStream(final InputStream pInput) {
070            input = pInput;
071            closed = false;
072        }
073    
074        /**
075         * Returns the next AR entry in this stream.
076         * 
077         * @return the next AR entry.
078         * @throws IOException
079         *             if the entry could not be read
080         */
081        public ArArchiveEntry getNextArEntry() throws IOException {
082            if (currentEntry != null) {
083                final long entryEnd = entryOffset + currentEntry.getLength();
084                while (offset < entryEnd) {
085                    int x = read();
086                    if (x == -1) {
087                        // hit EOF before previous entry was complete
088                        // TODO: throw an exception instead?
089                        return null;
090                    }
091                }
092                currentEntry = null;
093            }
094    
095            if (offset == 0) {
096                final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER);
097                final byte[] realized = new byte[expected.length];
098                final int read = read(realized);
099                if (read != expected.length) {
100                    throw new IOException("failed to read header. Occured at byte: " + getBytesRead());
101                }
102                for (int i = 0; i < expected.length; i++) {
103                    if (expected[i] != realized[i]) {
104                        throw new IOException("invalid header " + ArchiveUtils.toAsciiString(realized));
105                    }
106                }
107            }
108    
109            if (offset % 2 != 0 && read() < 0) {
110                // hit eof
111                return null;
112            }
113    
114            if (input.available() == 0) {
115                return null;
116            }
117    
118            read(NAME_BUF);
119            read(LAST_MODIFIED_BUF);
120            read(ID_BUF);
121            int userId = asInt(ID_BUF, true);
122            read(ID_BUF);
123            read(FILE_MODE_BUF);
124            read(LENGTH_BUF);
125    
126            {
127                final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER);
128                final byte[] realized = new byte[expected.length];
129                final int read = read(realized);
130                if (read != expected.length) {
131                    throw new IOException("failed to read entry trailer. Occured at byte: " + getBytesRead());
132                }
133                for (int i = 0; i < expected.length; i++) {
134                    if (expected[i] != realized[i]) {
135                        throw new IOException("invalid entry trailer. not read the content? Occured at byte: " + getBytesRead());
136                    }
137                }
138            }
139    
140            entryOffset = offset;
141    
142    //        GNU ar uses a '/' to mark the end of the filename; this allows for the use of spaces without the use of an extended filename.
143    
144            // entry name is stored as ASCII string
145            String temp = ArchiveUtils.toAsciiString(NAME_BUF).trim();
146            if (isGNUStringTable(temp)) { // GNU extended filenames entry
147                currentEntry = readGNUStringTable(LENGTH_BUF);
148                return getNextArEntry();
149            }
150    
151            long len = asLong(LENGTH_BUF);
152            if (temp.endsWith("/")) { // GNU terminator
153                temp = temp.substring(0, temp.length() - 1);
154            } else if (isGNULongName(temp)) {
155                int offset = Integer.parseInt(temp.substring(1));// get the offset
156                temp = getExtendedName(offset); // convert to the long name
157            } else if (isBSDLongName(temp)) {
158                temp = getBSDLongName(temp);
159                // entry length contained the length of the file name in
160                // addition to the real length of the entry.
161                // assume file name was ASCII, there is no "standard" otherwise
162                int nameLen = temp.length();
163                len -= nameLen;
164                entryOffset += nameLen;
165            }
166    
167            currentEntry = new ArArchiveEntry(temp, len, userId,
168                                              asInt(ID_BUF, true),
169                                              asInt(FILE_MODE_BUF, 8),
170                                              asLong(LAST_MODIFIED_BUF));
171            return currentEntry;
172        }
173    
174        /**
175         * Get an extended name from the GNU extended name buffer.
176         * 
177         * @param offset pointer to entry within the buffer
178         * @return the extended file name; without trailing "/" if present.
179         * @throws IOException if name not found or buffer not set up
180         */
181        private String getExtendedName(int offset) throws IOException{
182            if (namebuffer == null) {
183                throw new IOException("Cannot process GNU long filename as no // record was found");
184            }
185            for(int i=offset; i < namebuffer.length; i++){
186                if (namebuffer[i]=='\012'){
187                    if (namebuffer[i-1]=='/') {
188                        i--; // drop trailing /
189                    }
190                    return ArchiveUtils.toAsciiString(namebuffer, offset, i-offset);
191                }
192            }
193            throw new IOException("Failed to read entry: "+offset);
194        }
195        private long asLong(byte[] input) {
196            return Long.parseLong(ArchiveUtils.toAsciiString(input).trim());
197        }
198    
199        private int asInt(byte[] input) {
200            return asInt(input, 10, false);
201        }
202    
203        private int asInt(byte[] input, boolean treatBlankAsZero) {
204            return asInt(input, 10, treatBlankAsZero);
205        }
206    
207        private int asInt(byte[] input, int base) {
208            return asInt(input, base, false);
209        }
210    
211        private int asInt(byte[] input, int base, boolean treatBlankAsZero) {
212            String string = ArchiveUtils.toAsciiString(input).trim();
213            if (string.length() == 0 && treatBlankAsZero) {
214                return 0;
215            }
216            return Integer.parseInt(string, base);
217        }
218    
219        /*
220         * (non-Javadoc)
221         * 
222         * @see
223         * org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry()
224         */
225        @Override
226        public ArchiveEntry getNextEntry() throws IOException {
227            return getNextArEntry();
228        }
229    
230        /*
231         * (non-Javadoc)
232         * 
233         * @see java.io.InputStream#close()
234         */
235        @Override
236        public void close() throws IOException {
237            if (!closed) {
238                closed = true;
239                input.close();
240            }
241            currentEntry = null;
242        }
243    
244        /*
245         * (non-Javadoc)
246         * 
247         * @see java.io.InputStream#read(byte[], int, int)
248         */
249        @Override
250        public int read(byte[] b, final int off, final int len) throws IOException {
251            int toRead = len;
252            if (currentEntry != null) {
253                final long entryEnd = entryOffset + currentEntry.getLength();
254                if (len > 0 && entryEnd > offset) {
255                    toRead = (int) Math.min(len, entryEnd - offset);
256                } else {
257                    return -1;
258                }
259            }
260            final int ret = this.input.read(b, off, toRead);
261            count(ret);
262            offset += (ret > 0 ? ret : 0);
263            return ret;
264        }
265    
266        /**
267         * Checks if the signature matches ASCII "!<arch>" followed by a single LF
268         * control character
269         * 
270         * @param signature
271         *            the bytes to check
272         * @param length
273         *            the number of bytes to check
274         * @return true, if this stream is an Ar archive stream, false otherwise
275         */
276        public static boolean matches(byte[] signature, int length) {
277            // 3c21 7261 6863 0a3e
278    
279            if (length < 8) {
280                return false;
281            }
282            if (signature[0] != 0x21) {
283                return false;
284            }
285            if (signature[1] != 0x3c) {
286                return false;
287            }
288            if (signature[2] != 0x61) {
289                return false;
290            }
291            if (signature[3] != 0x72) {
292                return false;
293            }
294            if (signature[4] != 0x63) {
295                return false;
296            }
297            if (signature[5] != 0x68) {
298                return false;
299            }
300            if (signature[6] != 0x3e) {
301                return false;
302            }
303            if (signature[7] != 0x0a) {
304                return false;
305            }
306    
307            return true;
308        }
309    
310        static final String BSD_LONGNAME_PREFIX = "#1/";
311        private static final int BSD_LONGNAME_PREFIX_LEN =
312            BSD_LONGNAME_PREFIX.length();
313        private static final String BSD_LONGNAME_PATTERN =
314            "^" + BSD_LONGNAME_PREFIX + "\\d+";
315    
316        /**
317         * Does the name look like it is a long name (or a name containing
318         * spaces) as encoded by BSD ar?
319         *
320         * <p>From the FreeBSD ar(5) man page:</p>
321         * <pre>
322         * BSD   In the BSD variant, names that are shorter than 16
323         *       characters and without embedded spaces are stored
324         *       directly in this field.  If a name has an embedded
325         *       space, or if it is longer than 16 characters, then
326         *       the string "#1/" followed by the decimal represen-
327         *       tation of the length of the file name is placed in
328         *       this field. The actual file name is stored immedi-
329         *       ately after the archive header.  The content of the
330         *       archive member follows the file name.  The ar_size
331         *       field of the header (see below) will then hold the
332         *       sum of the size of the file name and the size of
333         *       the member.
334         * </pre>
335         *
336         * @since 1.3
337         */
338        private static boolean isBSDLongName(String name) {
339            return name != null && name.matches(BSD_LONGNAME_PATTERN);
340        }
341    
342        /**
343         * Reads the real name from the current stream assuming the very
344         * first bytes to be read are the real file name.
345         *
346         * @see #isBSDLongName
347         *
348         * @since 1.3
349         */
350        private String getBSDLongName(String bsdLongName) throws IOException {
351            int nameLen =
352                Integer.parseInt(bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN));
353            byte[] name = new byte[nameLen];
354            int read = 0, readNow = 0;
355            while ((readNow = input.read(name, read, nameLen - read)) >= 0) {
356                read += readNow;
357                count(readNow);
358                if (read == nameLen) {
359                    break;
360                }
361            }
362            if (read != nameLen) {
363                throw new EOFException();
364            }
365            return ArchiveUtils.toAsciiString(name);
366        }
367    
368        private static final String GNU_STRING_TABLE_NAME = "//";
369    
370        /**
371         * Is this the name of the "Archive String Table" as used by
372         * SVR4/GNU to store long file names?
373         *
374         * <p>GNU ar stores multiple extended filenames in the data section
375         * of a file with the name "//", this record is referred to by
376         * future headers.</p>
377         *
378         * <p>A header references an extended filename by storing a "/"
379         * followed by a decimal offset to the start of the filename in
380         * the extended filename data section.</p>
381         * 
382         * <p>The format of the "//" file itself is simply a list of the
383         * long filenames, each separated by one or more LF
384         * characters. Note that the decimal offsets are number of
385         * characters, not line or string number within the "//" file.</p>
386         */
387        private static boolean isGNUStringTable(String name) {
388            return GNU_STRING_TABLE_NAME.equals(name);
389        }
390    
391        /**
392         * Reads the GNU archive String Table.
393         *
394         * @see #isGNUStringTable
395         */
396        private ArArchiveEntry readGNUStringTable(byte[] length) throws IOException {
397            int bufflen = asInt(length); // Assume length will fit in an int
398            namebuffer = new byte[bufflen];
399            int read = read(namebuffer, 0, bufflen);
400            if (read != bufflen){
401                throw new IOException("Failed to read complete // record: expected="
402                                      + bufflen + " read=" + read);
403            }
404            return new ArArchiveEntry(GNU_STRING_TABLE_NAME, bufflen);
405        }
406    
407        private static final String GNU_LONGNAME_PATTERN = "^/\\d+";
408    
409        /**
410         * Does the name look like it is a long name (or a name containing
411         * spaces) as encoded by SVR4/GNU ar?
412         *
413         * @see #isGNUStringTable
414         */
415        private boolean isGNULongName(String name) {
416            return name != null && name.matches(GNU_LONGNAME_PATTERN);
417        }
418    }