001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     * http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    package org.apache.commons.compress.archivers.tar;
020    
021    import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUMLEN;
022    import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUM_OFFSET;
023    
024    import java.io.IOException;
025    import java.math.BigInteger;
026    import java.nio.ByteBuffer;
027    import org.apache.commons.compress.archivers.zip.ZipEncoding;
028    import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
029    
030    /**
031     * This class provides static utility methods to work with byte streams.
032     *
033     * @Immutable
034     */
035    // CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
036    public class TarUtils {
037    
038        private static final int BYTE_MASK = 255;
039    
040        static final ZipEncoding DEFAULT_ENCODING =
041            ZipEncodingHelper.getZipEncoding(null);
042    
043        /**
044         * Encapsulates the algorithms used up to Commons Compress 1.3 as
045         * ZipEncoding.
046         */
047        static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
048                public boolean canEncode(String name) { return true; }
049    
050                public ByteBuffer encode(String name) {
051                    final int length = name.length();
052                    byte[] buf = new byte[length];
053    
054                    // copy until end of input or output is reached.
055                    for (int i = 0; i < length; ++i) {
056                        buf[i] = (byte) name.charAt(i);
057                    }
058                    return ByteBuffer.wrap(buf);
059                }
060    
061                public String decode(byte[] buffer) {
062                    final int length = buffer.length;
063                    StringBuffer result = new StringBuffer(length);
064    
065                    for (int i = 0; i < length; ++i) {
066                        byte b = buffer[i];
067                        if (b == 0) { // Trailing null
068                            break;
069                        }
070                        result.append((char) (b & 0xFF)); // Allow for sign-extension
071                    }
072    
073                    return result.toString();
074                }
075            };
076    
077        /** Private constructor to prevent instantiation of this utility class. */
078        private TarUtils(){
079        }
080    
081        /**
082         * Parse an octal string from a buffer.
083         *
084         * <p>Leading spaces are ignored.
085         * The buffer must contain a trailing space or NUL,
086         * and may contain an additional trailing space or NUL.</p>
087         *
088         * <p>The input buffer is allowed to contain all NULs,
089         * in which case the method returns 0L
090         * (this allows for missing fields).</p>
091         *
092         * <p>To work-around some tar implementations that insert a
093         * leading NUL this method returns 0 if it detects a leading NUL
094         * since Commons Compress 1.4.</p>
095         *
096         * @param buffer The buffer from which to parse.
097         * @param offset The offset into the buffer from which to parse.
098         * @param length The maximum number of bytes to parse - must be at least 2 bytes.
099         * @return The long value of the octal string.
100         * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected.
101         */
102        public static long parseOctal(final byte[] buffer, final int offset, final int length) {
103            long    result = 0;
104            int     end = offset + length;
105            int     start = offset;
106    
107            if (length < 2){
108                throw new IllegalArgumentException("Length "+length+" must be at least 2");
109            }
110    
111            if (buffer[start] == 0) {
112                return 0L;
113            }
114    
115            // Skip leading spaces
116            while (start < end){
117                if (buffer[start] == ' '){
118                    start++;
119                } else {
120                    break;
121                }
122            }
123    
124            // Must have trailing NUL or space
125            byte trailer;
126            trailer = buffer[end-1];
127            if (trailer == 0 || trailer == ' '){
128                end--;
129            } else {
130                throw new IllegalArgumentException(
131                        exceptionMessage(buffer, offset, length, end-1, trailer));
132            }
133            // May have additional NULs or spaces
134            trailer = buffer[end - 1];
135            while (start < end - 1 && (trailer == 0 || trailer == ' ')) {
136                end--;
137                trailer = buffer[end - 1];
138            }
139    
140            for ( ;start < end; start++) {
141                final byte currentByte = buffer[start];
142                // CheckStyle:MagicNumber OFF
143                if (currentByte < '0' || currentByte > '7'){
144                    throw new IllegalArgumentException(
145                            exceptionMessage(buffer, offset, length, start, currentByte));
146                }
147                result = (result << 3) + (currentByte - '0'); // convert from ASCII
148                // CheckStyle:MagicNumber ON
149            }
150    
151            return result;
152        }
153    
154        /** 
155         * Compute the value contained in a byte buffer.  If the most
156         * significant bit of the first byte in the buffer is set, this
157         * bit is ignored and the rest of the buffer is interpreted as a
158         * binary number.  Otherwise, the buffer is interpreted as an
159         * octal number as per the parseOctal function above.
160         *
161         * @param buffer The buffer from which to parse.
162         * @param offset The offset into the buffer from which to parse.
163         * @param length The maximum number of bytes to parse.
164         * @return The long value of the octal or binary string.
165         * @throws IllegalArgumentException if the trailing space/NUL is
166         * missing or an invalid byte is detected in an octal number, or
167         * if a binary number would exceed the size of a signed long
168         * 64-bit integer.
169         * @since 1.4
170         */
171        public static long parseOctalOrBinary(final byte[] buffer, final int offset,
172                                              final int length) {
173    
174            if ((buffer[offset] & 0x80) == 0) {
175                return parseOctal(buffer, offset, length);
176            }
177            final boolean negative = buffer[offset] == (byte) 0xff;
178            if (length < 9) {
179                return parseBinaryLong(buffer, offset, length, negative);
180            }
181            return parseBinaryBigInteger(buffer, offset, length, negative);
182        }
183    
184        private static long parseBinaryLong(final byte[] buffer, final int offset,
185                                            final int length,
186                                            final boolean negative) {
187            if (length >= 9) {
188                throw new IllegalArgumentException("At offset " + offset + ", "
189                                                   + length + " byte binary number"
190                                                   + " exceeds maximum signed long"
191                                                   + " value");
192            }
193            long val = 0;
194            for (int i = 1; i < length; i++) {
195                val = (val << 8) + (buffer[offset + i] & 0xff);
196            }
197            if (negative) {
198                // 2's complement
199                val--;
200                val ^= ((long) Math.pow(2, (length - 1) * 8) - 1);
201            }
202            return negative ? -val : val;
203        }
204    
205        private static long parseBinaryBigInteger(final byte[] buffer,
206                                                  final int offset,
207                                                  final int length,
208                                                  final boolean negative) {
209            byte[] remainder = new byte[length - 1];
210            System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
211            BigInteger val = new BigInteger(remainder);
212            if (negative) {
213                // 2's complement
214                val = val.add(BigInteger.valueOf(-1)).not();
215            }
216            if (val.bitLength() > 63) {
217                throw new IllegalArgumentException("At offset " + offset + ", "
218                                                   + length + " byte binary number"
219                                                   + " exceeds maximum signed long"
220                                                   + " value");
221            }
222            return negative ? -val.longValue() : val.longValue();
223        }
224    
225        /**
226         * Parse a boolean byte from a buffer.
227         * Leading spaces and NUL are ignored.
228         * The buffer may contain trailing spaces or NULs.
229         *
230         * @param buffer The buffer from which to parse.
231         * @param offset The offset into the buffer from which to parse.
232         * @return The boolean value of the bytes.
233         * @throws IllegalArgumentException if an invalid byte is detected.
234         */
235        public static boolean parseBoolean(final byte[] buffer, final int offset) {
236            return buffer[offset] == 1;
237        }
238    
239        // Helper method to generate the exception message
240        private static String exceptionMessage(byte[] buffer, final int offset,
241                final int length, int current, final byte currentByte) {
242            String string = new String(buffer, offset, length); // TODO default charset?
243            string=string.replaceAll("\0", "{NUL}"); // Replace NULs to allow string to be printed
244            final String s = "Invalid byte "+currentByte+" at offset "+(current-offset)+" in '"+string+"' len="+length;
245            return s;
246        }
247    
248        /**
249         * Parse an entry name from a buffer.
250         * Parsing stops when a NUL is found
251         * or the buffer length is reached.
252         *
253         * @param buffer The buffer from which to parse.
254         * @param offset The offset into the buffer from which to parse.
255         * @param length The maximum number of bytes to parse.
256         * @return The entry name.
257         */
258        public static String parseName(byte[] buffer, final int offset, final int length) {
259            try {
260                return parseName(buffer, offset, length, DEFAULT_ENCODING);
261            } catch (IOException ex) {
262                try {
263                    return parseName(buffer, offset, length, FALLBACK_ENCODING);
264                } catch (IOException ex2) {
265                    // impossible
266                    throw new RuntimeException(ex2);
267                }
268            }
269        }
270    
271        /**
272         * Parse an entry name from a buffer.
273         * Parsing stops when a NUL is found
274         * or the buffer length is reached.
275         *
276         * @param buffer The buffer from which to parse.
277         * @param offset The offset into the buffer from which to parse.
278         * @param length The maximum number of bytes to parse.
279         * @param encoding name of the encoding to use for file names
280         * @since Commons Compress 1.4
281         * @return The entry name.
282         */
283        public static String parseName(byte[] buffer, final int offset,
284                                       final int length,
285                                       final ZipEncoding encoding)
286            throws IOException {
287    
288            int len = length;
289            for (; len > 0; len--) {
290                if (buffer[offset + len - 1] != 0) {
291                    break;
292                }
293            }
294            if (len > 0) {
295                byte[] b = new byte[len];
296                System.arraycopy(buffer, offset, b, 0, len);
297                return encoding.decode(b);
298            }
299            return "";
300        }
301    
302        /**
303         * Copy a name into a buffer.
304         * Copies characters from the name into the buffer
305         * starting at the specified offset. 
306         * If the buffer is longer than the name, the buffer
307         * is filled with trailing NULs.
308         * If the name is longer than the buffer,
309         * the output is truncated.
310         *
311         * @param name The header name from which to copy the characters.
312         * @param buf The buffer where the name is to be stored.
313         * @param offset The starting offset into the buffer
314         * @param length The maximum number of header bytes to copy.
315         * @return The updated offset, i.e. offset + length
316         */
317        public static int formatNameBytes(String name, byte[] buf, final int offset, final int length) {
318            try {
319                return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
320            } catch (IOException ex) {
321                try {
322                    return formatNameBytes(name, buf, offset, length,
323                                           FALLBACK_ENCODING);
324                } catch (IOException ex2) {
325                    // impossible
326                    throw new RuntimeException(ex2);
327                }
328            }
329        }
330    
331        /**
332         * Copy a name into a buffer.
333         * Copies characters from the name into the buffer
334         * starting at the specified offset. 
335         * If the buffer is longer than the name, the buffer
336         * is filled with trailing NULs.
337         * If the name is longer than the buffer,
338         * the output is truncated.
339         *
340         * @param name The header name from which to copy the characters.
341         * @param buf The buffer where the name is to be stored.
342         * @param offset The starting offset into the buffer
343         * @param length The maximum number of header bytes to copy.
344         * @param encoding name of the encoding to use for file names
345         * @since Commons Compress 1.4
346         * @return The updated offset, i.e. offset + length
347         */
348        public static int formatNameBytes(String name, byte[] buf, final int offset,
349                                          final int length,
350                                          final ZipEncoding encoding)
351            throws IOException {
352            int len = name.length();
353            ByteBuffer b = encoding.encode(name);
354            while (b.limit() > length && len > 0) {
355                b = encoding.encode(name.substring(0, --len));
356            }
357            final int limit = b.limit() - b.position();
358            System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
359    
360            // Pad any remaining output bytes with NUL
361            for (int i = limit; i < length; ++i) {
362                buf[offset + i] = 0;
363            }
364    
365            return offset + length;
366        }
367    
368        /**
369         * Fill buffer with unsigned octal number, padded with leading zeroes.
370         * 
371         * @param value number to convert to octal - treated as unsigned
372         * @param buffer destination buffer
373         * @param offset starting offset in buffer
374         * @param length length of buffer to fill
375         * @throws IllegalArgumentException if the value will not fit in the buffer
376         */
377        public static void formatUnsignedOctalString(final long value, byte[] buffer,
378                final int offset, final int length) {
379            int remaining = length;
380            remaining--;
381            if (value == 0) {
382                buffer[offset + remaining--] = (byte) '0';
383            } else {
384                long val = value;
385                for (; remaining >= 0 && val != 0; --remaining) {
386                    // CheckStyle:MagicNumber OFF
387                    buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
388                    val = val >>> 3;
389                    // CheckStyle:MagicNumber ON
390                }
391                if (val != 0){
392                    throw new IllegalArgumentException
393                    (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length);
394                }
395            }
396    
397            for (; remaining >= 0; --remaining) { // leading zeros
398                buffer[offset + remaining] = (byte) '0';
399            }
400        }
401    
402        /**
403         * Write an octal integer into a buffer.
404         *
405         * Uses {@link #formatUnsignedOctalString} to format
406         * the value as an octal string with leading zeros.
407         * The converted number is followed by space and NUL
408         * 
409         * @param value The value to write
410         * @param buf The buffer to receive the output
411         * @param offset The starting offset into the buffer
412         * @param length The size of the output buffer
413         * @return The updated offset, i.e offset+length
414         * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
415         */
416        public static int formatOctalBytes(final long value, byte[] buf, final int offset, final int length) {
417    
418            int idx=length-2; // For space and trailing null
419            formatUnsignedOctalString(value, buf, offset, idx);
420    
421            buf[offset + idx++] = (byte) ' '; // Trailing space
422            buf[offset + idx]   = 0; // Trailing null
423    
424            return offset + length;
425        }
426    
427        /**
428         * Write an octal long integer into a buffer.
429         * 
430         * Uses {@link #formatUnsignedOctalString} to format
431         * the value as an octal string with leading zeros.
432         * The converted number is followed by a space.
433         * 
434         * @param value The value to write as octal
435         * @param buf The destinationbuffer.
436         * @param offset The starting offset into the buffer.
437         * @param length The length of the buffer
438         * @return The updated offset
439         * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
440         */
441        public static int formatLongOctalBytes(final long value, byte[] buf, final int offset, final int length) {
442    
443            int idx=length-1; // For space
444    
445            formatUnsignedOctalString(value, buf, offset, idx);
446            buf[offset + idx] = (byte) ' '; // Trailing space
447    
448            return offset + length;
449        }
450    
451        /**
452         * Write an long integer into a buffer as an octal string if this
453         * will fit, or as a binary number otherwise.
454         * 
455         * Uses {@link #formatUnsignedOctalString} to format
456         * the value as an octal string with leading zeros.
457         * The converted number is followed by a space.
458         * 
459         * @param value The value to write into the buffer.
460         * @param buf The destination buffer.
461         * @param offset The starting offset into the buffer.
462         * @param length The length of the buffer.
463         * @return The updated offset.
464         * @throws IllegalArgumentException if the value (and trailer)
465         * will not fit in the buffer.
466         * @since 1.4
467         */
468        public static int formatLongOctalOrBinaryBytes(
469            final long value, byte[] buf, final int offset, final int length) {
470    
471            // Check whether we are dealing with UID/GID or SIZE field
472            final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;
473    
474            final boolean negative = value < 0;
475            if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
476                return formatLongOctalBytes(value, buf, offset, length);
477            }
478    
479            if (length < 9) {
480                formatLongBinary(value, buf, offset, length, negative);
481            }
482            formatBigIntegerBinary(value, buf, offset, length, negative);
483    
484            buf[offset] = (byte) (negative ? 0xff : 0x80);
485            return offset + length;
486        }
487    
488        private static void formatLongBinary(final long value, byte[] buf,
489                                             final int offset, final int length,
490                                             final boolean negative) {
491            final int bits = (length - 1) * 8;
492            final long max = 1l << bits;
493            long val = Math.abs(value);
494            if (val >= max) {
495                throw new IllegalArgumentException("Value " + value +
496                    " is too large for " + length + " byte field.");
497            }
498            if (negative) {
499                val ^= max - 1;
500                val |= 0xff << bits;
501                val++;
502            }
503            for (int i = offset + length - 1; i >= offset; i--) {
504                buf[i] = (byte) val;
505                val >>= 8;
506            }
507        }
508    
509        private static void formatBigIntegerBinary(final long value, byte[] buf,
510                                                   final int offset,
511                                                   final int length,
512                                                   final boolean negative) {
513            BigInteger val = BigInteger.valueOf(value);
514            final byte[] b = val.toByteArray();
515            final int len = b.length;
516            final int off = offset + length - len;
517            System.arraycopy(b, 0, buf, off, len);
518            final byte fill = (byte) (negative ? 0xff : 0);
519            for (int i = offset + 1; i < off; i++) {
520                buf[i] = fill;
521            }
522        }
523    
524        /**
525         * Writes an octal value into a buffer.
526         * 
527         * Uses {@link #formatUnsignedOctalString} to format
528         * the value as an octal string with leading zeros.
529         * The converted number is followed by NUL and then space.
530         *
531         * @param value The value to convert
532         * @param buf The destination buffer
533         * @param offset The starting offset into the buffer.
534         * @param length The size of the buffer.
535         * @return The updated value of offset, i.e. offset+length
536         * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
537         */
538        public static int formatCheckSumOctalBytes(final long value, byte[] buf, final int offset, final int length) {
539    
540            int idx=length-2; // for NUL and space
541            formatUnsignedOctalString(value, buf, offset, idx);
542    
543            buf[offset + idx++]   = 0; // Trailing null
544            buf[offset + idx]     = (byte) ' '; // Trailing space
545    
546            return offset + length;
547        }
548    
549        /**
550         * Compute the checksum of a tar entry header.
551         *
552         * @param buf The tar entry's header buffer.
553         * @return The computed checksum.
554         */
555        public static long computeCheckSum(final byte[] buf) {
556            long sum = 0;
557    
558            for (int i = 0; i < buf.length; ++i) {
559                sum += BYTE_MASK & buf[i];
560            }
561    
562            return sum;
563        }
564    
565        /**
566         * Wikipedia <a href="http://en.wikipedia.org/wiki/Tar_(file_format)#File_header">says</a>:
567         * <blockquote>
568         * The checksum is calculated by taking the sum of the unsigned byte values
569         * of the header block with the eight checksum bytes taken to be ascii
570         * spaces (decimal value 32). It is stored as a six digit octal number with
571         * leading zeroes followed by a NUL and then a space. Various
572         * implementations do not adhere to this format. For better compatibility,
573         * ignore leading and trailing whitespace, and get the first six digits. In
574         * addition, some historic tar implementations treated bytes as signed.
575         * Implementations typically calculate the checksum both ways, and treat it
576         * as good if either the signed or unsigned sum matches the included
577         * checksum.
578         * </blockquote>
579         * <p>
580         * In addition there are
581         * <a href="https://issues.apache.org/jira/browse/COMPRESS-117">some tar files</a>
582         * that seem to have parts of their header cleared to zero (no detectable
583         * magic bytes, etc.) but still have a reasonable-looking checksum field
584         * present. It looks like we can detect such cases reasonably well by
585         * checking whether the stored checksum is <em>greater than</em> the
586         * computed unsigned checksum. That check is unlikely to pass on some
587         * random file header, as it would need to have a valid sequence of
588         * octal digits in just the right place.
589         * <p>
590         * The return value of this method should be treated as a best-effort
591         * heuristic rather than an absolute and final truth. The checksum
592         * verification logic may well evolve over time as more special cases
593         * are encountered.
594         *
595         * @param header tar header
596         * @return whether the checksum is reasonably good
597         * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
598         * @since 1.5
599         */
600        public static boolean verifyCheckSum(byte[] header) {
601            long storedSum = 0;
602            long unsignedSum = 0;
603            long signedSum = 0;
604    
605            int digits = 0;
606            for (int i = 0; i < header.length; i++) {
607                byte b = header[i];
608                if (CHKSUM_OFFSET  <= i && i < CHKSUM_OFFSET + CHKSUMLEN) {
609                    if ('0' <= b && b <= '7' && digits++ < 6) {
610                        storedSum = storedSum * 8 + b - '0';
611                    } else if (digits > 0) {
612                        digits = 6; // only look at the first octal digit sequence
613                    }
614                    b = ' ';
615                }
616                unsignedSum += 0xff & b;
617                signedSum += b;
618            }
619    
620            return storedSum == unsignedSum || storedSum == signedSum
621                    || storedSum > unsignedSum; // COMPRESS-177
622        }
623    
624    }