001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019 package org.apache.commons.compress.archivers.tar; 020 021 import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUMLEN; 022 import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUM_OFFSET; 023 024 import java.io.IOException; 025 import java.math.BigInteger; 026 import java.nio.ByteBuffer; 027 import org.apache.commons.compress.archivers.zip.ZipEncoding; 028 import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; 029 030 /** 031 * This class provides static utility methods to work with byte streams. 032 * 033 * @Immutable 034 */ 035 // CheckStyle:HideUtilityClassConstructorCheck OFF (bc) 036 public class TarUtils { 037 038 private static final int BYTE_MASK = 255; 039 040 static final ZipEncoding DEFAULT_ENCODING = 041 ZipEncodingHelper.getZipEncoding(null); 042 043 /** 044 * Encapsulates the algorithms used up to Commons Compress 1.3 as 045 * ZipEncoding. 046 */ 047 static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() { 048 public boolean canEncode(String name) { return true; } 049 050 public ByteBuffer encode(String name) { 051 final int length = name.length(); 052 byte[] buf = new byte[length]; 053 054 // copy until end of input or output is reached. 055 for (int i = 0; i < length; ++i) { 056 buf[i] = (byte) name.charAt(i); 057 } 058 return ByteBuffer.wrap(buf); 059 } 060 061 public String decode(byte[] buffer) { 062 final int length = buffer.length; 063 StringBuffer result = new StringBuffer(length); 064 065 for (int i = 0; i < length; ++i) { 066 byte b = buffer[i]; 067 if (b == 0) { // Trailing null 068 break; 069 } 070 result.append((char) (b & 0xFF)); // Allow for sign-extension 071 } 072 073 return result.toString(); 074 } 075 }; 076 077 /** Private constructor to prevent instantiation of this utility class. */ 078 private TarUtils(){ 079 } 080 081 /** 082 * Parse an octal string from a buffer. 083 * 084 * <p>Leading spaces are ignored. 085 * The buffer must contain a trailing space or NUL, 086 * and may contain an additional trailing space or NUL.</p> 087 * 088 * <p>The input buffer is allowed to contain all NULs, 089 * in which case the method returns 0L 090 * (this allows for missing fields).</p> 091 * 092 * <p>To work-around some tar implementations that insert a 093 * leading NUL this method returns 0 if it detects a leading NUL 094 * since Commons Compress 1.4.</p> 095 * 096 * @param buffer The buffer from which to parse. 097 * @param offset The offset into the buffer from which to parse. 098 * @param length The maximum number of bytes to parse - must be at least 2 bytes. 099 * @return The long value of the octal string. 100 * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected. 101 */ 102 public static long parseOctal(final byte[] buffer, final int offset, final int length) { 103 long result = 0; 104 int end = offset + length; 105 int start = offset; 106 107 if (length < 2){ 108 throw new IllegalArgumentException("Length "+length+" must be at least 2"); 109 } 110 111 if (buffer[start] == 0) { 112 return 0L; 113 } 114 115 // Skip leading spaces 116 while (start < end){ 117 if (buffer[start] == ' '){ 118 start++; 119 } else { 120 break; 121 } 122 } 123 124 // Must have trailing NUL or space 125 byte trailer; 126 trailer = buffer[end-1]; 127 if (trailer == 0 || trailer == ' '){ 128 end--; 129 } else { 130 throw new IllegalArgumentException( 131 exceptionMessage(buffer, offset, length, end-1, trailer)); 132 } 133 // May have additional NULs or spaces 134 trailer = buffer[end - 1]; 135 while (start < end - 1 && (trailer == 0 || trailer == ' ')) { 136 end--; 137 trailer = buffer[end - 1]; 138 } 139 140 for ( ;start < end; start++) { 141 final byte currentByte = buffer[start]; 142 // CheckStyle:MagicNumber OFF 143 if (currentByte < '0' || currentByte > '7'){ 144 throw new IllegalArgumentException( 145 exceptionMessage(buffer, offset, length, start, currentByte)); 146 } 147 result = (result << 3) + (currentByte - '0'); // convert from ASCII 148 // CheckStyle:MagicNumber ON 149 } 150 151 return result; 152 } 153 154 /** 155 * Compute the value contained in a byte buffer. If the most 156 * significant bit of the first byte in the buffer is set, this 157 * bit is ignored and the rest of the buffer is interpreted as a 158 * binary number. Otherwise, the buffer is interpreted as an 159 * octal number as per the parseOctal function above. 160 * 161 * @param buffer The buffer from which to parse. 162 * @param offset The offset into the buffer from which to parse. 163 * @param length The maximum number of bytes to parse. 164 * @return The long value of the octal or binary string. 165 * @throws IllegalArgumentException if the trailing space/NUL is 166 * missing or an invalid byte is detected in an octal number, or 167 * if a binary number would exceed the size of a signed long 168 * 64-bit integer. 169 * @since 1.4 170 */ 171 public static long parseOctalOrBinary(final byte[] buffer, final int offset, 172 final int length) { 173 174 if ((buffer[offset] & 0x80) == 0) { 175 return parseOctal(buffer, offset, length); 176 } 177 final boolean negative = buffer[offset] == (byte) 0xff; 178 if (length < 9) { 179 return parseBinaryLong(buffer, offset, length, negative); 180 } 181 return parseBinaryBigInteger(buffer, offset, length, negative); 182 } 183 184 private static long parseBinaryLong(final byte[] buffer, final int offset, 185 final int length, 186 final boolean negative) { 187 if (length >= 9) { 188 throw new IllegalArgumentException("At offset " + offset + ", " 189 + length + " byte binary number" 190 + " exceeds maximum signed long" 191 + " value"); 192 } 193 long val = 0; 194 for (int i = 1; i < length; i++) { 195 val = (val << 8) + (buffer[offset + i] & 0xff); 196 } 197 if (negative) { 198 // 2's complement 199 val--; 200 val ^= ((long) Math.pow(2, (length - 1) * 8) - 1); 201 } 202 return negative ? -val : val; 203 } 204 205 private static long parseBinaryBigInteger(final byte[] buffer, 206 final int offset, 207 final int length, 208 final boolean negative) { 209 byte[] remainder = new byte[length - 1]; 210 System.arraycopy(buffer, offset + 1, remainder, 0, length - 1); 211 BigInteger val = new BigInteger(remainder); 212 if (negative) { 213 // 2's complement 214 val = val.add(BigInteger.valueOf(-1)).not(); 215 } 216 if (val.bitLength() > 63) { 217 throw new IllegalArgumentException("At offset " + offset + ", " 218 + length + " byte binary number" 219 + " exceeds maximum signed long" 220 + " value"); 221 } 222 return negative ? -val.longValue() : val.longValue(); 223 } 224 225 /** 226 * Parse a boolean byte from a buffer. 227 * Leading spaces and NUL are ignored. 228 * The buffer may contain trailing spaces or NULs. 229 * 230 * @param buffer The buffer from which to parse. 231 * @param offset The offset into the buffer from which to parse. 232 * @return The boolean value of the bytes. 233 * @throws IllegalArgumentException if an invalid byte is detected. 234 */ 235 public static boolean parseBoolean(final byte[] buffer, final int offset) { 236 return buffer[offset] == 1; 237 } 238 239 // Helper method to generate the exception message 240 private static String exceptionMessage(byte[] buffer, final int offset, 241 final int length, int current, final byte currentByte) { 242 String string = new String(buffer, offset, length); // TODO default charset? 243 string=string.replaceAll("\0", "{NUL}"); // Replace NULs to allow string to be printed 244 final String s = "Invalid byte "+currentByte+" at offset "+(current-offset)+" in '"+string+"' len="+length; 245 return s; 246 } 247 248 /** 249 * Parse an entry name from a buffer. 250 * Parsing stops when a NUL is found 251 * or the buffer length is reached. 252 * 253 * @param buffer The buffer from which to parse. 254 * @param offset The offset into the buffer from which to parse. 255 * @param length The maximum number of bytes to parse. 256 * @return The entry name. 257 */ 258 public static String parseName(byte[] buffer, final int offset, final int length) { 259 try { 260 return parseName(buffer, offset, length, DEFAULT_ENCODING); 261 } catch (IOException ex) { 262 try { 263 return parseName(buffer, offset, length, FALLBACK_ENCODING); 264 } catch (IOException ex2) { 265 // impossible 266 throw new RuntimeException(ex2); 267 } 268 } 269 } 270 271 /** 272 * Parse an entry name from a buffer. 273 * Parsing stops when a NUL is found 274 * or the buffer length is reached. 275 * 276 * @param buffer The buffer from which to parse. 277 * @param offset The offset into the buffer from which to parse. 278 * @param length The maximum number of bytes to parse. 279 * @param encoding name of the encoding to use for file names 280 * @since Commons Compress 1.4 281 * @return The entry name. 282 */ 283 public static String parseName(byte[] buffer, final int offset, 284 final int length, 285 final ZipEncoding encoding) 286 throws IOException { 287 288 int len = length; 289 for (; len > 0; len--) { 290 if (buffer[offset + len - 1] != 0) { 291 break; 292 } 293 } 294 if (len > 0) { 295 byte[] b = new byte[len]; 296 System.arraycopy(buffer, offset, b, 0, len); 297 return encoding.decode(b); 298 } 299 return ""; 300 } 301 302 /** 303 * Copy a name into a buffer. 304 * Copies characters from the name into the buffer 305 * starting at the specified offset. 306 * If the buffer is longer than the name, the buffer 307 * is filled with trailing NULs. 308 * If the name is longer than the buffer, 309 * the output is truncated. 310 * 311 * @param name The header name from which to copy the characters. 312 * @param buf The buffer where the name is to be stored. 313 * @param offset The starting offset into the buffer 314 * @param length The maximum number of header bytes to copy. 315 * @return The updated offset, i.e. offset + length 316 */ 317 public static int formatNameBytes(String name, byte[] buf, final int offset, final int length) { 318 try { 319 return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING); 320 } catch (IOException ex) { 321 try { 322 return formatNameBytes(name, buf, offset, length, 323 FALLBACK_ENCODING); 324 } catch (IOException ex2) { 325 // impossible 326 throw new RuntimeException(ex2); 327 } 328 } 329 } 330 331 /** 332 * Copy a name into a buffer. 333 * Copies characters from the name into the buffer 334 * starting at the specified offset. 335 * If the buffer is longer than the name, the buffer 336 * is filled with trailing NULs. 337 * If the name is longer than the buffer, 338 * the output is truncated. 339 * 340 * @param name The header name from which to copy the characters. 341 * @param buf The buffer where the name is to be stored. 342 * @param offset The starting offset into the buffer 343 * @param length The maximum number of header bytes to copy. 344 * @param encoding name of the encoding to use for file names 345 * @since Commons Compress 1.4 346 * @return The updated offset, i.e. offset + length 347 */ 348 public static int formatNameBytes(String name, byte[] buf, final int offset, 349 final int length, 350 final ZipEncoding encoding) 351 throws IOException { 352 int len = name.length(); 353 ByteBuffer b = encoding.encode(name); 354 while (b.limit() > length && len > 0) { 355 b = encoding.encode(name.substring(0, --len)); 356 } 357 final int limit = b.limit() - b.position(); 358 System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit); 359 360 // Pad any remaining output bytes with NUL 361 for (int i = limit; i < length; ++i) { 362 buf[offset + i] = 0; 363 } 364 365 return offset + length; 366 } 367 368 /** 369 * Fill buffer with unsigned octal number, padded with leading zeroes. 370 * 371 * @param value number to convert to octal - treated as unsigned 372 * @param buffer destination buffer 373 * @param offset starting offset in buffer 374 * @param length length of buffer to fill 375 * @throws IllegalArgumentException if the value will not fit in the buffer 376 */ 377 public static void formatUnsignedOctalString(final long value, byte[] buffer, 378 final int offset, final int length) { 379 int remaining = length; 380 remaining--; 381 if (value == 0) { 382 buffer[offset + remaining--] = (byte) '0'; 383 } else { 384 long val = value; 385 for (; remaining >= 0 && val != 0; --remaining) { 386 // CheckStyle:MagicNumber OFF 387 buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7)); 388 val = val >>> 3; 389 // CheckStyle:MagicNumber ON 390 } 391 if (val != 0){ 392 throw new IllegalArgumentException 393 (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length); 394 } 395 } 396 397 for (; remaining >= 0; --remaining) { // leading zeros 398 buffer[offset + remaining] = (byte) '0'; 399 } 400 } 401 402 /** 403 * Write an octal integer into a buffer. 404 * 405 * Uses {@link #formatUnsignedOctalString} to format 406 * the value as an octal string with leading zeros. 407 * The converted number is followed by space and NUL 408 * 409 * @param value The value to write 410 * @param buf The buffer to receive the output 411 * @param offset The starting offset into the buffer 412 * @param length The size of the output buffer 413 * @return The updated offset, i.e offset+length 414 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 415 */ 416 public static int formatOctalBytes(final long value, byte[] buf, final int offset, final int length) { 417 418 int idx=length-2; // For space and trailing null 419 formatUnsignedOctalString(value, buf, offset, idx); 420 421 buf[offset + idx++] = (byte) ' '; // Trailing space 422 buf[offset + idx] = 0; // Trailing null 423 424 return offset + length; 425 } 426 427 /** 428 * Write an octal long integer into a buffer. 429 * 430 * Uses {@link #formatUnsignedOctalString} to format 431 * the value as an octal string with leading zeros. 432 * The converted number is followed by a space. 433 * 434 * @param value The value to write as octal 435 * @param buf The destinationbuffer. 436 * @param offset The starting offset into the buffer. 437 * @param length The length of the buffer 438 * @return The updated offset 439 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 440 */ 441 public static int formatLongOctalBytes(final long value, byte[] buf, final int offset, final int length) { 442 443 int idx=length-1; // For space 444 445 formatUnsignedOctalString(value, buf, offset, idx); 446 buf[offset + idx] = (byte) ' '; // Trailing space 447 448 return offset + length; 449 } 450 451 /** 452 * Write an long integer into a buffer as an octal string if this 453 * will fit, or as a binary number otherwise. 454 * 455 * Uses {@link #formatUnsignedOctalString} to format 456 * the value as an octal string with leading zeros. 457 * The converted number is followed by a space. 458 * 459 * @param value The value to write into the buffer. 460 * @param buf The destination buffer. 461 * @param offset The starting offset into the buffer. 462 * @param length The length of the buffer. 463 * @return The updated offset. 464 * @throws IllegalArgumentException if the value (and trailer) 465 * will not fit in the buffer. 466 * @since 1.4 467 */ 468 public static int formatLongOctalOrBinaryBytes( 469 final long value, byte[] buf, final int offset, final int length) { 470 471 // Check whether we are dealing with UID/GID or SIZE field 472 final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE; 473 474 final boolean negative = value < 0; 475 if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars 476 return formatLongOctalBytes(value, buf, offset, length); 477 } 478 479 if (length < 9) { 480 formatLongBinary(value, buf, offset, length, negative); 481 } 482 formatBigIntegerBinary(value, buf, offset, length, negative); 483 484 buf[offset] = (byte) (negative ? 0xff : 0x80); 485 return offset + length; 486 } 487 488 private static void formatLongBinary(final long value, byte[] buf, 489 final int offset, final int length, 490 final boolean negative) { 491 final int bits = (length - 1) * 8; 492 final long max = 1l << bits; 493 long val = Math.abs(value); 494 if (val >= max) { 495 throw new IllegalArgumentException("Value " + value + 496 " is too large for " + length + " byte field."); 497 } 498 if (negative) { 499 val ^= max - 1; 500 val |= 0xff << bits; 501 val++; 502 } 503 for (int i = offset + length - 1; i >= offset; i--) { 504 buf[i] = (byte) val; 505 val >>= 8; 506 } 507 } 508 509 private static void formatBigIntegerBinary(final long value, byte[] buf, 510 final int offset, 511 final int length, 512 final boolean negative) { 513 BigInteger val = BigInteger.valueOf(value); 514 final byte[] b = val.toByteArray(); 515 final int len = b.length; 516 final int off = offset + length - len; 517 System.arraycopy(b, 0, buf, off, len); 518 final byte fill = (byte) (negative ? 0xff : 0); 519 for (int i = offset + 1; i < off; i++) { 520 buf[i] = fill; 521 } 522 } 523 524 /** 525 * Writes an octal value into a buffer. 526 * 527 * Uses {@link #formatUnsignedOctalString} to format 528 * the value as an octal string with leading zeros. 529 * The converted number is followed by NUL and then space. 530 * 531 * @param value The value to convert 532 * @param buf The destination buffer 533 * @param offset The starting offset into the buffer. 534 * @param length The size of the buffer. 535 * @return The updated value of offset, i.e. offset+length 536 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 537 */ 538 public static int formatCheckSumOctalBytes(final long value, byte[] buf, final int offset, final int length) { 539 540 int idx=length-2; // for NUL and space 541 formatUnsignedOctalString(value, buf, offset, idx); 542 543 buf[offset + idx++] = 0; // Trailing null 544 buf[offset + idx] = (byte) ' '; // Trailing space 545 546 return offset + length; 547 } 548 549 /** 550 * Compute the checksum of a tar entry header. 551 * 552 * @param buf The tar entry's header buffer. 553 * @return The computed checksum. 554 */ 555 public static long computeCheckSum(final byte[] buf) { 556 long sum = 0; 557 558 for (int i = 0; i < buf.length; ++i) { 559 sum += BYTE_MASK & buf[i]; 560 } 561 562 return sum; 563 } 564 565 /** 566 * Wikipedia <a href="http://en.wikipedia.org/wiki/Tar_(file_format)#File_header">says</a>: 567 * <blockquote> 568 * The checksum is calculated by taking the sum of the unsigned byte values 569 * of the header block with the eight checksum bytes taken to be ascii 570 * spaces (decimal value 32). It is stored as a six digit octal number with 571 * leading zeroes followed by a NUL and then a space. Various 572 * implementations do not adhere to this format. For better compatibility, 573 * ignore leading and trailing whitespace, and get the first six digits. In 574 * addition, some historic tar implementations treated bytes as signed. 575 * Implementations typically calculate the checksum both ways, and treat it 576 * as good if either the signed or unsigned sum matches the included 577 * checksum. 578 * </blockquote> 579 * <p> 580 * In addition there are 581 * <a href="https://issues.apache.org/jira/browse/COMPRESS-117">some tar files</a> 582 * that seem to have parts of their header cleared to zero (no detectable 583 * magic bytes, etc.) but still have a reasonable-looking checksum field 584 * present. It looks like we can detect such cases reasonably well by 585 * checking whether the stored checksum is <em>greater than</em> the 586 * computed unsigned checksum. That check is unlikely to pass on some 587 * random file header, as it would need to have a valid sequence of 588 * octal digits in just the right place. 589 * <p> 590 * The return value of this method should be treated as a best-effort 591 * heuristic rather than an absolute and final truth. The checksum 592 * verification logic may well evolve over time as more special cases 593 * are encountered. 594 * 595 * @param header tar header 596 * @return whether the checksum is reasonably good 597 * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a> 598 * @since 1.5 599 */ 600 public static boolean verifyCheckSum(byte[] header) { 601 long storedSum = 0; 602 long unsignedSum = 0; 603 long signedSum = 0; 604 605 int digits = 0; 606 for (int i = 0; i < header.length; i++) { 607 byte b = header[i]; 608 if (CHKSUM_OFFSET <= i && i < CHKSUM_OFFSET + CHKSUMLEN) { 609 if ('0' <= b && b <= '7' && digits++ < 6) { 610 storedSum = storedSum * 8 + b - '0'; 611 } else if (digits > 0) { 612 digits = 6; // only look at the first octal digit sequence 613 } 614 b = ' '; 615 } 616 unsignedSum += 0xff & b; 617 signedSum += b; 618 } 619 620 return storedSum == unsignedSum || storedSum == signedSum 621 || storedSum > unsignedSum; // COMPRESS-177 622 } 623 624 }