/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/*
 * This package is based on the work done by Timothy Gerard Endres
 * (time@ice.com) to whom the Ant project is very grateful for his great code.
 */

package org.apache.commons.compress.archivers.tar;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.CharsetNames;

/**
 * The TarArchiveInputStream reads a UNIX tar archive as an InputStream.
 * Methods are provided to position the stream at each successive entry in
 * the archive, and then read each entry as a normal input stream
 * using read().
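 *
 * <p>A minimal usage sketch (the file name is a placeholder; any
 * InputStream over tar data works):</p>
 * <pre>{@code
 * TarArchiveInputStream tarIn =
 *     new TarArchiveInputStream(new FileInputStream("archive.tar"));
 * TarArchiveEntry entry;
 * while ((entry = tarIn.getNextTarEntry()) != null) {
 *     // entry metadata via entry.getName(), entry.getSize(), ...
 *     // entry data via tarIn.read(...), bounded by the entry's size
 * }
 * tarIn.close();
 * }</pre>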
 * @NotThreadSafe
 */
public class TarArchiveInputStream extends ArchiveInputStream {
    private static final int SMALL_BUFFER_SIZE = 256;
    private static final int BUFFER_SIZE = 8 * 1024;

    private final byte[] SKIP_BUF = new byte[BUFFER_SIZE];
    private final byte[] SMALL_BUF = new byte[SMALL_BUFFER_SIZE];

    private boolean hasHitEOF;
    private long entrySize;
    private long entryOffset;
    private byte[] readBuf;
    protected final TarBuffer buffer;
    private TarArchiveEntry currEntry;
    private final ZipEncoding encoding;

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     */
    public TarArchiveInputStream(InputStream is) {
        this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param encoding name of the encoding to use for file names
     * @since Commons Compress 1.4
     */
    public TarArchiveInputStream(InputStream is, String encoding) {
        this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE,
             encoding);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     */
    public TarArchiveInputStream(InputStream is, int blockSize) {
        this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param encoding name of the encoding to use for file names
     * @since Commons Compress 1.4
     */
    public TarArchiveInputStream(InputStream is, int blockSize,
                                 String encoding) {
        this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE, encoding);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     */
    public TarArchiveInputStream(InputStream is, int blockSize,
                                 int recordSize) {
        this(is, blockSize, recordSize, null);
    }

    /**
     * Constructor for TarArchiveInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     * @param encoding name of the encoding to use for file names
     * @since Commons Compress 1.4
     */
    public TarArchiveInputStream(InputStream is, int blockSize,
                                 int recordSize, String encoding) {
        this.buffer = new TarBuffer(is, blockSize, recordSize);
        this.readBuf = null;
        this.hasHitEOF = false;
        this.encoding = ZipEncodingHelper.getZipEncoding(encoding);
    }

    /**
     * Closes this stream. Calls the TarBuffer's close() method.
     * @throws IOException on error
     */
    @Override
    public void close() throws IOException {
        buffer.close();
    }

    /**
     * Get the record size being used by this stream's TarBuffer.
     *
     * @return The TarBuffer record size.
     */
    public int getRecordSize() {
        return buffer.getRecordSize();
    }

    /**
     * Get the available data that can be read from the current
     * entry in the archive. This does not indicate how much data
     * is left in the entire archive, only in the current entry.
     * This value is determined from the entry's size header field
     * and the amount of data already read from the current entry.
     * Integer.MAX_VALUE is returned in case more than Integer.MAX_VALUE
     * bytes are left in the current entry in the archive.
     *
     * @return The number of available bytes for the current entry.
     * @throws IOException declared to satisfy the InputStream contract;
     * never actually thrown here
     */
    @Override
    public int available() throws IOException {
        if (entrySize - entryOffset > Integer.MAX_VALUE) {
            return Integer.MAX_VALUE;
        }
        return (int) (entrySize - entryOffset);
    }

    /**
     * Skip bytes in the input buffer. This skips bytes in the
     * current entry's data, not the entire archive, and will
     * stop at the end of the current entry's data if the number
     * to skip extends beyond that point.
     *
     * @param numToSkip The number of bytes to skip.
     * @return the number actually skipped
     * @throws IOException on error
     */
    @Override
    public long skip(long numToSkip) throws IOException {
        // REVIEW
        // This is horribly inefficient, but it ensures that we
        // properly skip over bytes via the TarBuffer...
        //
        long skip = numToSkip;
        while (skip > 0) {
            int realSkip = (int) (skip > SKIP_BUF.length
                                  ? SKIP_BUF.length : skip);
            int numRead = read(SKIP_BUF, 0, realSkip);
            if (numRead == -1) {
                break;
            }
            skip -= numRead;
        }
        return (numToSkip - skip);
    }

    /**
     * Since we do not support marking just yet, we do nothing.
     */
    @Override
    public synchronized void reset() {
    }
    /**
     * Get the next entry in this tar archive. This will skip
     * over any remaining data in the current entry, if there
     * is one, place the input stream at the header of the
     * next entry, read the header, instantiate a new
     * TarArchiveEntry from the header bytes and return that entry.
     * If there are no more entries in the archive, null will
     * be returned to indicate that the end of the archive has
     * been reached.
     *
     * @return The next TarArchiveEntry in the archive, or null.
     * @throws IOException on error
     */
    public TarArchiveEntry getNextTarEntry() throws IOException {
        if (hasHitEOF) {
            return null;
        }

        if (currEntry != null) {
            long numToSkip = entrySize - entryOffset;

            while (numToSkip > 0) {
                long skipped = skip(numToSkip);
                if (skipped <= 0) {
                    throw new IOException("failed to skip current tar"
                                          + " entry");
                }
                numToSkip -= skipped;
            }

            readBuf = null;
        }

        byte[] headerBuf = getRecord();

        if (hasHitEOF) {
            currEntry = null;
            return null;
        }

        try {
            currEntry = new TarArchiveEntry(headerBuf, encoding);
        } catch (IllegalArgumentException e) {
            IOException ioe =
                new IOException("Error detected parsing the header");
            ioe.initCause(e);
            throw ioe;
        }
        entryOffset = 0;
        entrySize = currEntry.getSize();

        if (currEntry.isGNULongNameEntry()) {
            // read in the name
            ByteArrayOutputStream longName = new ByteArrayOutputStream();
            int length = 0;
            while ((length = read(SMALL_BUF)) >= 0) {
                longName.write(SMALL_BUF, 0, length);
            }
            getNextEntry();
            if (currEntry == null) {
                // Bugzilla: 40334
                // Malformed tar file - long entry name not followed by entry
                return null;
            }
            byte[] longNameData = longName.toByteArray();
            // remove trailing null terminator(s)
            length = longNameData.length;
            while (length > 0 && longNameData[length - 1] == 0) {
                --length;
            }
            if (length != longNameData.length) {
                byte[] l = new byte[length];
                System.arraycopy(longNameData, 0, l, 0, length);
                longNameData = l;
            }

            currEntry.setName(encoding.decode(longNameData));
        }

        if (currEntry.isPaxHeader()) { // Process Pax headers
            paxHeaders();
        }

        if (currEntry.isGNUSparse()) { // Process sparse files
            readGNUSparse();
        }

        // If the size of the next element in the archive has changed
        // due to a new size being reported in the posix header
        // information, we update entrySize here so that it contains
        // the correct value.
        entrySize = currEntry.getSize();
        return currEntry;
    }

    /**
     * Get the next record in this tar archive. This will skip
     * over any remaining data in the current entry, if there
     * is one, and place the input stream at the header of the
     * next entry.
     * If there are no more entries in the archive, null will
     * be returned to indicate that the end of the archive has
     * been reached.
     *
     * @return The next header in the archive, or null.
     * @throws IOException on error
     */
    private byte[] getRecord() throws IOException {
        if (hasHitEOF) {
            return null;
        }

        byte[] headerBuf = buffer.readRecord();

        if (headerBuf == null) {
            hasHitEOF = true;
        } else if (buffer.isEOFRecord(headerBuf)) {
            hasHitEOF = true;
            buffer.tryToConsumeSecondEOFRecord();
        }

        return hasHitEOF ? null : headerBuf;
    }

    private void paxHeaders() throws IOException {
        Map<String, String> headers = parsePaxHeaders(this);
        getNextEntry(); // Get the actual file entry
        applyPaxHeadersToCurrentEntry(headers);
    }

    Map<String, String> parsePaxHeaders(InputStream i) throws IOException {
        Map<String, String> headers = new HashMap<String, String>();
        // Format is "length keyword=value\n";
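        // Worked example (illustrative, not from any particular archive):
        // the record "30 mtime=1321711775.972059463\n" has total length 30,
        // counting the two length digits, the space, "mtime", '=', the
        // 20-byte value and the trailing newline. The loop below accumulates
        // the decimal length, reads the keyword up to '=', then reads
        // exactly (len - bytes already consumed) remaining bytes and drops
        // the final '\n'.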
        while (true) { // get length
            int ch;
            int len = 0;
            int read = 0;
            while ((ch = i.read()) != -1) {
                read++;
                if (ch == ' ') { // End of length string
                    // Get keyword
                    ByteArrayOutputStream coll = new ByteArrayOutputStream();
                    while ((ch = i.read()) != -1) {
                        read++;
                        if (ch == '=') { // end of keyword
                            String keyword = coll.toString(CharsetNames.UTF_8);
                            // Get rest of entry
                            byte[] rest = new byte[len - read];
                            int got = i.read(rest);
                            if (got != len - read) {
                                throw new IOException("Failed to read "
                                        + "Pax header. Expected "
                                        + (len - read)
                                        + " bytes, read "
                                        + got);
                            }
                            // Drop trailing NL
                            String value = new String(rest, 0,
                                    len - read - 1, CharsetNames.UTF_8);
                            headers.put(keyword, value);
                            break;
                        }
                        coll.write((byte) ch);
                    }
                    break; // Processed single header
                }
                len *= 10;
                len += ch - '0';
            }
            if (ch == -1) { // EOF
                break;
            }
        }
        return headers;
    }

    private void applyPaxHeadersToCurrentEntry(Map<String, String> headers) {
        /*
         * The following headers are defined for Pax.
         * atime, ctime, charset: cannot use these without changing
         *                        TarArchiveEntry fields
         * mtime
         * comment
         * gid, gname
         * linkpath
         * size
         * uid, uname
         * SCHILY.devminor, SCHILY.devmajor: don't have setters/getters
         *                                   for those
         */
        for (Entry<String, String> ent : headers.entrySet()) {
            String key = ent.getKey();
            String val = ent.getValue();
            if ("path".equals(key)) {
                currEntry.setName(val);
            } else if ("linkpath".equals(key)) {
                currEntry.setLinkName(val);
            } else if ("gid".equals(key)) {
                currEntry.setGroupId(Integer.parseInt(val));
            } else if ("gname".equals(key)) {
                currEntry.setGroupName(val);
            } else if ("uid".equals(key)) {
                currEntry.setUserId(Integer.parseInt(val));
            } else if ("uname".equals(key)) {
                currEntry.setUserName(val);
            } else if ("size".equals(key)) {
                currEntry.setSize(Long.parseLong(val));
            } else if ("mtime".equals(key)) {
                currEntry.setModTime((long) (Double.parseDouble(val) * 1000));
            } else if ("SCHILY.devminor".equals(key)) {
                currEntry.setDevMinor(Integer.parseInt(val));
            } else if ("SCHILY.devmajor".equals(key)) {
                currEntry.setDevMajor(Integer.parseInt(val));
            }
        }
    }

    /**
     * Adds the sparse chunks from the current entry to the list of
     * sparse chunks, including any additional sparse entries following
     * the current entry.
     *
     * @throws IOException on error
     *
     * @todo Sparse files are not really processed yet.
     */
    private void readGNUSparse() throws IOException {
        /* we do not really process sparse files yet
        sparses = new ArrayList();
        sparses.addAll(currEntry.getSparses());
        */
        if (currEntry.isExtended()) {
            TarArchiveSparseEntry entry;
            do {
                byte[] headerBuf = getRecord();
                if (hasHitEOF) {
                    currEntry = null;
                    break;
                }
                entry = new TarArchiveSparseEntry(headerBuf);
                /* we do not really process sparse files yet
                sparses.addAll(entry.getSparses());
                */
            } while (entry.isExtended());
        }
    }

    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextTarEntry();
    }

    /**
     * Reads bytes from the current tar archive entry.
     *
     * This method is aware of the boundaries of the current
     * entry in the archive and will deal with them as if they
     * were this stream's start and EOF.
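     *
     * <p>A typical drain loop over one entry (sketch; {@code out} is any
     * OutputStream supplied by the caller):</p>
     * <pre>{@code
     * byte[] buf = new byte[4096];
     * int n;
     * while ((n = tarIn.read(buf, 0, buf.length)) != -1) {
     *     out.write(buf, 0, n); // read() returns -1 at the entry boundary
     * }
     * }</pre>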
     *
     * @param buf The buffer into which to place bytes read.
     * @param offset The offset at which to place bytes read.
     * @param numToRead The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(byte[] buf, int offset, int numToRead) throws IOException {
        int totalRead = 0;

        if (entryOffset >= entrySize) {
            return -1;
        }

        if ((numToRead + entryOffset) > entrySize) {
            numToRead = (int) (entrySize - entryOffset);
        }

        if (readBuf != null) {
            int sz = (numToRead > readBuf.length) ? readBuf.length
                                                  : numToRead;

            System.arraycopy(readBuf, 0, buf, offset, sz);

            if (sz >= readBuf.length) {
                readBuf = null;
            } else {
                int newLen = readBuf.length - sz;
                byte[] newBuf = new byte[newLen];

                System.arraycopy(readBuf, sz, newBuf, 0, newLen);

                readBuf = newBuf;
            }

            totalRead += sz;
            numToRead -= sz;
            offset += sz;
        }

        while (numToRead > 0) {
            byte[] rec = buffer.readRecord();

            if (rec == null) {
                // Unexpected EOF!
                throw new IOException("unexpected EOF with " + numToRead
                        + " bytes unread. Occurred at byte: "
                        + getBytesRead());
            }
            count(rec.length);
            int sz = numToRead;
            int recLen = rec.length;

            if (recLen > sz) {
                System.arraycopy(rec, 0, buf, offset, sz);

                readBuf = new byte[recLen - sz];

                System.arraycopy(rec, sz, readBuf, 0, recLen - sz);
            } else {
                sz = recLen;

                System.arraycopy(rec, 0, buf, offset, recLen);
            }

            totalRead += sz;
            numToRead -= sz;
            offset += sz;
        }

        entryOffset += totalRead;

        return totalRead;
    }

    /**
     * Whether this class is able to read the given entry.
     *
     * <p>May return false if the current entry is a sparse file.</p>
     */
    @Override
    public boolean canReadEntryData(ArchiveEntry ae) {
        if (ae instanceof TarArchiveEntry) {
            TarArchiveEntry te = (TarArchiveEntry) ae;
            return !te.isGNUSparse();
        }
        return false;
    }

    protected final TarArchiveEntry getCurrentEntry() {
        return currEntry;
    }

    protected final void setCurrentEntry(TarArchiveEntry e) {
        currEntry = e;
    }

    protected final boolean isAtEOF() {
        return hasHitEOF;
    }

    protected final void setAtEOF(boolean b) {
        hasHitEOF = b;
    }

    /**
     * Checks if the signature matches what is expected for a tar file.
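     *
     * <p>A probe sketch (assumes {@code in} is an InputStream positioned
     * at the start of the candidate data):</p>
     * <pre>{@code
     * byte[] signature = new byte[512];
     * int n = in.read(signature);
     * boolean isTar = TarArchiveInputStream.matches(signature, n);
     * }</pre>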
562 * 563 * @param signature 564 * the bytes to check 565 * @param length 566 * the number of bytes to check 567 * @return true, if this stream is a tar archive stream, false otherwise 568 */ 569 public static boolean matches(byte[] signature, int length) { 570 if (length < TarConstants.VERSION_OFFSET+TarConstants.VERSIONLEN) { 571 return false; 572 } 573 574 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX, 575 signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN) 576 && 577 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX, 578 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN) 579 ){ 580 return true; 581 } 582 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU, 583 signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN) 584 && 585 ( 586 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE, 587 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN) 588 || 589 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO, 590 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN) 591 ) 592 ){ 593 return true; 594 } 595 // COMPRESS-107 - recognise Ant tar files 596 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT, 597 signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN) 598 && 599 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT, 600 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN) 601 ){ 602 return true; 603 } 604 return false; 605 } 606 607 }