001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019 package org.apache.commons.compress.archivers.tar; 020 021 import java.io.File; 022 import java.io.IOException; 023 import java.io.OutputStream; 024 import java.io.StringWriter; 025 import java.nio.ByteBuffer; 026 import java.util.Arrays; 027 import java.util.HashMap; 028 import java.util.Map; 029 import org.apache.commons.compress.archivers.ArchiveEntry; 030 import org.apache.commons.compress.archivers.ArchiveOutputStream; 031 import org.apache.commons.compress.archivers.zip.ZipEncoding; 032 import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; 033 import org.apache.commons.compress.utils.CharsetNames; 034 import org.apache.commons.compress.utils.CountingOutputStream; 035 036 /** 037 * The TarOutputStream writes a UNIX tar archive as an OutputStream. 038 * Methods are provided to put entries, and then write their contents 039 * by writing to this stream using write(). 040 * @NotThreadSafe 041 */ 042 public class TarArchiveOutputStream extends ArchiveOutputStream { 043 /** Fail if a long file name is required in the archive. */ 044 public static final int LONGFILE_ERROR = 0; 045 046 /** Long paths will be truncated in the archive. */ 047 public static final int LONGFILE_TRUNCATE = 1; 048 049 /** GNU tar extensions are used to store long file names in the archive. */ 050 public static final int LONGFILE_GNU = 2; 051 052 /** POSIX/PAX extensions are used to store long file names in the archive. */ 053 public static final int LONGFILE_POSIX = 3; 054 055 /** Fail if a big number (e.g. size > 8GiB) is required in the archive. */ 056 public static final int BIGNUMBER_ERROR = 0; 057 058 /** star/GNU tar/BSD tar extensions are used to store big number in the archive. */ 059 public static final int BIGNUMBER_STAR = 1; 060 061 /** POSIX/PAX extensions are used to store big numbers in the archive. */ 062 public static final int BIGNUMBER_POSIX = 2; 063 064 private long currSize; 065 private String currName; 066 private long currBytes; 067 private final byte[] recordBuf; 068 private int assemLen; 069 private final byte[] assemBuf; 070 protected final TarBuffer buffer; 071 private int longFileMode = LONGFILE_ERROR; 072 private int bigNumberMode = BIGNUMBER_ERROR; 073 074 private boolean closed = false; 075 076 /** Indicates if putArchiveEntry has been called without closeArchiveEntry */ 077 private boolean haveUnclosedEntry = false; 078 079 /** indicates if this archive is finished */ 080 private boolean finished = false; 081 082 private final OutputStream out; 083 084 private final ZipEncoding encoding; 085 086 private boolean addPaxHeadersForNonAsciiNames = false; 087 private static final ZipEncoding ASCII = 088 ZipEncodingHelper.getZipEncoding("ASCII"); 089 090 /** 091 * Constructor for TarInputStream. 092 * @param os the output stream to use 093 */ 094 public TarArchiveOutputStream(OutputStream os) { 095 this(os, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE); 096 } 097 098 /** 099 * Constructor for TarInputStream. 100 * @param os the output stream to use 101 * @param encoding name of the encoding to use for file names 102 * @since Commons Compress 1.4 103 */ 104 public TarArchiveOutputStream(OutputStream os, String encoding) { 105 this(os, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE, encoding); 106 } 107 108 /** 109 * Constructor for TarInputStream. 110 * @param os the output stream to use 111 * @param blockSize the block size to use 112 */ 113 public TarArchiveOutputStream(OutputStream os, int blockSize) { 114 this(os, blockSize, TarBuffer.DEFAULT_RCDSIZE); 115 } 116 117 /** 118 * Constructor for TarInputStream. 119 * @param os the output stream to use 120 * @param blockSize the block size to use 121 * @param encoding name of the encoding to use for file names 122 * @since Commons Compress 1.4 123 */ 124 public TarArchiveOutputStream(OutputStream os, int blockSize, 125 String encoding) { 126 this(os, blockSize, TarBuffer.DEFAULT_RCDSIZE, encoding); 127 } 128 129 /** 130 * Constructor for TarInputStream. 131 * @param os the output stream to use 132 * @param blockSize the block size to use 133 * @param recordSize the record size to use 134 */ 135 public TarArchiveOutputStream(OutputStream os, int blockSize, int recordSize) { 136 this(os, blockSize, recordSize, null); 137 } 138 139 /** 140 * Constructor for TarInputStream. 141 * @param os the output stream to use 142 * @param blockSize the block size to use 143 * @param recordSize the record size to use 144 * @param encoding name of the encoding to use for file names 145 * @since Commons Compress 1.4 146 */ 147 public TarArchiveOutputStream(OutputStream os, int blockSize, 148 int recordSize, String encoding) { 149 out = new CountingOutputStream(os); 150 this.encoding = ZipEncodingHelper.getZipEncoding(encoding); 151 152 this.buffer = new TarBuffer(out, blockSize, recordSize); 153 this.assemLen = 0; 154 this.assemBuf = new byte[recordSize]; 155 this.recordBuf = new byte[recordSize]; 156 } 157 158 /** 159 * Set the long file mode. 160 * This can be LONGFILE_ERROR(0), LONGFILE_TRUNCATE(1) or LONGFILE_GNU(2). 161 * This specifies the treatment of long file names (names >= TarConstants.NAMELEN). 162 * Default is LONGFILE_ERROR. 163 * @param longFileMode the mode to use 164 */ 165 public void setLongFileMode(int longFileMode) { 166 this.longFileMode = longFileMode; 167 } 168 169 /** 170 * Set the big number mode. 171 * This can be BIGNUMBER_ERROR(0), BIGNUMBER_POSIX(1) or BIGNUMBER_STAR(2). 172 * This specifies the treatment of big files (sizes > TarConstants.MAXSIZE) and other numeric values to big to fit into a traditional tar header. 173 * Default is BIGNUMBER_ERROR. 174 * @param bigNumberMode the mode to use 175 * @since 1.4 176 */ 177 public void setBigNumberMode(int bigNumberMode) { 178 this.bigNumberMode = bigNumberMode; 179 } 180 181 /** 182 * Whether to add a PAX extension header for non-ASCII file names. 183 * @since 1.4 184 */ 185 public void setAddPaxHeadersForNonAsciiNames(boolean b) { 186 addPaxHeadersForNonAsciiNames = b; 187 } 188 189 @Deprecated 190 @Override 191 public int getCount() { 192 return (int) getBytesWritten(); 193 } 194 195 @Override 196 public long getBytesWritten() { 197 return ((CountingOutputStream) out).getBytesWritten(); 198 } 199 200 /** 201 * Ends the TAR archive without closing the underlying OutputStream. 202 * 203 * An archive consists of a series of file entries terminated by an 204 * end-of-archive entry, which consists of two 512 blocks of zero bytes. 205 * POSIX.1 requires two EOF records, like some other implementations. 206 * 207 * @throws IOException on error 208 */ 209 @Override 210 public void finish() throws IOException { 211 if (finished) { 212 throw new IOException("This archive has already been finished"); 213 } 214 215 if (haveUnclosedEntry) { 216 throw new IOException("This archives contains unclosed entries."); 217 } 218 writeEOFRecord(); 219 writeEOFRecord(); 220 buffer.flushBlock(); 221 finished = true; 222 } 223 224 /** 225 * Closes the underlying OutputStream. 226 * @throws IOException on error 227 */ 228 @Override 229 public void close() throws IOException { 230 if(!finished) { 231 finish(); 232 } 233 234 if (!closed) { 235 buffer.close(); 236 out.close(); 237 closed = true; 238 } 239 } 240 241 /** 242 * Get the record size being used by this stream's TarBuffer. 243 * 244 * @return The TarBuffer record size. 245 */ 246 public int getRecordSize() { 247 return buffer.getRecordSize(); 248 } 249 250 /** 251 * Put an entry on the output stream. This writes the entry's 252 * header record and positions the output stream for writing 253 * the contents of the entry. Once this method is called, the 254 * stream is ready for calls to write() to write the entry's 255 * contents. Once the contents are written, closeArchiveEntry() 256 * <B>MUST</B> be called to ensure that all buffered data 257 * is completely written to the output stream. 258 * 259 * @param archiveEntry The TarEntry to be written to the archive. 260 * @throws IOException on error 261 * @throws ClassCastException if archiveEntry is not an instance of TarArchiveEntry 262 */ 263 @Override 264 public void putArchiveEntry(ArchiveEntry archiveEntry) throws IOException { 265 if(finished) { 266 throw new IOException("Stream has already been finished"); 267 } 268 TarArchiveEntry entry = (TarArchiveEntry) archiveEntry; 269 Map<String, String> paxHeaders = new HashMap<String, String>(); 270 final String entryName = entry.getName(); 271 final ByteBuffer encodedName = encoding.encode(entryName); 272 final int nameLen = encodedName.limit() - encodedName.position(); 273 boolean paxHeaderContainsPath = false; 274 if (nameLen >= TarConstants.NAMELEN) { 275 276 if (longFileMode == LONGFILE_POSIX) { 277 paxHeaders.put("path", entryName); 278 paxHeaderContainsPath = true; 279 } else if (longFileMode == LONGFILE_GNU) { 280 // create a TarEntry for the LongLink, the contents 281 // of which are the entry's name 282 TarArchiveEntry longLinkEntry = new TarArchiveEntry(TarConstants.GNU_LONGLINK, 283 TarConstants.LF_GNUTYPE_LONGNAME); 284 285 longLinkEntry.setSize(nameLen + 1); // +1 for NUL 286 putArchiveEntry(longLinkEntry); 287 write(encodedName.array(), encodedName.arrayOffset(), nameLen); 288 write(0); // NUL terminator 289 closeArchiveEntry(); 290 } else if (longFileMode != LONGFILE_TRUNCATE) { 291 throw new RuntimeException("file name '" + entryName 292 + "' is too long ( > " 293 + TarConstants.NAMELEN + " bytes)"); 294 } 295 } 296 297 if (bigNumberMode == BIGNUMBER_POSIX) { 298 addPaxHeadersForBigNumbers(paxHeaders, entry); 299 } else if (bigNumberMode != BIGNUMBER_STAR) { 300 failForBigNumbers(entry); 301 } 302 303 if (addPaxHeadersForNonAsciiNames && !paxHeaderContainsPath 304 && !ASCII.canEncode(entryName)) { 305 paxHeaders.put("path", entryName); 306 } 307 308 if (addPaxHeadersForNonAsciiNames 309 && (entry.isLink() || entry.isSymbolicLink()) 310 && !ASCII.canEncode(entry.getLinkName())) { 311 paxHeaders.put("linkpath", entry.getLinkName()); 312 } 313 314 if (paxHeaders.size() > 0) { 315 writePaxHeaders(entryName, paxHeaders); 316 } 317 318 entry.writeEntryHeader(recordBuf, encoding, 319 bigNumberMode == BIGNUMBER_STAR); 320 buffer.writeRecord(recordBuf); 321 322 currBytes = 0; 323 324 if (entry.isDirectory()) { 325 currSize = 0; 326 } else { 327 currSize = entry.getSize(); 328 } 329 currName = entryName; 330 haveUnclosedEntry = true; 331 } 332 333 /** 334 * Close an entry. This method MUST be called for all file 335 * entries that contain data. The reason is that we must 336 * buffer data written to the stream in order to satisfy 337 * the buffer's record based writes. Thus, there may be 338 * data fragments still being assembled that must be written 339 * to the output stream before this entry is closed and the 340 * next entry written. 341 * @throws IOException on error 342 */ 343 @Override 344 public void closeArchiveEntry() throws IOException { 345 if (finished) { 346 throw new IOException("Stream has already been finished"); 347 } 348 if (!haveUnclosedEntry){ 349 throw new IOException("No current entry to close"); 350 } 351 if (assemLen > 0) { 352 for (int i = assemLen; i < assemBuf.length; ++i) { 353 assemBuf[i] = 0; 354 } 355 356 buffer.writeRecord(assemBuf); 357 358 currBytes += assemLen; 359 assemLen = 0; 360 } 361 362 if (currBytes < currSize) { 363 throw new IOException("entry '" + currName + "' closed at '" 364 + currBytes 365 + "' before the '" + currSize 366 + "' bytes specified in the header were written"); 367 } 368 haveUnclosedEntry = false; 369 } 370 371 /** 372 * Writes bytes to the current tar archive entry. This method 373 * is aware of the current entry and will throw an exception if 374 * you attempt to write bytes past the length specified for the 375 * current entry. The method is also (painfully) aware of the 376 * record buffering required by TarBuffer, and manages buffers 377 * that are not a multiple of recordsize in length, including 378 * assembling records from small buffers. 379 * 380 * @param wBuf The buffer to write to the archive. 381 * @param wOffset The offset in the buffer from which to get bytes. 382 * @param numToWrite The number of bytes to write. 383 * @throws IOException on error 384 */ 385 @Override 386 public void write(byte[] wBuf, int wOffset, int numToWrite) throws IOException { 387 if ((currBytes + numToWrite) > currSize) { 388 throw new IOException("request to write '" + numToWrite 389 + "' bytes exceeds size in header of '" 390 + currSize + "' bytes for entry '" 391 + currName + "'"); 392 393 // 394 // We have to deal with assembly!!! 395 // The programmer can be writing little 32 byte chunks for all 396 // we know, and we must assemble complete records for writing. 397 // REVIEW Maybe this should be in TarBuffer? Could that help to 398 // eliminate some of the buffer copying. 399 // 400 } 401 402 if (assemLen > 0) { 403 if ((assemLen + numToWrite) >= recordBuf.length) { 404 int aLen = recordBuf.length - assemLen; 405 406 System.arraycopy(assemBuf, 0, recordBuf, 0, 407 assemLen); 408 System.arraycopy(wBuf, wOffset, recordBuf, 409 assemLen, aLen); 410 buffer.writeRecord(recordBuf); 411 412 currBytes += recordBuf.length; 413 wOffset += aLen; 414 numToWrite -= aLen; 415 assemLen = 0; 416 } else { 417 System.arraycopy(wBuf, wOffset, assemBuf, assemLen, 418 numToWrite); 419 420 wOffset += numToWrite; 421 assemLen += numToWrite; 422 numToWrite = 0; 423 } 424 } 425 426 // 427 // When we get here we have EITHER: 428 // o An empty "assemble" buffer. 429 // o No bytes to write (numToWrite == 0) 430 // 431 while (numToWrite > 0) { 432 if (numToWrite < recordBuf.length) { 433 System.arraycopy(wBuf, wOffset, assemBuf, assemLen, 434 numToWrite); 435 436 assemLen += numToWrite; 437 438 break; 439 } 440 441 buffer.writeRecord(wBuf, wOffset); 442 443 int num = recordBuf.length; 444 445 currBytes += num; 446 numToWrite -= num; 447 wOffset += num; 448 } 449 } 450 451 /** 452 * Writes a PAX extended header with the given map as contents. 453 * @since 1.4 454 */ 455 void writePaxHeaders(String entryName, 456 Map<String, String> headers) throws IOException { 457 String name = "./PaxHeaders.X/" + stripTo7Bits(entryName); 458 if (name.length() >= TarConstants.NAMELEN) { 459 name = name.substring(0, TarConstants.NAMELEN - 1); 460 } 461 while (name.endsWith("/")) { 462 // TarEntry's constructor would think this is a directory 463 // and not allow any data to be written 464 name = name.substring(0, name.length() - 1); 465 } 466 TarArchiveEntry pex = new TarArchiveEntry(name, 467 TarConstants.LF_PAX_EXTENDED_HEADER_LC); 468 469 StringWriter w = new StringWriter(); 470 for (Map.Entry<String, String> h : headers.entrySet()) { 471 String key = h.getKey(); 472 String value = h.getValue(); 473 int len = key.length() + value.length() 474 + 3 /* blank, equals and newline */ 475 + 2 /* guess 9 < actual length < 100 */; 476 String line = len + " " + key + "=" + value + "\n"; 477 int actualLength = line.getBytes(CharsetNames.UTF_8).length; 478 while (len != actualLength) { 479 // Adjust for cases where length < 10 or > 100 480 // or where UTF-8 encoding isn't a single octet 481 // per character. 482 // Must be in loop as size may go from 99 to 100 in 483 // first pass so we'd need a second. 484 len = actualLength; 485 line = len + " " + key + "=" + value + "\n"; 486 actualLength = line.getBytes(CharsetNames.UTF_8).length; 487 } 488 w.write(line); 489 } 490 byte[] data = w.toString().getBytes(CharsetNames.UTF_8); 491 pex.setSize(data.length); 492 putArchiveEntry(pex); 493 write(data); 494 closeArchiveEntry(); 495 } 496 497 private String stripTo7Bits(String name) { 498 final int length = name.length(); 499 StringBuffer result = new StringBuffer(length); 500 for (int i = 0; i < length; i++) { 501 char stripped = (char) (name.charAt(i) & 0x7F); 502 if (stripped != 0) { // would be read as Trailing null 503 result.append(stripped); 504 } 505 } 506 return result.toString(); 507 } 508 509 /** 510 * Write an EOF (end of archive) record to the tar archive. 511 * An EOF record consists of a record of all zeros. 512 */ 513 private void writeEOFRecord() throws IOException { 514 Arrays.fill(recordBuf, (byte) 0); 515 buffer.writeRecord(recordBuf); 516 } 517 518 @Override 519 public void flush() throws IOException { 520 out.flush(); 521 } 522 523 /** {@inheritDoc} */ 524 @Override 525 public ArchiveEntry createArchiveEntry(File inputFile, String entryName) 526 throws IOException { 527 if(finished) { 528 throw new IOException("Stream has already been finished"); 529 } 530 return new TarArchiveEntry(inputFile, entryName); 531 } 532 533 private void addPaxHeadersForBigNumbers(Map<String, String> paxHeaders, 534 TarArchiveEntry entry) { 535 addPaxHeaderForBigNumber(paxHeaders, "size", entry.getSize(), 536 TarConstants.MAXSIZE); 537 addPaxHeaderForBigNumber(paxHeaders, "gid", entry.getGroupId(), 538 TarConstants.MAXID); 539 addPaxHeaderForBigNumber(paxHeaders, "mtime", 540 entry.getModTime().getTime() / 1000, 541 TarConstants.MAXSIZE); 542 addPaxHeaderForBigNumber(paxHeaders, "uid", entry.getUserId(), 543 TarConstants.MAXID); 544 // star extensions by J\u00f6rg Schilling 545 addPaxHeaderForBigNumber(paxHeaders, "SCHILY.devmajor", 546 entry.getDevMajor(), TarConstants.MAXID); 547 addPaxHeaderForBigNumber(paxHeaders, "SCHILY.devminor", 548 entry.getDevMinor(), TarConstants.MAXID); 549 // there is no PAX header for file mode 550 failForBigNumber("mode", entry.getMode(), TarConstants.MAXID); 551 } 552 553 private void addPaxHeaderForBigNumber(Map<String, String> paxHeaders, 554 String header, long value, 555 long maxValue) { 556 if (value < 0 || value > maxValue) { 557 paxHeaders.put(header, String.valueOf(value)); 558 } 559 } 560 561 private void failForBigNumbers(TarArchiveEntry entry) { 562 failForBigNumber("entry size", entry.getSize(), TarConstants.MAXSIZE); 563 failForBigNumber("group id", entry.getGroupId(), TarConstants.MAXID); 564 failForBigNumber("last modification time", 565 entry.getModTime().getTime() / 1000, 566 TarConstants.MAXSIZE); 567 failForBigNumber("user id", entry.getUserId(), TarConstants.MAXID); 568 failForBigNumber("mode", entry.getMode(), TarConstants.MAXID); 569 failForBigNumber("major device number", entry.getDevMajor(), 570 TarConstants.MAXID); 571 failForBigNumber("minor device number", entry.getDevMinor(), 572 TarConstants.MAXID); 573 } 574 575 private void failForBigNumber(String field, long value, long maxValue) { 576 if (value < 0 || value > maxValue) { 577 throw new RuntimeException(field + " '" + value 578 + "' is too big ( > " 579 + maxValue + " )"); 580 } 581 } 582 }