001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     * http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    package org.apache.commons.compress.archivers.tar;
020    
021    import java.io.File;
022    import java.io.IOException;
023    import java.io.OutputStream;
024    import java.io.StringWriter;
025    import java.nio.ByteBuffer;
026    import java.util.Arrays;
027    import java.util.HashMap;
028    import java.util.Map;
029    import org.apache.commons.compress.archivers.ArchiveEntry;
030    import org.apache.commons.compress.archivers.ArchiveOutputStream;
031    import org.apache.commons.compress.archivers.zip.ZipEncoding;
032    import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
033    import org.apache.commons.compress.utils.CharsetNames;
034    import org.apache.commons.compress.utils.CountingOutputStream;
035    
036    /**
037     * The TarOutputStream writes a UNIX tar archive as an OutputStream.
038     * Methods are provided to put entries, and then write their contents
039     * by writing to this stream using write().
040     * @NotThreadSafe
041     */
042    public class TarArchiveOutputStream extends ArchiveOutputStream {
    /** Fail if a long file name is required in the archive. */
    public static final int LONGFILE_ERROR = 0;

    /** Long paths will be truncated in the archive. */
    public static final int LONGFILE_TRUNCATE = 1;

    /** GNU tar extensions are used to store long file names in the archive. */
    public static final int LONGFILE_GNU = 2;

    /** POSIX/PAX extensions are used to store long file names in the archive. */
    public static final int LONGFILE_POSIX = 3;

    /** Fail if a big number (e.g. size > 8GiB) is required in the archive. */
    public static final int BIGNUMBER_ERROR = 0;

    /** star/GNU tar/BSD tar extensions are used to store big number in the archive. */
    public static final int BIGNUMBER_STAR = 1;

    /** POSIX/PAX extensions are used to store big numbers in the archive. */
    public static final int BIGNUMBER_POSIX = 2;

    /** Size of the current entry as set by putArchiveEntry (0 for directories). */
    private long      currSize;
    /** Name of the current entry; only used in error messages. */
    private String    currName;
    /** Number of bytes of the current entry already handed to {@link #buffer}. */
    private long      currBytes;
    /** Record-sized scratch buffer used for entry headers, EOF records and completed partial records. */
    private final byte[]    recordBuf;
    /** Number of valid bytes currently held in {@link #assemBuf}. */
    private int       assemLen;
    /** Holds entry data that does not yet fill a complete record. */
    private final byte[]    assemBuf;
    /** Receives complete records and writes them to {@link #out}. */
    protected final TarBuffer buffer;
    /** One of the LONGFILE_* constants; see setLongFileMode. */
    private int       longFileMode = LONGFILE_ERROR;
    /** One of the BIGNUMBER_* constants; see setBigNumberMode. */
    private int       bigNumberMode = BIGNUMBER_ERROR;

    /** Set once close() has closed {@link #buffer} and {@link #out}. */
    private boolean closed = false;

    /** Indicates if putArchiveEntry has been called without closeArchiveEntry */
    private boolean haveUnclosedEntry = false;

    /** indicates if this archive is finished */
    private boolean finished = false;

    /** The caller's stream wrapped in a CountingOutputStream (see the constructor). */
    private final OutputStream out;

    /** Encoding used for entry names and when writing entry headers. */
    private final ZipEncoding encoding;

    /** Whether PAX path/linkpath headers are added for names that are not pure ASCII. */
    private boolean addPaxHeadersForNonAsciiNames = false;
    /** Used to test whether a name can be represented in ASCII. */
    private static final ZipEncoding ASCII =
        ZipEncodingHelper.getZipEncoding("ASCII");
089    
    /**
     * Creates a tar output stream that uses {@code TarBuffer.DEFAULT_BLKSIZE}
     * and {@code TarBuffer.DEFAULT_RCDSIZE}.
     * @param os the output stream to use
     */
    public TarArchiveOutputStream(OutputStream os) {
        this(os, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE);
    }
097    
    /**
     * Creates a tar output stream that uses {@code TarBuffer.DEFAULT_BLKSIZE},
     * {@code TarBuffer.DEFAULT_RCDSIZE} and the given file name encoding.
     * @param os the output stream to use
     * @param encoding name of the encoding to use for file names
     * @since Commons Compress 1.4
     */
    public TarArchiveOutputStream(OutputStream os, String encoding) {
        this(os, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE, encoding);
    }
107    
    /**
     * Creates a tar output stream with the given block size and
     * {@code TarBuffer.DEFAULT_RCDSIZE} as record size.
     * @param os the output stream to use
     * @param blockSize the block size to use
     */
    public TarArchiveOutputStream(OutputStream os, int blockSize) {
        this(os, blockSize, TarBuffer.DEFAULT_RCDSIZE);
    }
116    
    /**
     * Creates a tar output stream with the given block size and file name
     * encoding, using {@code TarBuffer.DEFAULT_RCDSIZE} as record size.
     * @param os the output stream to use
     * @param blockSize the block size to use
     * @param encoding name of the encoding to use for file names
     * @since Commons Compress 1.4
     */
    public TarArchiveOutputStream(OutputStream os, int blockSize,
                                  String encoding) {
        this(os, blockSize, TarBuffer.DEFAULT_RCDSIZE, encoding);
    }
128    
    /**
     * Creates a tar output stream with the given block and record sizes.
     * No encoding name is given; {@code null} is passed on instead.
     * @param os the output stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     */
    public TarArchiveOutputStream(OutputStream os, int blockSize, int recordSize) {
        this(os, blockSize, recordSize, null);
    }
138    
    /**
     * Creates a tar output stream with the given block size, record size and
     * file name encoding. All other constructors delegate to this one.
     * @param os the output stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     * @param encoding name of the encoding to use for file names; may be
     * {@code null} (NOTE(review): presumably selects the platform default -
     * confirm against ZipEncodingHelper.getZipEncoding)
     * @since Commons Compress 1.4
     */
    public TarArchiveOutputStream(OutputStream os, int blockSize,
                                  int recordSize, String encoding) {
        // wrap the target so getBytesWritten() can report the bytes written
        out = new CountingOutputStream(os);
        this.encoding = ZipEncodingHelper.getZipEncoding(encoding);

        this.buffer = new TarBuffer(out, blockSize, recordSize);
        this.assemLen = 0;
        // both scratch buffers hold exactly one record
        this.assemBuf = new byte[recordSize];
        this.recordBuf = new byte[recordSize];
    }
157    
    /**
     * Set the long file mode.
     * This can be LONGFILE_ERROR(0), LONGFILE_TRUNCATE(1), LONGFILE_GNU(2)
     * or LONGFILE_POSIX(3).
     * This specifies the treatment of long file names (names whose encoded
     * length is >= TarConstants.NAMELEN). Any other value is treated like
     * LONGFILE_ERROR when an entry with a long name is added.
     * Default is LONGFILE_ERROR.
     * @param longFileMode the mode to use
     */
    public void setLongFileMode(int longFileMode) {
        this.longFileMode = longFileMode;
    }
168    
    /**
     * Set the big number mode.
     * This can be BIGNUMBER_ERROR(0), BIGNUMBER_STAR(1) or BIGNUMBER_POSIX(2).
     * This specifies the treatment of big files (sizes > TarConstants.MAXSIZE)
     * and other numeric values too big to fit into a traditional tar header.
     * Any other value is treated like BIGNUMBER_ERROR when an entry is added.
     * Default is BIGNUMBER_ERROR.
     * @param bigNumberMode the mode to use
     * @since 1.4
     */
    public void setBigNumberMode(int bigNumberMode) {
        this.bigNumberMode = bigNumberMode;
    }
180    
    /**
     * Whether to add a PAX extension header for non-ASCII file names.
     * When enabled, putArchiveEntry adds a "path" header for entry names and
     * a "linkpath" header for link names that cannot be encoded as ASCII.
     * @param b {@code true} to add the headers, {@code false} (the default) otherwise
     * @since 1.4
     */
    public void setAddPaxHeadersForNonAsciiNames(boolean b) {
        addPaxHeadersForNonAsciiNames = b;
    }
188    
    /**
     * Returns the value of {@link #getBytesWritten()} narrowed to an
     * {@code int}; the result is therefore not meaningful once more than
     * {@code Integer.MAX_VALUE} bytes have been written.
     * @return the number of bytes written, cast to {@code int}
     * @deprecated use {@link #getBytesWritten()} instead
     */
    @Deprecated
    @Override
    public int getCount() {
        return (int) getBytesWritten();
    }
194    
    /**
     * Returns the byte count reported by the CountingOutputStream that wraps
     * the stream passed to the constructor.
     * @return the number of bytes written to the underlying stream so far
     */
    @Override
    public long getBytesWritten() {
        // 'out' is always the CountingOutputStream created in the constructor
        return ((CountingOutputStream) out).getBytesWritten();
    }
199    
200        /**
201         * Ends the TAR archive without closing the underlying OutputStream.
202         * 
203         * An archive consists of a series of file entries terminated by an
204         * end-of-archive entry, which consists of two 512 blocks of zero bytes. 
205         * POSIX.1 requires two EOF records, like some other implementations.
206         * 
207         * @throws IOException on error
208         */
209        @Override
210        public void finish() throws IOException {
211            if (finished) {
212                throw new IOException("This archive has already been finished");
213            }
214    
215            if (haveUnclosedEntry) {
216                throw new IOException("This archives contains unclosed entries.");
217            }
218            writeEOFRecord();
219            writeEOFRecord();
220            buffer.flushBlock();
221            finished = true;
222        }
223    
224        /**
225         * Closes the underlying OutputStream.
226         * @throws IOException on error
227         */
228        @Override
229        public void close() throws IOException {
230            if(!finished) {
231                finish();
232            }
233    
234            if (!closed) {
235                buffer.close();
236                out.close();
237                closed = true;
238            }
239        }
240    
    /**
     * Get the record size being used by this stream's TarBuffer.
     * The value is obtained from the buffer on every call.
     *
     * @return The TarBuffer record size.
     */
    public int getRecordSize() {
        return buffer.getRecordSize();
    }
249    
250        /**
251         * Put an entry on the output stream. This writes the entry's
252         * header record and positions the output stream for writing
253         * the contents of the entry. Once this method is called, the
254         * stream is ready for calls to write() to write the entry's
255         * contents. Once the contents are written, closeArchiveEntry()
256         * <B>MUST</B> be called to ensure that all buffered data
257         * is completely written to the output stream.
258         *
259         * @param archiveEntry The TarEntry to be written to the archive.
260         * @throws IOException on error
261         * @throws ClassCastException if archiveEntry is not an instance of TarArchiveEntry
262         */
263        @Override
264        public void putArchiveEntry(ArchiveEntry archiveEntry) throws IOException {
265            if(finished) {
266                throw new IOException("Stream has already been finished");
267            }
268            TarArchiveEntry entry = (TarArchiveEntry) archiveEntry;
269            Map<String, String> paxHeaders = new HashMap<String, String>();
270            final String entryName = entry.getName();
271            final ByteBuffer encodedName = encoding.encode(entryName);
272            final int nameLen = encodedName.limit() - encodedName.position();
273            boolean paxHeaderContainsPath = false;
274            if (nameLen >= TarConstants.NAMELEN) {
275    
276                if (longFileMode == LONGFILE_POSIX) {
277                    paxHeaders.put("path", entryName);
278                    paxHeaderContainsPath = true;
279                } else if (longFileMode == LONGFILE_GNU) {
280                    // create a TarEntry for the LongLink, the contents
281                    // of which are the entry's name
282                    TarArchiveEntry longLinkEntry = new TarArchiveEntry(TarConstants.GNU_LONGLINK,
283                                                                        TarConstants.LF_GNUTYPE_LONGNAME);
284    
285                    longLinkEntry.setSize(nameLen + 1); // +1 for NUL
286                    putArchiveEntry(longLinkEntry);
287                    write(encodedName.array(), encodedName.arrayOffset(), nameLen);
288                    write(0); // NUL terminator
289                    closeArchiveEntry();
290                } else if (longFileMode != LONGFILE_TRUNCATE) {
291                    throw new RuntimeException("file name '" + entryName
292                                               + "' is too long ( > "
293                                               + TarConstants.NAMELEN + " bytes)");
294                }
295            }
296    
297            if (bigNumberMode == BIGNUMBER_POSIX) {
298                addPaxHeadersForBigNumbers(paxHeaders, entry);
299            } else if (bigNumberMode != BIGNUMBER_STAR) {
300                failForBigNumbers(entry);
301            }
302    
303            if (addPaxHeadersForNonAsciiNames && !paxHeaderContainsPath
304                && !ASCII.canEncode(entryName)) {
305                paxHeaders.put("path", entryName);
306            }
307    
308            if (addPaxHeadersForNonAsciiNames
309                && (entry.isLink() || entry.isSymbolicLink())
310                && !ASCII.canEncode(entry.getLinkName())) {
311                paxHeaders.put("linkpath", entry.getLinkName());
312            }
313    
314            if (paxHeaders.size() > 0) {
315                writePaxHeaders(entryName, paxHeaders);
316            }
317    
318            entry.writeEntryHeader(recordBuf, encoding,
319                                   bigNumberMode == BIGNUMBER_STAR);
320            buffer.writeRecord(recordBuf);
321    
322            currBytes = 0;
323    
324            if (entry.isDirectory()) {
325                currSize = 0;
326            } else {
327                currSize = entry.getSize();
328            }
329            currName = entryName;
330            haveUnclosedEntry = true;
331        }
332    
333        /**
334         * Close an entry. This method MUST be called for all file
335         * entries that contain data. The reason is that we must
336         * buffer data written to the stream in order to satisfy
337         * the buffer's record based writes. Thus, there may be
338         * data fragments still being assembled that must be written
339         * to the output stream before this entry is closed and the
340         * next entry written.
341         * @throws IOException on error
342         */
343        @Override
344        public void closeArchiveEntry() throws IOException {
345            if (finished) {
346                throw new IOException("Stream has already been finished");
347            }
348            if (!haveUnclosedEntry){
349                throw new IOException("No current entry to close");
350            }
351            if (assemLen > 0) {
352                for (int i = assemLen; i < assemBuf.length; ++i) {
353                    assemBuf[i] = 0;
354                }
355    
356                buffer.writeRecord(assemBuf);
357    
358                currBytes += assemLen;
359                assemLen = 0;
360            }
361    
362            if (currBytes < currSize) {
363                throw new IOException("entry '" + currName + "' closed at '"
364                                      + currBytes
365                                      + "' before the '" + currSize
366                                      + "' bytes specified in the header were written");
367            }
368            haveUnclosedEntry = false;
369        }
370    
371        /**
372         * Writes bytes to the current tar archive entry. This method
373         * is aware of the current entry and will throw an exception if
374         * you attempt to write bytes past the length specified for the
375         * current entry. The method is also (painfully) aware of the
376         * record buffering required by TarBuffer, and manages buffers
377         * that are not a multiple of recordsize in length, including
378         * assembling records from small buffers.
379         *
380         * @param wBuf The buffer to write to the archive.
381         * @param wOffset The offset in the buffer from which to get bytes.
382         * @param numToWrite The number of bytes to write.
383         * @throws IOException on error
384         */
385        @Override
386        public void write(byte[] wBuf, int wOffset, int numToWrite) throws IOException {
387            if ((currBytes + numToWrite) > currSize) {
388                throw new IOException("request to write '" + numToWrite
389                                      + "' bytes exceeds size in header of '"
390                                      + currSize + "' bytes for entry '"
391                                      + currName + "'");
392    
393                //
394                // We have to deal with assembly!!!
395                // The programmer can be writing little 32 byte chunks for all
396                // we know, and we must assemble complete records for writing.
397                // REVIEW Maybe this should be in TarBuffer? Could that help to
398                // eliminate some of the buffer copying.
399                //
400            }
401    
402            if (assemLen > 0) {
403                if ((assemLen + numToWrite) >= recordBuf.length) {
404                    int aLen = recordBuf.length - assemLen;
405    
406                    System.arraycopy(assemBuf, 0, recordBuf, 0,
407                                     assemLen);
408                    System.arraycopy(wBuf, wOffset, recordBuf,
409                                     assemLen, aLen);
410                    buffer.writeRecord(recordBuf);
411    
412                    currBytes += recordBuf.length;
413                    wOffset += aLen;
414                    numToWrite -= aLen;
415                    assemLen = 0;
416                } else {
417                    System.arraycopy(wBuf, wOffset, assemBuf, assemLen,
418                                     numToWrite);
419    
420                    wOffset += numToWrite;
421                    assemLen += numToWrite;
422                    numToWrite = 0;
423                }
424            }
425    
426            //
427            // When we get here we have EITHER:
428            // o An empty "assemble" buffer.
429            // o No bytes to write (numToWrite == 0)
430            //
431            while (numToWrite > 0) {
432                if (numToWrite < recordBuf.length) {
433                    System.arraycopy(wBuf, wOffset, assemBuf, assemLen,
434                                     numToWrite);
435    
436                    assemLen += numToWrite;
437    
438                    break;
439                }
440    
441                buffer.writeRecord(wBuf, wOffset);
442    
443                int num = recordBuf.length;
444    
445                currBytes += num;
446                numToWrite -= num;
447                wOffset += num;
448            }
449        }
450    
451        /**
452         * Writes a PAX extended header with the given map as contents.
453         * @since 1.4
454         */
455        void writePaxHeaders(String entryName,
456                             Map<String, String> headers) throws IOException {
457            String name = "./PaxHeaders.X/" + stripTo7Bits(entryName);
458            if (name.length() >= TarConstants.NAMELEN) {
459                name = name.substring(0, TarConstants.NAMELEN - 1);
460            }
461            while (name.endsWith("/")) {
462                // TarEntry's constructor would think this is a directory
463                // and not allow any data to be written
464                name = name.substring(0, name.length() - 1);
465            }
466            TarArchiveEntry pex = new TarArchiveEntry(name,
467                                                      TarConstants.LF_PAX_EXTENDED_HEADER_LC);
468    
469            StringWriter w = new StringWriter();
470            for (Map.Entry<String, String> h : headers.entrySet()) {
471                String key = h.getKey();
472                String value = h.getValue();
473                int len = key.length() + value.length()
474                    + 3 /* blank, equals and newline */
475                    + 2 /* guess 9 < actual length < 100 */;
476                String line = len + " " + key + "=" + value + "\n";
477                int actualLength = line.getBytes(CharsetNames.UTF_8).length;
478                while (len != actualLength) {
479                    // Adjust for cases where length < 10 or > 100
480                    // or where UTF-8 encoding isn't a single octet
481                    // per character.
482                    // Must be in loop as size may go from 99 to 100 in
483                    // first pass so we'd need a second.
484                    len = actualLength;
485                    line = len + " " + key + "=" + value + "\n";
486                    actualLength = line.getBytes(CharsetNames.UTF_8).length;
487                }
488                w.write(line);
489            }
490            byte[] data = w.toString().getBytes(CharsetNames.UTF_8);
491            pex.setSize(data.length);
492            putArchiveEntry(pex);
493            write(data);
494            closeArchiveEntry();
495        }
496    
497        private String stripTo7Bits(String name) {
498            final int length = name.length();
499            StringBuffer result = new StringBuffer(length);
500            for (int i = 0; i < length; i++) {
501                char stripped = (char) (name.charAt(i) & 0x7F);
502                if (stripped != 0) { // would be read as Trailing null
503                    result.append(stripped);
504                }
505            }
506            return result.toString();
507        }
508    
    /**
     * Write an EOF (end of archive) record to the tar archive.
     * An EOF record consists of a record of all zeros.
     * The shared record scratch buffer is overwritten by this method.
     * @throws IOException if writing the record fails
     */
    private void writeEOFRecord() throws IOException {
        Arrays.fill(recordBuf, (byte) 0);
        buffer.writeRecord(recordBuf);
    }
517    
    /**
     * Flushes the wrapped output stream.
     * NOTE(review): only {@code out} is flushed here; data still held by the
     * TarBuffer or the assembly buffer is not written by this call -
     * confirm against TarBuffer whether that is intended.
     * @throws IOException on error
     */
    @Override
    public void flush() throws IOException {
        out.flush();
    }
522    
    /**
     * {@inheritDoc}
     *
     * <p>This implementation returns a new TarArchiveEntry built from the
     * given file and entry name.</p>
     *
     * @throws IOException if the stream has already been finished
     */
    @Override
    public ArchiveEntry createArchiveEntry(File inputFile, String entryName)
            throws IOException {
        if(finished) {
            throw new IOException("Stream has already been finished");
        }
        return new TarArchiveEntry(inputFile, entryName);
    }
532    
    /**
     * Adds a PAX header for every numeric field of the entry that is
     * negative or exceeds the limit used for the traditional header.
     *
     * @param paxHeaders the map the headers are added to
     * @param entry the entry whose fields are checked
     * @throws RuntimeException if the entry's mode is out of range
     */
    private void addPaxHeadersForBigNumbers(Map<String, String> paxHeaders,
                                            TarArchiveEntry entry) {
        addPaxHeaderForBigNumber(paxHeaders, "size", entry.getSize(),
                                 TarConstants.MAXSIZE);
        addPaxHeaderForBigNumber(paxHeaders, "gid", entry.getGroupId(),
                                 TarConstants.MAXID);
        // the modification time is checked in seconds, not milliseconds
        addPaxHeaderForBigNumber(paxHeaders, "mtime",
                                 entry.getModTime().getTime() / 1000,
                                 TarConstants.MAXSIZE);
        addPaxHeaderForBigNumber(paxHeaders, "uid", entry.getUserId(),
                                 TarConstants.MAXID);
        // star extensions by J\u00f6rg Schilling
        addPaxHeaderForBigNumber(paxHeaders, "SCHILY.devmajor",
                                 entry.getDevMajor(), TarConstants.MAXID);
        addPaxHeaderForBigNumber(paxHeaders, "SCHILY.devminor",
                                 entry.getDevMinor(), TarConstants.MAXID);
        // there is no PAX header for file mode
        failForBigNumber("mode", entry.getMode(), TarConstants.MAXID);
    }
552    
553        private void addPaxHeaderForBigNumber(Map<String, String> paxHeaders,
554                                              String header, long value,
555                                              long maxValue) {
556            if (value < 0 || value > maxValue) {
557                paxHeaders.put(header, String.valueOf(value));
558            }
559        }
560    
    /**
     * Checks every numeric field of the entry against the limit used for
     * the traditional header and fails on the first one that is negative
     * or too big.
     *
     * @param entry the entry whose fields are checked
     * @throws RuntimeException if one of the values is out of range
     */
    private void failForBigNumbers(TarArchiveEntry entry) {
        failForBigNumber("entry size", entry.getSize(), TarConstants.MAXSIZE);
        failForBigNumber("group id", entry.getGroupId(), TarConstants.MAXID);
        // the modification time is checked in seconds, not milliseconds
        failForBigNumber("last modification time",
                         entry.getModTime().getTime() / 1000,
                         TarConstants.MAXSIZE);
        failForBigNumber("user id", entry.getUserId(), TarConstants.MAXID);
        failForBigNumber("mode", entry.getMode(), TarConstants.MAXID);
        failForBigNumber("major device number", entry.getDevMajor(),
                         TarConstants.MAXID);
        failForBigNumber("minor device number", entry.getDevMinor(),
                         TarConstants.MAXID);
    }
574    
575        private void failForBigNumber(String field, long value, long maxValue) {
576            if (value < 0 || value > maxValue) {
577                throw new RuntimeException(field + " '" + value
578                                           + "' is too big ( > "
579                                           + maxValue + " )");
580            }
581        }
582    }