001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     * http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    package org.apache.commons.compress.archivers;
020    
021    import java.io.ByteArrayInputStream;
022    import java.io.IOException;
023    import java.io.InputStream;
024    import java.io.OutputStream;
025    
026    import org.apache.commons.compress.archivers.ar.ArArchiveInputStream;
027    import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream;
028    import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
029    import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream;
030    import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream;
031    import org.apache.commons.compress.archivers.jar.JarArchiveInputStream;
032    import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream;
033    import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
034    import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
035    import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
036    import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
037    
038    /**
039     * <p>Factory to create Archive[In|Out]putStreams from names or the first bytes of
040     * the InputStream. In order add other implementations you should extend
041     * ArchiveStreamFactory and override the appropriate methods (and call their
042     * implementation from super of course).</p>
043     * 
044     * Compressing a ZIP-File:
045     * 
046     * <pre>
047     * final OutputStream out = new FileOutputStream(output); 
048     * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out);
049     * 
050     * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml"));
051     * IOUtils.copy(new FileInputStream(file1), os);
052     * os.closeArchiveEntry();
053     *
054     * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml"));
055     * IOUtils.copy(new FileInputStream(file2), os);
056     * os.closeArchiveEntry();
057     * os.close();
058     * </pre>
059     * 
060     * Decompressing a ZIP-File:
061     * 
062     * <pre>
063     * final InputStream is = new FileInputStream(input); 
064     * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is);
065     * ZipArchiveEntry entry = (ZipArchiveEntry)in.getNextEntry();
066     * OutputStream out = new FileOutputStream(new File(dir, entry.getName()));
067     * IOUtils.copy(in, out);
068     * out.close();
069     * in.close();
070     * </pre>
071     * 
072     * @Immutable
073     */
074    public class ArchiveStreamFactory {
075    
076        /**
077         * Constant used to identify the AR archive format.
078         * @since Commons Compress 1.1
079         */
080        public static final String AR = "ar";
081        /**
082         * Constant used to identify the CPIO archive format.
083         * @since Commons Compress 1.1
084         */
085        public static final String CPIO = "cpio";
086        /**
087         * Constant used to identify the Unix DUMP archive format.
088         * @since Commons Compress 1.3
089         */
090        public static final String DUMP = "dump";
091        /**
092         * Constant used to identify the JAR archive format.
093         * @since Commons Compress 1.1
094         */
095        public static final String JAR = "jar";
096        /**
097         * Constant used to identify the TAR archive format.
098         * @since Commons Compress 1.1
099         */
100        public static final String TAR = "tar";
101        /**
102         * Constant used to identify the ZIP archive format.
103         * @since Commons Compress 1.1
104         */
105        public static final String ZIP = "zip";
106    
107        /**
108         * Entry encoding, null for the default.
109         */
110        private String entryEncoding = null;
111    
112        /**
113         * Returns the encoding to use for zip and tar files, or null for
114         * the default.
115         *
116         * @return entry encoding, or null
117         * @since 1.5
118         */
119        public String getEntryEncoding() {
120            return entryEncoding;
121        }
122    
123        /**
124         * Sets the encoding to use for zip and tar files. Use null for
125         * the default.
126         *
127         * @since 1.5
128         */
129        public void setEntryEncoding(String entryEncoding) {
130            this.entryEncoding = entryEncoding;
131        }
132    
133        /**
134         * Create an archive input stream from an archiver name and an input stream.
135         * 
136         * @param archiverName the archive name, i.e. "ar", "zip", "tar", "jar", "dump" or "cpio"
137         * @param in the input stream
138         * @return the archive input stream
139         * @throws ArchiveException if the archiver name is not known
140         * @throws IllegalArgumentException if the archiver name or stream is null
141         */
142        public ArchiveInputStream createArchiveInputStream(
143                final String archiverName, final InputStream in)
144                throws ArchiveException {
145    
146            if (archiverName == null) {
147                throw new IllegalArgumentException("Archivername must not be null.");
148            }
149    
150            if (in == null) {
151                throw new IllegalArgumentException("InputStream must not be null.");
152            }
153    
154            if (AR.equalsIgnoreCase(archiverName)) {
155                return new ArArchiveInputStream(in);
156            }
157            if (ZIP.equalsIgnoreCase(archiverName)) {
158                if (entryEncoding != null) {
159                    return new ZipArchiveInputStream(in, entryEncoding);
160                } else {
161                    return new ZipArchiveInputStream(in);
162                }
163            }
164            if (TAR.equalsIgnoreCase(archiverName)) {
165                if (entryEncoding != null) {
166                    return new TarArchiveInputStream(in, entryEncoding);
167                } else {
168                    return new TarArchiveInputStream(in);
169                }
170            }
171            if (JAR.equalsIgnoreCase(archiverName)) {
172                return new JarArchiveInputStream(in);
173            }
174            if (CPIO.equalsIgnoreCase(archiverName)) {
175                return new CpioArchiveInputStream(in);
176            }
177            if (DUMP.equalsIgnoreCase(archiverName)) {
178                return new DumpArchiveInputStream(in);
179            }
180    
181            throw new ArchiveException("Archiver: " + archiverName + " not found.");
182        }
183    
184        /**
185         * Create an archive output stream from an archiver name and an input stream.
186         * 
187         * @param archiverName the archive name, i.e. "ar", "zip", "tar", "jar" or "cpio"
188         * @param out the output stream
189         * @return the archive output stream
190         * @throws ArchiveException if the archiver name is not known
191         * @throws IllegalArgumentException if the archiver name or stream is null
192         */
193        public ArchiveOutputStream createArchiveOutputStream(
194                final String archiverName, final OutputStream out)
195                throws ArchiveException {
196            if (archiverName == null) {
197                throw new IllegalArgumentException("Archivername must not be null.");
198            }
199            if (out == null) {
200                throw new IllegalArgumentException("OutputStream must not be null.");
201            }
202    
203            if (AR.equalsIgnoreCase(archiverName)) {
204                return new ArArchiveOutputStream(out);
205            }
206            if (ZIP.equalsIgnoreCase(archiverName)) {
207                ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out);
208                if (entryEncoding != null) {
209                    zip.setEncoding(entryEncoding);
210                }
211                return zip;
212            }
213            if (TAR.equalsIgnoreCase(archiverName)) {
214                if (entryEncoding != null) {
215                    return new TarArchiveOutputStream(out, entryEncoding);
216                } else {
217                    return new TarArchiveOutputStream(out);
218                }
219            }
220            if (JAR.equalsIgnoreCase(archiverName)) {
221                return new JarArchiveOutputStream(out);
222            }
223            if (CPIO.equalsIgnoreCase(archiverName)) {
224                return new CpioArchiveOutputStream(out);
225            }
226            throw new ArchiveException("Archiver: " + archiverName + " not found.");
227        }
228    
229        /**
230         * Create an archive input stream from an input stream, autodetecting
231         * the archive type from the first few bytes of the stream. The InputStream
232         * must support marks, like BufferedInputStream.
233         * 
234         * @param in the input stream
235         * @return the archive input stream
236         * @throws ArchiveException if the archiver name is not known
237         * @throws IllegalArgumentException if the stream is null or does not support mark
238         */
239        public ArchiveInputStream createArchiveInputStream(final InputStream in)
240                throws ArchiveException {
241            if (in == null) {
242                throw new IllegalArgumentException("Stream must not be null.");
243            }
244    
245            if (!in.markSupported()) {
246                throw new IllegalArgumentException("Mark is not supported.");
247            }
248    
249            final byte[] signature = new byte[12];
250            in.mark(signature.length);
251            try {
252                int signatureLength = in.read(signature);
253                in.reset();
254                if (ZipArchiveInputStream.matches(signature, signatureLength)) {
255                    if (entryEncoding != null) {
256                        return new ZipArchiveInputStream(in, entryEncoding);
257                    } else {
258                        return new ZipArchiveInputStream(in);
259                    }
260                } else if (JarArchiveInputStream.matches(signature, signatureLength)) {
261                    return new JarArchiveInputStream(in);
262                } else if (ArArchiveInputStream.matches(signature, signatureLength)) {
263                    return new ArArchiveInputStream(in);
264                } else if (CpioArchiveInputStream.matches(signature, signatureLength)) {
265                    return new CpioArchiveInputStream(in);
266                }
267    
268                // Dump needs a bigger buffer to check the signature;
269                final byte[] dumpsig = new byte[32];
270                in.mark(dumpsig.length);
271                signatureLength = in.read(dumpsig);
272                in.reset();
273                if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) {
274                    return new DumpArchiveInputStream(in);
275                }
276    
277                // Tar needs an even bigger buffer to check the signature; read the first block
278                final byte[] tarheader = new byte[512];
279                in.mark(tarheader.length);
280                signatureLength = in.read(tarheader);
281                in.reset();
282                if (TarArchiveInputStream.matches(tarheader, signatureLength)) {
283                    if (entryEncoding != null) {
284                        return new TarArchiveInputStream(in, entryEncoding);
285                    } else {
286                        return new TarArchiveInputStream(in);
287                    }
288                }
289                // COMPRESS-117 - improve auto-recognition
290                if (signatureLength >= 512) {
291                    TarArchiveInputStream tais = null;
292                    try {
293                        tais = new TarArchiveInputStream(new ByteArrayInputStream(tarheader));
294                        // COMPRESS-191 - verify the header checksum
295                        if (tais.getNextTarEntry().isCheckSumOK()) {
296                            return new TarArchiveInputStream(in);
297                        }
298                    } catch (Exception e) { // NOPMD
299                        // can generate IllegalArgumentException as well
300                        // as IOException
301                        // autodetection, simply not a TAR
302                        // ignored
303                    } finally {
304                        if (tais != null) {
305                            try {
306                                tais.close();
307                            } catch (IOException ignored) {
308                                // ignored
309                            }
310                        }
311                    }
312                }
313            } catch (IOException e) {
314                throw new ArchiveException("Could not use reset and mark operations.", e);
315            }
316    
317            throw new ArchiveException("No Archiver found for the stream signature");
318        }
319    
320    }