001    /*
002     *  Licensed to the Apache Software Foundation (ASF) under one or more
003     *  contributor license agreements.  See the NOTICE file distributed with
004     *  this work for additional information regarding copyright ownership.
005     *  The ASF licenses this file to You under the Apache License, Version 2.0
006     *  (the "License"); you may not use this file except in compliance with
007     *  the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     *  Unless required by applicable law or agreed to in writing, software
012     *  distributed under the License is distributed on an "AS IS" BASIS,
013     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     *  See the License for the specific language governing permissions and
015     *  limitations under the License.
016     *
017     */
018    
019    package org.apache.commons.compress.archivers.zip;
020    
021    import java.nio.ByteBuffer;
022    import java.nio.charset.Charset;
023    import java.nio.charset.UnsupportedCharsetException;
024    import java.util.Collections;
025    import java.util.HashMap;
026    import java.util.Map;
027    
028    import org.apache.commons.compress.utils.CharsetNames;
029    
030    /**
031     * Static helper functions for robustly encoding filenames in zip files. 
032     */
033    public abstract class ZipEncodingHelper {
034    
035        /**
036         * A class, which holds the high characters of a simple encoding
037         * and lazily instantiates a Simple8BitZipEncoding instance in a
038         * thread-safe manner.
039         */
040        private static class SimpleEncodingHolder {
041    
042            private final char [] highChars;
043            private Simple8BitZipEncoding encoding;
044    
045            /**
046             * Instantiate a simple encoding holder.
047             * 
048             * @param highChars The characters for byte codes 128 to 255.
049             * 
050             * @see Simple8BitZipEncoding#Simple8BitZipEncoding(char[])
051             */
052            SimpleEncodingHolder(char [] highChars) {
053                this.highChars = highChars;
054            }
055    
056            /**
057             * @return The associated {@link Simple8BitZipEncoding}, which
058             *         is instantiated if not done so far.
059             */
060            public synchronized Simple8BitZipEncoding getEncoding() {
061                if (this.encoding == null) {
062                    this.encoding = new Simple8BitZipEncoding(this.highChars);
063                }
064                return this.encoding;
065            }
066        }
067    
068        private static final Map<String, SimpleEncodingHolder> simpleEncodings;
069    
070        static {
071            Map<String, SimpleEncodingHolder> se =
072                new HashMap<String, SimpleEncodingHolder>();
073    
074            char[] cp437_high_chars =
075                new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0,
076                             0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef,
077                             0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
078                             0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
079                             0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5,
080                             0x20a7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
081                             0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x2310,
082                             0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
083                             0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561,
084                             0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557,
085                             0x255d, 0x255c, 0x255b, 0x2510, 0x2514, 0x2534,
086                             0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f,
087                             0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550,
088                             0x256c, 0x2567, 0x2568, 0x2564, 0x2565, 0x2559,
089                             0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518,
090                             0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
091                             0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3,
092                             0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4,
093                             0x221e, 0x03c6, 0x03b5, 0x2229, 0x2261, 0x00b1,
094                             0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248,
095                             0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2,
096                             0x25a0, 0x00a0 };
097    
098            SimpleEncodingHolder cp437 = new SimpleEncodingHolder(cp437_high_chars);
099    
100            se.put("CP437", cp437);
101            se.put("Cp437", cp437);
102            se.put("cp437", cp437);
103            se.put("IBM437", cp437);
104            se.put("ibm437", cp437);
105    
106            char[] cp850_high_chars =
107                new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0,
108                             0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef,
109                             0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
110                             0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
111                             0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8,
112                             0x00d7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
113                             0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x00ae,
114                             0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
115                             0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1,
116                             0x00c2, 0x00c0, 0x00a9, 0x2563, 0x2551, 0x2557,
117                             0x255d, 0x00a2, 0x00a5, 0x2510, 0x2514, 0x2534,
118                             0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3,
119                             0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550,
120                             0x256c, 0x00a4, 0x00f0, 0x00d0, 0x00ca, 0x00cb,
121                             0x00c8, 0x0131, 0x00cd, 0x00ce, 0x00cf, 0x2518,
122                             0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580,
123                             0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5,
124                             0x00b5, 0x00fe, 0x00de, 0x00da, 0x00db, 0x00d9,
125                             0x00fd, 0x00dd, 0x00af, 0x00b4, 0x00ad, 0x00b1,
126                             0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8,
127                             0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2,
128                             0x25a0, 0x00a0 };
129    
130            SimpleEncodingHolder cp850 = new SimpleEncodingHolder(cp850_high_chars);
131    
132            se.put("CP850", cp850);
133            se.put("Cp850", cp850);
134            se.put("cp850", cp850);
135            se.put("IBM850", cp850);
136            se.put("ibm850", cp850);
137            simpleEncodings = Collections.unmodifiableMap(se);
138        }
139    
140        /**
141         * Grow a byte buffer, so it has a minimal capacity or at least
142         * the double capacity of the original buffer 
143         * 
144         * @param b The original buffer.
145         * @param newCapacity The minimal requested new capacity.
146         * @return A byte buffer <code>r</code> with
147         *         <code>r.capacity() = max(b.capacity()*2,newCapacity)</code> and
148         *         all the data contained in <code>b</code> copied to the beginning
149         *         of <code>r</code>.
150         *
151         */
152        static ByteBuffer growBuffer(ByteBuffer b, int newCapacity) {
153            b.limit(b.position());
154            b.rewind();
155    
156            int c2 = b.capacity() * 2;
157            ByteBuffer on = ByteBuffer.allocate(c2 < newCapacity ? newCapacity : c2);
158    
159            on.put(b);
160            return on;
161        }
162    
163     
164        /**
165         * The hexadecimal digits <code>0,...,9,A,...,F</code> encoded as
166         * ASCII bytes.
167         */
168        private static final byte[] HEX_DIGITS =
169            new byte [] {
170            0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x41,
171            0x42, 0x43, 0x44, 0x45, 0x46
172        };
173    
174        /**
175         * Append <code>%Uxxxx</code> to the given byte buffer.
176         * The caller must assure, that <code>bb.remaining()&gt;=6</code>.
177         * 
178         * @param bb The byte buffer to write to.
179         * @param c The character to write.
180         */
181        static void appendSurrogate(ByteBuffer bb, char c) {
182    
183            bb.put((byte) '%');
184            bb.put((byte) 'U');
185    
186            bb.put(HEX_DIGITS[(c >> 12)&0x0f]);
187            bb.put(HEX_DIGITS[(c >> 8)&0x0f]);
188            bb.put(HEX_DIGITS[(c >> 4)&0x0f]);
189            bb.put(HEX_DIGITS[c & 0x0f]);
190        }
191    
192    
193        /**
194         * name of the encoding UTF-8
195         */
196        static final String UTF8 = "UTF8";
197    
198        /**
199         * variant name of the encoding UTF-8 used for comparisions.
200         */
201        private static final String UTF_DASH_8 = CharsetNames.UTF_8;
202    
203        /**
204         * name of the encoding UTF-8
205         */
206        static final ZipEncoding UTF8_ZIP_ENCODING = new FallbackZipEncoding(UTF8);
207    
208        /**
209         * Instantiates a zip encoding.
210         * 
211         * @param name The name of the zip encoding. Specify {@code null} for
212         *             the platform's default encoding.
213         * @return A zip encoding for the given encoding name.
214         */
215        public static ZipEncoding getZipEncoding(String name) {
216     
217            // fallback encoding is good enough for utf-8.
218            if (isUTF8(name)) {
219                return UTF8_ZIP_ENCODING;
220            }
221    
222            if (name == null) {
223                return new FallbackZipEncoding();
224            }
225    
226            SimpleEncodingHolder h = simpleEncodings.get(name);
227    
228            if (h!=null) {
229                return h.getEncoding();
230            }
231    
232            try {
233    
234                Charset cs = Charset.forName(name);
235                return new NioZipEncoding(cs);
236    
237            } catch (UnsupportedCharsetException e) {
238                return new FallbackZipEncoding(name);
239            }
240        }
241    
242        /**
243         * Whether a given encoding - or the platform's default encoding
244         * if the parameter is null - is UTF-8.
245         */
246        static boolean isUTF8(String encoding) {
247            if (encoding == null) {
248                // check platform's default encoding
249                encoding = System.getProperty("file.encoding");
250            }
251            return UTF8.equalsIgnoreCase(encoding)
252                || UTF_DASH_8.equalsIgnoreCase(encoding);
253        }
254    }