001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *   http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     *
017     */
018    
019    package org.apache.commons.compress.utils;
020    
021    import java.nio.charset.Charset;
022    
023    /**
024     * Charsets required of every implementation of the Java platform.
025     *
026     * From the Java documentation <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
027     * charsets</a>:
028     * <p>
029     * <cite>Every implementation of the Java platform is required to support the following character encodings. Consult the
030     * release documentation for your implementation to see if any other encodings are supported. Consult the release
031     * documentation for your implementation to see if any other encodings are supported. </cite>
032     * </p>
033     *
034     * <ul>
035     * <li><code>US-ASCII</code><br/>
036     * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set.</li>
037     * <li><code>ISO-8859-1</code><br/>
038     * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li>
039     * <li><code>UTF-8</code><br/>
040     * Eight-bit Unicode Transformation Format.</li>
041     * <li><code>UTF-16BE</code><br/>
042     * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li>
043     * <li><code>UTF-16LE</code><br/>
044     * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li>
045     * <li><code>UTF-16</code><br/>
046     * Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order
047     * accepted on input, big-endian used on output.)</li>
048     * </ul>
049     *
050     * This class best belongs in the Commons Lang or IO project. Even if a similar class is defined in another Commons component, it is
051     * not foreseen that Commons Compress would be made to depend on another Commons component.
052     *
053     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
054     * @since 1.4
055     * @version $Id: Charsets.java 1437047 2013-01-22 17:00:33Z sebb $
056     */
057    public class Charsets {
058    
059        //
060        // This class should only contain Charset instances for required encodings. This guarantees that it will load correctly and
061        // without delay on all Java platforms.
062        //
063    
064        /**
065         * Returns the given Charset or the default Charset if the given Charset is null.
066         *
067         * @param charset
068         *            A charset or null.
069         * @return the given Charset or the default Charset if the given Charset is null
070         */
071        public static Charset toCharset(Charset charset) {
072            return charset == null ? Charset.defaultCharset() : charset;
073        }
074    
075        /**
076         * Returns a Charset for the named charset. If the name is null, return the default Charset.
077         *
078         * @param charset
079         *            The name of the requested charset, may be null.
080         * @return a Charset for the named charset
081         * @throws java.nio.charset.UnsupportedCharsetException
082         *             If the named charset is unavailable
083         * @throws java.nio.charset.IllegalCharsetNameException
084         *             If the given charset name is illegal
085         */
086        public static Charset toCharset(String charset) {
087            return charset == null ? Charset.defaultCharset() : Charset.forName(charset);
088        }
089    
090        /**
091         * CharsetNamesISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1. </p>
092         * <p>
093         * Every implementation of the Java platform is required to support this character encoding.
094         * </p>
095         *
096         * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
097         */
098        public static final Charset ISO_8859_1 = Charset.forName(CharsetNames.ISO_8859_1);
099    
100        /**
101         * <p>
102         * Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set.
103         * </p>
104         * <p>
105         * Every implementation of the Java platform is required to support this character encoding.
106         * </p>
107         *
108         * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
109         */
110        public static final Charset US_ASCII = Charset.forName(CharsetNames.US_ASCII);
111    
112        /**
113         * <p>
114         * Sixteen-bit Unicode Transformation Format, The byte order specified by a mandatory initial byte-order mark
115         * (either order accepted on input, big-endian used on output)
116         * </p>
117         * <p>
118         * Every implementation of the Java platform is required to support this character encoding.
119         * </p>
120         *
121         * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
122         */
123        public static final Charset UTF_16 = Charset.forName(CharsetNames.UTF_16);
124    
125        /**
126         * <p>
127         * Sixteen-bit Unicode Transformation Format, big-endian byte order.
128         * </p>
129         * <p>
130         * Every implementation of the Java platform is required to support this character encoding.
131         * </p>
132         *
133         * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
134         */
135        public static final Charset UTF_16BE = Charset.forName(CharsetNames.UTF_16BE);
136    
137        /**
138         * <p>
139         * Sixteen-bit Unicode Transformation Format, little-endian byte order.
140         * </p>
141         * <p>
142         * Every implementation of the Java platform is required to support this character encoding.
143         * </p>
144         *
145         * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
146         */
147        public static final Charset UTF_16LE = Charset.forName(CharsetNames.UTF_16LE);
148    
149        /**
150         * <p>
151         * Eight-bit Unicode Transformation Format.
152         * </p>
153         * <p>
154         * Every implementation of the Java platform is required to support this character encoding.
155         * </p>
156         *
157         * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
158         */
159        public static final Charset UTF_8 = Charset.forName(CharsetNames.UTF_8);
160    }