001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.runtimecatalog;
018
019import java.util.ArrayList;
020import java.util.BitSet;
021import java.util.List;
022
023/**
024 * Encoder for unsafe URI characters.
025 * <p/>
026 * A good source for details is
027 * <a href="http://en.wikipedia.org/wiki/Url_encode">wikipedia url encode</a>
028 * article.
029 */
030public final class UnsafeUriCharactersEncoder {
031    private static BitSet unsafeCharactersRfc1738;
032    private static BitSet unsafeCharactersHttp;
033    private static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'a', 'b', 'c', 'd', 'e', 'f'};
034
035    static {
036        unsafeCharactersRfc1738 = new BitSet(256);
037        unsafeCharactersRfc1738.set(' ');
038        unsafeCharactersRfc1738.set('"');
039        unsafeCharactersRfc1738.set('<');
040        unsafeCharactersRfc1738.set('>');
041        unsafeCharactersRfc1738.set('#');
042        unsafeCharactersRfc1738.set('%');
043        unsafeCharactersRfc1738.set('{');
044        unsafeCharactersRfc1738.set('}');
045        unsafeCharactersRfc1738.set('|');
046        unsafeCharactersRfc1738.set('\\');
047        unsafeCharactersRfc1738.set('^');
048        unsafeCharactersRfc1738.set('~');
049        unsafeCharactersRfc1738.set('[');
050        unsafeCharactersRfc1738.set(']');
051        unsafeCharactersRfc1738.set('`');
052    }
053
054    static {
055        unsafeCharactersHttp = new BitSet(256);
056        unsafeCharactersHttp.set(' ');
057        unsafeCharactersHttp.set('"');
058        unsafeCharactersHttp.set('<');
059        unsafeCharactersHttp.set('>');
060        unsafeCharactersHttp.set('#');
061        unsafeCharactersHttp.set('%');
062        unsafeCharactersHttp.set('{');
063        unsafeCharactersHttp.set('}');
064        unsafeCharactersHttp.set('|');
065        unsafeCharactersHttp.set('\\');
066        unsafeCharactersHttp.set('^');
067        unsafeCharactersHttp.set('~');
068        unsafeCharactersHttp.set('`');
069    }
070
071    private UnsafeUriCharactersEncoder() {
072        // util class
073    }
074
075    public static String encode(String s) {
076        return encode(s, unsafeCharactersRfc1738);
077    }
078
079    public static String encodeHttpURI(String s) {
080        return encode(s, unsafeCharactersHttp);
081    }
082
083    public static String encode(String s, BitSet unsafeCharacters) {
084        return encode(s, unsafeCharacters, false);
085    }
086
087    public static String encode(String s, boolean checkRaw) {
088        return encode(s, unsafeCharactersRfc1738, checkRaw);
089    }
090
091    public static String encodeHttpURI(String s, boolean checkRaw) {
092        return encode(s, unsafeCharactersHttp, checkRaw);
093    }
094
095    // Just skip the encode for isRAW part
096    public static String encode(String s, BitSet unsafeCharacters, boolean checkRaw) {
097        List<Pair<Integer>> rawPairs;
098        if (checkRaw) {
099            rawPairs = URISupport.scanRaw(s);
100        } else {
101            rawPairs = new ArrayList<>();
102        }
103
104        int n = s == null ? 0 : s.length();
105        if (n == 0) {
106            return s;
107        }
108
109        // First check whether we actually need to encode
110        char chars[] = s.toCharArray();
111        for (int i = 0;;) {
112            // just deal with the ascii character
113            if (chars[i] > 0 && chars[i] < 128) {
114                if (unsafeCharacters.get(chars[i])) {
115                    break;
116                }
117            }
118            if (++i >= chars.length) {
119                return s;
120            }
121        }
122
123        // okay there are some unsafe characters so we do need to encode
124        // see details at: http://en.wikipedia.org/wiki/Url_encode
125        StringBuilder sb = new StringBuilder();
126        for (int i = 0; i < chars.length; i++) {
127            char ch = chars[i];
128            if (ch > 0 && ch < 128 && unsafeCharacters.get(ch)) {
129                // special for % sign as it may be a decimal encoded value
130                if (ch == '%') {
131                    char next = i + 1 < chars.length ? chars[i + 1] : ' ';
132                    char next2 = i + 2 < chars.length ? chars[i + 2] : ' ';
133
134                    if (isHexDigit(next) && isHexDigit(next2) && !URISupport.isRaw(i, rawPairs)) {
135                        // its already encoded (decimal encoded) so just append
136                        // as is
137                        sb.append(ch);
138                    } else {
139                        // must escape then, as its an unsafe character
140                        appendEscape(sb, (byte)ch);
141                    }
142                } else {
143                    // must escape then, as its an unsafe character
144                    appendEscape(sb, (byte)ch);
145                }
146            } else {
147                sb.append(ch);
148            }
149        }
150        return sb.toString();
151    }
152
153    private static void appendEscape(StringBuilder sb, byte b) {
154        sb.append('%');
155        sb.append(HEX_DIGITS[(b >> 4) & 0x0f]);
156        sb.append(HEX_DIGITS[(b >> 0) & 0x0f]);
157    }
158
159    private static boolean isHexDigit(char ch) {
160        for (char hex : HEX_DIGITS) {
161            if (hex == ch) {
162                return true;
163            }
164        }
165        return false;
166    }
167
168}