001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.util;
018
019import java.util.ArrayList;
020import java.util.BitSet;
021import java.util.List;
022
023/**
024 * Encoder for unsafe URI characters.
025 * <p/>
026 * A good source for details is <a href="http://en.wikipedia.org/wiki/Url_encode">wikipedia url encode</a> article.
027 */
028public final class UnsafeUriCharactersEncoder {
029    private static BitSet unsafeCharactersRfc1738;
030    private static BitSet unsafeCharactersHttp;
031    private static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C',
032                                              'D', 'E', 'F', 'a', 'b', 'c', 'd', 'e', 'f'};
033
034    static {
035        unsafeCharactersRfc1738 = new BitSet(256);
036        unsafeCharactersRfc1738.set(' ');
037        unsafeCharactersRfc1738.set('"');
038        unsafeCharactersRfc1738.set('<');
039        unsafeCharactersRfc1738.set('>');
040        unsafeCharactersRfc1738.set('#');
041        unsafeCharactersRfc1738.set('%');
042        unsafeCharactersRfc1738.set('{');
043        unsafeCharactersRfc1738.set('}');
044        unsafeCharactersRfc1738.set('|');
045        unsafeCharactersRfc1738.set('\\');
046        unsafeCharactersRfc1738.set('^');
047        unsafeCharactersRfc1738.set('~');
048        unsafeCharactersRfc1738.set('[');
049        unsafeCharactersRfc1738.set(']');
050        unsafeCharactersRfc1738.set('`');
051    }
052    
053    static {
054        unsafeCharactersHttp = new BitSet(256);
055        unsafeCharactersHttp.set(' ');
056        unsafeCharactersHttp.set('"');
057        unsafeCharactersHttp.set('<');
058        unsafeCharactersHttp.set('>');
059        unsafeCharactersHttp.set('#');
060        unsafeCharactersHttp.set('%');
061        unsafeCharactersHttp.set('{');
062        unsafeCharactersHttp.set('}');
063        unsafeCharactersHttp.set('|');
064        unsafeCharactersHttp.set('\\');
065        unsafeCharactersHttp.set('^');
066        unsafeCharactersHttp.set('~');
067        unsafeCharactersHttp.set('`');
068    }
069
070    private UnsafeUriCharactersEncoder() {
071        // util class
072    }
073
074    public static String encode(String s) {
075        return encode(s, unsafeCharactersRfc1738);
076    }
077    
078    public static String encodeHttpURI(String s) {
079        return encode(s, unsafeCharactersHttp);
080    }
081    
082    public static String encode(String s, BitSet unsafeCharacters) {
083        return encode(s, unsafeCharacters, false);
084    }
085    
086    public static String encode(String s, boolean checkRaw) {
087        return encode(s, unsafeCharactersRfc1738, checkRaw);
088    }
089    
090    public static String encodeHttpURI(String s, boolean checkRaw) {
091        return encode(s, unsafeCharactersHttp, checkRaw);
092    }
093
094    // Just skip the encode for isRAW part
095    public static String encode(String s, BitSet unsafeCharacters, boolean checkRaw) {
096        List<Pair<Integer>> rawPairs;
097        if (checkRaw) {
098            rawPairs = URISupport.scanRaw(s);
099        } else {
100            rawPairs = new ArrayList<>();
101        }
102   
103        int n = s == null ? 0 : s.length();
104        if (n == 0) {
105            return s;
106        }
107
108        // First check whether we actually need to encode
109        char chars[] = s.toCharArray();
110        for (int i = 0;;) {
111            // just deal with the ascii character
112            if (chars[i] > 0 && chars[i] < 128) {
113                if (unsafeCharacters.get(chars[i])) {
114                    break;
115                }
116            }
117            if (++i >= chars.length) {
118                return s;
119            }
120        }
121
122        // okay there are some unsafe characters so we do need to encode
123        // see details at: http://en.wikipedia.org/wiki/Url_encode
124        StringBuilder sb = new StringBuilder();
125        for (int i = 0; i < chars.length; i++) {
126            char ch = chars[i];
127            if (ch > 0 && ch < 128 && unsafeCharacters.get(ch)) {
128                // special for % sign as it may be a decimal encoded value
129                if (ch == '%') {
130                    char next = i + 1 < chars.length ? chars[i + 1] : ' ';
131                    char next2 = i + 2 < chars.length ? chars[i + 2] : ' ';
132
133                    if (isHexDigit(next) && isHexDigit(next2) && !URISupport.isRaw(i, rawPairs)) {
134                        // its already encoded (decimal encoded) so just append as is
135                        sb.append(ch);
136                    } else {
137                        // must escape then, as its an unsafe character
138                        appendEscape(sb, (byte)ch);
139                    }
140                } else {
141                    // must escape then, as its an unsafe character
142                    appendEscape(sb, (byte)ch);
143                }
144            } else {
145                sb.append(ch);
146            }
147        }
148        return sb.toString();
149    }
150
151    private static void appendEscape(StringBuilder sb, byte b) {
152        sb.append('%');
153        sb.append(HEX_DIGITS[(b >> 4) & 0x0f]);
154        sb.append(HEX_DIGITS[(b >> 0) & 0x0f]);
155    }
156
157    private static boolean isHexDigit(char ch) {
158        for (char hex : HEX_DIGITS) {
159            if (hex == ch) {
160                return true;
161            }
162        }
163        return false;
164    }
165
166}