001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.camel.util;
018    
019    import java.util.BitSet;
020    
/**
 * Encoder for unsafe URI characters.
 * <p/>
 * Replaces every ASCII character from a fixed set of "unsafe" characters
 * (space, <tt>" &lt; &gt; # % { } | \ ^ ~ [ ] `</tt>) with its percent-encoded
 * form (<tt>%XX</tt>, upper-case hex). A <tt>%</tt> that already starts a
 * percent-encoded triplet (followed by two hex digits, in either case) is left
 * untouched so that input is not encoded twice. Characters outside the ASCII
 * range are passed through unchanged.
 * <p/>
 * A good source for details is <a href="http://en.wikipedia.org/wiki/Url_encode">wikipedia url encode</a> article.
 */
public final class UnsafeUriCharactersEncoder {

    /** The ASCII characters that must be percent-encoded; only written in the static initializer. */
    private static final BitSet UNSAFE_CHARACTERS = new BitSet(256);

    /** Digits used when writing an escape sequence (always upper case). */
    private static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C',
                                              'D', 'E', 'F'};

    static {
        UNSAFE_CHARACTERS.set(' ');
        UNSAFE_CHARACTERS.set('"');
        UNSAFE_CHARACTERS.set('<');
        UNSAFE_CHARACTERS.set('>');
        UNSAFE_CHARACTERS.set('#');
        UNSAFE_CHARACTERS.set('%');
        UNSAFE_CHARACTERS.set('{');
        UNSAFE_CHARACTERS.set('}');
        UNSAFE_CHARACTERS.set('|');
        UNSAFE_CHARACTERS.set('\\');
        UNSAFE_CHARACTERS.set('^');
        UNSAFE_CHARACTERS.set('~');
        UNSAFE_CHARACTERS.set('[');
        UNSAFE_CHARACTERS.set(']');
        UNSAFE_CHARACTERS.set('`');
    }

    private UnsafeUriCharactersEncoder() {
        // util class
    }

    /**
     * Percent-encodes the unsafe characters of the given string.
     *
     * @param s the text to encode, may be <tt>null</tt> or empty
     * @return the given instance when it is <tt>null</tt>, empty or contains no unsafe
     *         character; otherwise a new string in which each unsafe character is
     *         replaced by <tt>%XX</tt>, except for a <tt>%</tt> that is already
     *         followed by two hex digits
     */
    public static String encode(String s) {
        int length = s == null ? 0 : s.length();
        if (length == 0) {
            return s;
        }

        // First check whether we actually need to encode
        int firstUnsafe = 0;
        while (firstUnsafe < length && !isUnsafe(s.charAt(firstUnsafe))) {
            firstUnsafe++;
        }
        if (firstUnsafe == length) {
            return s;
        }

        // okay there are some unsafe characters so we do need to encode
        // see details at: http://en.wikipedia.org/wiki/Url_encode
        StringBuilder sb = new StringBuilder(length + 16);
        // everything before the first unsafe character can be copied verbatim
        sb.append(s, 0, firstUnsafe);
        for (int i = firstUnsafe; i < length; i++) {
            char ch = s.charAt(i);
            if (!isUnsafe(ch)) {
                sb.append(ch);
            } else if (ch == '%' && i + 2 < length
                       && isHexDigit(s.charAt(i + 1)) && isHexDigit(s.charAt(i + 2))) {
                // already a percent-encoded (hex) triplet, so append as is to avoid double encoding
                sb.append(ch);
            } else {
                // must escape then, as its an unsafe character
                appendEscape(sb, ch);
            }
        }
        return sb.toString();
    }

    /**
     * Tells whether the character is an ASCII character from the unsafe set.
     */
    private static boolean isUnsafe(char ch) {
        return ch < 128 && UNSAFE_CHARACTERS.get(ch);
    }

    /**
     * Appends <tt>%</tt> followed by the two upper-case hex digits of the low 8 bits of the character.
     */
    private static void appendEscape(StringBuilder sb, char ch) {
        sb.append('%');
        sb.append(HEX_DIGITS[(ch >> 4) & 0x0f]);
        sb.append(HEX_DIGITS[ch & 0x0f]);
    }

    /**
     * Tells whether the character is a hexadecimal digit; both upper and lower
     * case letters are accepted, as they are equivalent in percent-encodings.
     */
    private static boolean isHexDigit(char ch) {
        return (ch >= '0' && ch <= '9')
            || (ch >= 'A' && ch <= 'F')
            || (ch >= 'a' && ch <= 'f');
    }

}