001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.camel.util; 018 019 import java.util.BitSet; 020 021 /** 022 * Encoder for unsafe URI characters. 023 * <p/> 024 * A good source for details is <a href="http://en.wikipedia.org/wiki/Url_encode">wikipedia url encode</a> article. 
/**
 * Percent-encodes the characters that are considered unsafe inside a URI.
 * <p/>
 * The unsafe set is: space, {@code "}, {@code <}, {@code >}, {@code #}, {@code %},
 * <code>{</code>, <code>}</code>, {@code |}, {@code \}, {@code ^}, {@code ~},
 * {@code [}, {@code ]} and the back-tick. Characters outside the ASCII range are
 * never touched by this encoder.
 */
public final class UnsafeUriCharactersEncoder {

    /** Upper-case hexadecimal digits used when writing a {@code %XX} escape. */
    private static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C',
                                              'D', 'E', 'F'};

    /** ASCII characters that must be escaped; populated once and only read afterwards. */
    private static final BitSet UNSAFE_CHARACTERS = new BitSet(128);

    static {
        UNSAFE_CHARACTERS.set(' ');
        UNSAFE_CHARACTERS.set('"');
        UNSAFE_CHARACTERS.set('<');
        UNSAFE_CHARACTERS.set('>');
        UNSAFE_CHARACTERS.set('#');
        UNSAFE_CHARACTERS.set('%');
        UNSAFE_CHARACTERS.set('{');
        UNSAFE_CHARACTERS.set('}');
        UNSAFE_CHARACTERS.set('|');
        UNSAFE_CHARACTERS.set('\\');
        UNSAFE_CHARACTERS.set('^');
        UNSAFE_CHARACTERS.set('~');
        UNSAFE_CHARACTERS.set('[');
        UNSAFE_CHARACTERS.set(']');
        UNSAFE_CHARACTERS.set('`');
    }

    private UnsafeUriCharactersEncoder() {
        // util class
    }

    /**
     * Replaces every unsafe ASCII character in the given string with its {@code %XX} escape.
     * <p/>
     * A {@code %} that is already followed by two hexadecimal digits (upper or lower case)
     * is treated as an existing escape and copied unchanged, so encoding an already
     * encoded string does not double-encode it.
     *
     * @param s the text to encode, may be {@code null} or empty
     * @return {@code s} itself when it is {@code null}, empty or contains no unsafe
     *         character; otherwise a new string with the unsafe characters escaped
     */
    public static String encode(String s) {
        if (s == null || s.length() == 0) {
            return s;
        }

        char[] chars = s.toCharArray();

        // First check whether we actually need to encode at all
        int firstUnsafe = indexOfUnsafe(chars);
        if (firstUnsafe < 0) {
            return s;
        }

        // Everything before the first unsafe character can be copied verbatim.
        StringBuilder sb = new StringBuilder(chars.length + 16);
        sb.append(chars, 0, firstUnsafe);

        for (int i = firstUnsafe; i < chars.length; i++) {
            char ch = chars[i];
            if (!isUnsafe(ch)) {
                sb.append(ch);
            } else if (ch == '%' && isEscapeSequence(chars, i)) {
                // already percent-encoded, so keep the % as is
                sb.append(ch);
            } else {
                appendEscape(sb, ch);
            }
        }
        return sb.toString();
    }

    /** Returns the index of the first unsafe character, or -1 when there is none. */
    private static int indexOfUnsafe(char[] chars) {
        for (int i = 0; i < chars.length; i++) {
            if (isUnsafe(chars[i])) {
                return i;
            }
        }
        return -1;
    }

    /** Tells whether the character is an ASCII character from the unsafe set. */
    private static boolean isUnsafe(char ch) {
        return ch < 128 && UNSAFE_CHARACTERS.get(ch);
    }

    /** Tells whether the two characters following {@code percentIndex} exist and are both hex digits. */
    private static boolean isEscapeSequence(char[] chars, int percentIndex) {
        return percentIndex + 2 < chars.length
            && isHexDigit(chars[percentIndex + 1])
            && isHexDigit(chars[percentIndex + 2]);
    }

    /** Appends the {@code %XX} escape of an ASCII character, using upper-case hex digits. */
    private static void appendEscape(StringBuilder sb, char ch) {
        sb.append('%');
        sb.append(HEX_DIGITS[(ch >> 4) & 0x0f]);
        sb.append(HEX_DIGITS[ch & 0x0f]);
    }

    /**
     * Tells whether the character is a hexadecimal digit. Lower-case digits are accepted
     * as well, because the hex digits of a percent escape are case-insensitive.
     */
    private static boolean isHexDigit(char ch) {
        return (ch >= '0' && ch <= '9')
            || (ch >= 'A' && ch <= 'F')
            || (ch >= 'a' && ch <= 'f');
    }

}