001/** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.camel.runtimecatalog; 018 019import java.util.ArrayList; 020import java.util.BitSet; 021import java.util.List; 022 023/** 024 * Encoder for unsafe URI characters. 025 * <p/> 026 * A good source for details is 027 * <a href="http://en.wikipedia.org/wiki/Url_encode">wikipedia url encode</a> 028 * article. 029 */ 030public final class UnsafeUriCharactersEncoder { 031 private static BitSet unsafeCharactersRfc1738; 032 private static BitSet unsafeCharactersHttp; 033 private static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'a', 'b', 'c', 'd', 'e', 'f'}; 034 035 static { 036 unsafeCharactersRfc1738 = new BitSet(256); 037 unsafeCharactersRfc1738.set(' '); 038 unsafeCharactersRfc1738.set('"'); 039 unsafeCharactersRfc1738.set('<'); 040 unsafeCharactersRfc1738.set('>'); 041 unsafeCharactersRfc1738.set('#'); 042 unsafeCharactersRfc1738.set('%'); 043 unsafeCharactersRfc1738.set('{'); 044 unsafeCharactersRfc1738.set('}'); 045 unsafeCharactersRfc1738.set('|'); 046 unsafeCharactersRfc1738.set('\\'); 047 unsafeCharactersRfc1738.set('^'); 048 unsafeCharactersRfc1738.set('~'); 049 unsafeCharactersRfc1738.set('['); 050 unsafeCharactersRfc1738.set(']'); 051 unsafeCharactersRfc1738.set('`'); 052 } 053 054 static { 055 unsafeCharactersHttp = new BitSet(256); 056 unsafeCharactersHttp.set(' '); 057 unsafeCharactersHttp.set('"'); 058 unsafeCharactersHttp.set('<'); 059 unsafeCharactersHttp.set('>'); 060 unsafeCharactersHttp.set('#'); 061 unsafeCharactersHttp.set('%'); 062 unsafeCharactersHttp.set('{'); 063 unsafeCharactersHttp.set('}'); 064 unsafeCharactersHttp.set('|'); 065 unsafeCharactersHttp.set('\\'); 066 unsafeCharactersHttp.set('^'); 067 unsafeCharactersHttp.set('~'); 068 unsafeCharactersHttp.set('`'); 069 } 070 071 private UnsafeUriCharactersEncoder() { 072 // util class 073 } 074 075 public static String encode(String s) { 076 return encode(s, unsafeCharactersRfc1738); 077 } 078 079 public static String encodeHttpURI(String s) { 080 return encode(s, unsafeCharactersHttp); 081 } 082 083 public static String encode(String s, BitSet unsafeCharacters) { 084 return encode(s, unsafeCharacters, false); 085 } 086 087 public static String encode(String s, boolean checkRaw) { 088 return encode(s, unsafeCharactersRfc1738, checkRaw); 089 } 090 091 public static String encodeHttpURI(String s, boolean checkRaw) { 092 return encode(s, unsafeCharactersHttp, checkRaw); 093 } 094 095 // Just skip the encode for isRAW part 096 public static String encode(String s, BitSet unsafeCharacters, boolean checkRaw) { 097 List<Pair<Integer>> rawPairs; 098 if (checkRaw) { 099 rawPairs = URISupport.scanRaw(s); 100 } else { 101 rawPairs = new ArrayList<>(); 102 } 103 104 int n = s == null ? 0 : s.length(); 105 if (n == 0) { 106 return s; 107 } 108 109 // First check whether we actually need to encode 110 char chars[] = s.toCharArray(); 111 for (int i = 0;;) { 112 // just deal with the ascii character 113 if (chars[i] > 0 && chars[i] < 128) { 114 if (unsafeCharacters.get(chars[i])) { 115 break; 116 } 117 } 118 if (++i >= chars.length) { 119 return s; 120 } 121 } 122 123 // okay there are some unsafe characters so we do need to encode 124 // see details at: http://en.wikipedia.org/wiki/Url_encode 125 StringBuilder sb = new StringBuilder(); 126 for (int i = 0; i < chars.length; i++) { 127 char ch = chars[i]; 128 if (ch > 0 && ch < 128 && unsafeCharacters.get(ch)) { 129 // special for % sign as it may be a decimal encoded value 130 if (ch == '%') { 131 char next = i + 1 < chars.length ? chars[i + 1] : ' '; 132 char next2 = i + 2 < chars.length ? chars[i + 2] : ' '; 133 134 if (isHexDigit(next) && isHexDigit(next2) && !URISupport.isRaw(i, rawPairs)) { 135 // its already encoded (decimal encoded) so just append 136 // as is 137 sb.append(ch); 138 } else { 139 // must escape then, as its an unsafe character 140 appendEscape(sb, (byte)ch); 141 } 142 } else { 143 // must escape then, as its an unsafe character 144 appendEscape(sb, (byte)ch); 145 } 146 } else { 147 sb.append(ch); 148 } 149 } 150 return sb.toString(); 151 } 152 153 private static void appendEscape(StringBuilder sb, byte b) { 154 sb.append('%'); 155 sb.append(HEX_DIGITS[(b >> 4) & 0x0f]); 156 sb.append(HEX_DIGITS[(b >> 0) & 0x0f]); 157 } 158 159 private static boolean isHexDigit(char ch) { 160 for (char hex : HEX_DIGITS) { 161 if (hex == ch) { 162 return true; 163 } 164 } 165 return false; 166 } 167 168}