/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.camel.language.simple;

import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;

import org.apache.camel.language.simple.types.SimpleToken;
import org.apache.camel.language.simple.types.SimpleTokenType;
import org.apache.camel.language.simple.types.TokenType;

/**
 * Tokenizer which creates {@link SimpleToken}s from an input expression.
 * <p/>
 * Token recognition is driven by an ordered list of known token types; the first
 * known token whose text matches at the current position is the one returned.
 */
public final class SimpleTokenizer {

    // a CopyOnWriteArrayList is used so entries can be removed while looping over the list
    // when the function start/end tokens are changed
    private static final List<SimpleTokenType> KNOWN_TOKENS = new CopyOnWriteArrayList<SimpleTokenType>();

    static {
        // the registration order is the matching order, so it must be kept as-is
        register(TokenType.whiteSpace, " ", "\t", "\n", "\r");
        register(TokenType.singleQuote, "'");
        register(TokenType.doubleQuote, "\"");
        register(TokenType.functionStart, "${", "$simple{");
        register(TokenType.functionEnd, "}");
        register(TokenType.booleanValue, "true", "false");
        register(TokenType.nullValue, "null");
        register(TokenType.escape, "\\");

        // binary operators
        register(TokenType.binaryOperator,
                "==", ">=", "<=", ">", "<", "!=",
                "not is", "is",
                "not contains", "contains",
                "not regex", "regex",
                "not in", "in",
                "range", "not range");

        // unary operators
        register(TokenType.unaryOperator, "++", "--");

        // logical operators
        register(TokenType.logicalOperator, "&&", "||");
        // TODO: @deprecated logical operators, to be removed in Camel 3.0
        register(TokenType.logicalOperator, "and", "or");
    }

    private SimpleTokenizer() {
        // utility class with static methods only
    }

    /**
     * Appends one known token per given value, all of the same type, in the given order.
     *
     * @param type   the token type shared by all the values
     * @param values the token texts to append to the known tokens
     */
    private static void register(TokenType type, String... values) {
        for (String value : values) {
            KNOWN_TOKENS.add(new SimpleTokenType(type, value));
        }
    }

    /**
     * Removes every known token of the given type and then inserts the replacements.
     * <p/>
     * Each replacement is inserted at position 0, one after the other, so the last
     * given replacement ends up first in the list of known tokens.
     *
     * @param kind         the token type to replace
     * @param replacements the new token texts for that type
     */
    private static void replaceTokens(TokenType kind, String... replacements) {
        // the loop runs over a snapshot of the list, which is why removing inside it is fine
        for (SimpleTokenType known : KNOWN_TOKENS) {
            if (known.getType() == kind) {
                KNOWN_TOKENS.remove(known);
            }
        }

        // put them first in the list, as they are the more commonly used tokens
        for (String replacement : replacements) {
            KNOWN_TOKENS.add(0, new SimpleTokenType(kind, replacement));
        }
    }

    /**
     * @see SimpleLanguage#changeFunctionStartToken(String...)
     */
    public static void changeFunctionStartToken(String... startToken) {
        replaceTokens(TokenType.functionStart, startToken);
    }

    /**
     * @see SimpleLanguage#changeFunctionEndToken(String...)
     */
    public static void changeFunctionEndToken(String... endToken) {
        replaceTokens(TokenType.functionEnd, endToken);
    }

    /**
     * Creates the next token, restricted to the given token types.
     *
     * @param expression  the input expression
     * @param index       the current index
     * @param allowEscape whether to allow escapes
     * @param filter      defines the accepted token types to be returned (character is always used as fallback)
     * @return the created token, will always return a token
     */
    public static SimpleToken nextToken(String expression, int index, boolean allowEscape, TokenType... filter) {
        return doNextToken(expression, index, allowEscape, filter);
    }

    /**
     * Creates the next token, accepting every token type.
     *
     * @param expression  the input expression
     * @param index       the current index
     * @param allowEscape whether to allow escapes
     * @return the created token, will always return a token
     */
    public static SimpleToken nextToken(String expression, int index, boolean allowEscape) {
        return doNextToken(expression, index, allowEscape);
    }

    /**
     * Resolves the token at the given index by trying, in this order: a numeric value,
     * an escaped character, one of the known tokens, and finally a single character.
     */
    private static SimpleToken doNextToken(String expression, int index, boolean allowEscape, TokenType... filters) {
        if (acceptType(TokenType.numericValue, filters)) {
            SimpleToken numeric = numericToken(expression, index);
            if (numeric != null) {
                return numeric;
            }
        }

        if (allowEscape && acceptType(TokenType.escape, filters)) {
            SimpleToken escaped = escapedToken(expression, index);
            if (escaped != null) {
                return escaped;
            }
        }

        // maybe it is one of the known tokens; the first accepted one that matches wins
        String remaining = expression.substring(index);
        for (SimpleTokenType candidate : KNOWN_TOKENS) {
            if (acceptType(candidate.getType(), filters) && remaining.startsWith(candidate.getValue())) {
                return new SimpleToken(candidate, index);
            }
        }

        // nothing matched, so fall back to a token holding just the current character
        String single = String.valueOf(expression.charAt(index));
        return new SimpleToken(new SimpleTokenType(TokenType.character, single), index);
    }

    /**
     * Reads a numeric value starting at the given index.
     * <p/>
     * Digits are always consumed; a dot or comma is consumed only once at least one
     * character has been consumed before it.
     *
     * @return the numeric token, or <tt>null</tt> if no digit is found at the index
     */
    private static SimpleToken numericToken(String expression, int index) {
        StringBuilder number = new StringBuilder();
        int pos = index;
        while (pos < expression.length()) {
            char ch = expression.charAt(pos);
            // a dot or comma counts as part of a floating point number
            boolean separator = ('.' == ch || ',' == ch) && number.length() > 0;
            if (!Character.isDigit(ch) && !separator) {
                break;
            }
            number.append(ch);
            pos++;
        }

        if (number.length() == 0) {
            return null;
        }
        // the token is created with the position just after the consumed characters
        return new SimpleToken(new SimpleTokenType(TokenType.numericValue, number.toString()), pos);
    }

    /**
     * Reads an escaped character, that is a backslash followed by at least one more character.
     * <p/>
     * The sequences <tt>\n</tt>, <tt>\t</tt> and <tt>\r</tt> become new line, tab and
     * carriage return; for any other sequence the character after the backslash is used as-is.
     *
     * @return the character token, or <tt>null</tt> if there is no escape sequence at the index
     */
    private static SimpleToken escapedToken(String expression, int index) {
        boolean backslash = '\\' == expression.charAt(index);
        if (!backslash || index >= expression.length() - 1) {
            return null;
        }

        // the character being escaped is the one right after the backslash
        int escapedAt = index + 1;
        char next = expression.charAt(escapedAt);
        String value;
        switch (next) {
        case 'n':
            value = "\n";
            break;
        case 't':
            value = "\t";
            break;
        case 'r':
            value = "\r";
            break;
        default:
            value = String.valueOf(next);
            break;
        }

        // the length is forced to 2 (backslash + escaped character), and the token is
        // created with the position of the escaped character
        return new SimpleToken(new SimpleTokenType(TokenType.character, value), escapedAt, 2);
    }

    /**
     * Whether the given type passes the filters; with no filters every type is accepted.
     */
    private static boolean acceptType(TokenType type, TokenType... filters) {
        boolean unrestricted = filters == null || filters.length == 0;
        if (unrestricted) {
            return true;
        }
        for (int i = 0; i < filters.length; i++) {
            if (filters[i] == type) {
                return true;
            }
        }
        return false;
    }

}