001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.language.simple;
018
019import java.util.List;
020import java.util.concurrent.CopyOnWriteArrayList;
021
022import org.apache.camel.language.simple.types.SimpleToken;
023import org.apache.camel.language.simple.types.SimpleTokenType;
024import org.apache.camel.language.simple.types.TokenType;
025import org.apache.camel.util.ObjectHelper;
026
027/**
028 * Tokenizer to create {@link SimpleToken} from the input.
029 */
030public final class SimpleTokenizer {
031
032    // use CopyOnWriteArrayList so we can modify it in the for loop when changing function start/end tokens
033    private static final List<SimpleTokenType> KNOWN_TOKENS = new CopyOnWriteArrayList<SimpleTokenType>();
034
035    static {
036        // add known tokens
037        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, " "));
038        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\t"));
039        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\n"));
040        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\r"));
041        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.singleQuote, "'"));
042        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.doubleQuote, "\""));
043        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "${"));
044        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "$simple{"));
045        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionEnd, "}"));
046        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "true"));
047        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "false"));
048        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.nullValue, "null"));
049        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.escape, "\\"));
050
051        // binary operators
052        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "=="));
053        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "=~"));
054        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">="));
055        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<="));
056        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">"));
057        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<"));
058        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "!="));
059        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not is"));
060        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "is"));
061        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not contains"));
062        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "contains"));
063        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not regex"));
064        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "regex"));
065        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not in"));
066        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "in"));
067        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "range"));
068        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not range"));
069        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "ends with"));
070
071        // unary operators
072        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "++"));
073        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "--"));
074
075        // logical operators
076        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "&&"));
077        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "||"));
078        // TODO: @deprecated logical operators, to be removed in Camel 3.0
079        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "and"));
080        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "or"));
081    }
082
083    private SimpleTokenizer() {
084        // static methods
085    }
086
087
088    /**
089     * @see SimpleLanguage#changeFunctionStartToken(String...)
090     */
091    public static void changeFunctionStartToken(String... startToken) {
092        for (SimpleTokenType type : KNOWN_TOKENS) {
093            if (type.getType() == TokenType.functionStart) {
094                KNOWN_TOKENS.remove(type);
095            }
096        }
097
098        // add in start of list as its a more common token to be used
099        for (String token : startToken) {
100            KNOWN_TOKENS.add(0, new SimpleTokenType(TokenType.functionStart, token));
101        }
102    }
103
104    /**
105     * @see SimpleLanguage#changeFunctionEndToken(String...)
106     */
107    public static void changeFunctionEndToken(String... endToken) {
108        for (SimpleTokenType type : KNOWN_TOKENS) {
109            if (type.getType() == TokenType.functionEnd) {
110                KNOWN_TOKENS.remove(type);
111            }
112        }
113
114        // add in start of list as its a more common token to be used
115        for (String token : endToken) {
116            KNOWN_TOKENS.add(0, new SimpleTokenType(TokenType.functionEnd, token));
117        }
118    }
119
120    /**
121     * Create the next token
122     *
123     * @param expression  the input expression
124     * @param index       the current index
125     * @param allowEscape whether to allow escapes
126     * @param filter      defines the accepted token types to be returned (character is always used as fallback)
127     * @return the created token, will always return a token
128     */
129    public static SimpleToken nextToken(String expression, int index, boolean allowEscape, TokenType... filter) {
130        return doNextToken(expression, index, allowEscape, filter);
131    }
132
133    /**
134     * Create the next token
135     *
136     * @param expression  the input expression
137     * @param index       the current index
138     * @param allowEscape whether to allow escapes
139     * @return the created token, will always return a token
140     */
141    public static SimpleToken nextToken(String expression, int index, boolean allowEscape) {
142        return doNextToken(expression, index, allowEscape);
143    }
144
145    private static SimpleToken doNextToken(String expression, int index, boolean allowEscape, TokenType... filters) {
146
147        boolean numericAllowed = acceptType(TokenType.numericValue, filters);
148        if (numericAllowed) {
149            // is it a numeric value
150            StringBuilder sb = new StringBuilder();
151            boolean digit = true;
152            while (digit && index < expression.length()) {
153                digit = Character.isDigit(expression.charAt(index));
154                if (digit) {
155                    char ch = expression.charAt(index);
156                    sb.append(ch);
157                    index++;
158                    continue;
159                }
160                // is it a dot or comma as part of a floating point number
161                boolean decimalSeparator = '.' == expression.charAt(index) || ',' == expression.charAt(index);
162                if (decimalSeparator && sb.length() > 0) {
163                    char ch = expression.charAt(index);
164                    sb.append(ch);
165                    index++;
166                    // assume its still a digit
167                    digit = true;
168                    continue;
169                }
170            }
171            if (sb.length() > 0) {
172                return new SimpleToken(new SimpleTokenType(TokenType.numericValue, sb.toString()), index);
173            }
174        }
175
176        boolean escapeAllowed = allowEscape && acceptType(TokenType.escape, filters);
177        if (escapeAllowed) {
178            StringBuilder sb = new StringBuilder();
179            char ch = expression.charAt(index);
180            boolean escaped = '\\' == ch;
181            if (escaped && index < expression.length() - 1) {
182                // grab next character to escape
183                char next = expression.charAt(++index);
184                // special for new line, tabs and carriage return
185                boolean special = false;
186                if ('n' == next) {
187                    sb.append("\n");
188                    special = true;
189                } else if ('t' == next) {
190                    sb.append("\t");
191                    special = true;
192                } else if ('r' == next) {
193                    sb.append("\r");
194                    special = true;
195                } else {
196                    // not special just a regular character
197                    sb.append(ch);
198                }
199
200                // force 2 as length if special
201                return new SimpleToken(new SimpleTokenType(TokenType.character, sb.toString()), index, special ? 2 : 1);
202            }
203        }
204
205        // it could be any of the known tokens
206        String text = expression.substring(index);
207        for (SimpleTokenType token : KNOWN_TOKENS) {
208            if (acceptType(token.getType(), filters)) {
209                if (acceptToken(token, text, expression, index)) {
210                    return new SimpleToken(token, index);
211                }
212            }
213        }
214
215        // fallback and create a character token
216        char ch = expression.charAt(index);
217        SimpleToken token = new SimpleToken(new SimpleTokenType(TokenType.character, "" + ch), index);
218        return token;
219    }
220
221    private static boolean acceptType(TokenType type, TokenType... filters) {
222        if (filters == null || filters.length == 0) {
223            return true;
224        }
225        for (TokenType filter : filters) {
226            if (type == filter) {
227                return true;
228            }
229        }
230        return false;
231    }
232
233    private static boolean acceptToken(SimpleTokenType token, String text, String expression, int index) {
234        if (token.isUnary() && text.startsWith(token.getValue())) {
235            SimpleTokenType functionEndToken = getFunctionEndToken();
236            if (functionEndToken != null) {
237                int endLen = functionEndToken.getValue().length();
238
239                // special check for unary as the previous must be a function end, and the next a whitespace
240                // to ensure unary operators is only applied on functions as intended
241                int len = token.getValue().length();
242
243                String previous = "";
244                if (index - endLen >= 0) {
245                    previous = expression.substring(index - endLen, index);
246                }
247                String after = text.substring(len);
248                boolean whiteSpace = ObjectHelper.isEmpty(after) || after.startsWith(" ");
249                boolean functionEnd = previous.equals(functionEndToken.getValue());
250                return functionEnd && whiteSpace;
251            }
252        }
253
254        return text.startsWith(token.getValue());
255    }
256
257    private static SimpleTokenType getFunctionEndToken() {
258        for (SimpleTokenType token : KNOWN_TOKENS) {
259            if (token.isFunctionEnd()) {
260                return token;
261            }
262        }
263        return null;
264    }
265
266}