001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.language.simple;
018
019import java.util.List;
020import java.util.concurrent.CopyOnWriteArrayList;
021
022import org.apache.camel.language.simple.types.SimpleToken;
023import org.apache.camel.language.simple.types.SimpleTokenType;
024import org.apache.camel.language.simple.types.TokenType;
025import org.apache.camel.util.ObjectHelper;
026
027/**
028 * Tokenizer to create {@link SimpleToken} from the input.
029 */
030public final class SimpleTokenizer {
031
032    // use CopyOnWriteArrayList so we can modify it in the for loop when changing function start/end tokens
033    private static final List<SimpleTokenType> KNOWN_TOKENS = new CopyOnWriteArrayList<SimpleTokenType>();
034
035    static {
036        // add known tokens
037        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "${"));
038        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "$simple{"));
039        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionEnd, "}"));
040        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, " "));
041        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\t"));
042        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\n"));
043        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\r"));
044        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.singleQuote, "'"));
045        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.doubleQuote, "\""));
046        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "true"));
047        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "false"));
048        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.nullValue, "null"));
049        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.escape, "\\"));
050
051        // binary operators
052        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "=="));
053        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "=~"));
054        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">="));
055        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<="));
056        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">"));
057        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<"));
058        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "!="));
059        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not is"));
060        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "is"));
061        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not contains"));
062        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "contains"));
063        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not regex"));
064        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "regex"));
065        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not in"));
066        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "in"));
067        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "range"));
068        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not range"));
069        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "starts with"));
070        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "ends with"));
071
072        // unary operators
073        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "++"));
074        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "--"));
075
076        // logical operators
077        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "&&"));
078        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "||"));
079        // TODO: @deprecated logical operators, to be removed in Camel 3.0
080        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "and"));
081        KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "or"));
082    }
083
084    private SimpleTokenizer() {
085        // static methods
086    }
087
088    /**
089     * Does the expression include a simple function.
090     *
091     * @param expression the expression
092     * @return <tt>true</tt> if one or more simple function is included in the expression
093     */
094    public static boolean hasFunctionStartToken(String expression) {
095        if (expression != null) {
096            for (SimpleTokenType type : KNOWN_TOKENS) {
097                if (type.getType() == TokenType.functionStart) {
098                    if (expression.contains(type.getValue())) {
099                        return true;
100                    }
101                } else {
102                    // function start are always first
103                    return false;
104                }
105            }
106        }
107        return false;
108    }
109
110    /**
111     * @see SimpleLanguage#changeFunctionStartToken(String...)
112     */
113    public static void changeFunctionStartToken(String... startToken) {
114        for (SimpleTokenType type : KNOWN_TOKENS) {
115            if (type.getType() == TokenType.functionStart) {
116                KNOWN_TOKENS.remove(type);
117            }
118        }
119
120        // add in start of list as its a more common token to be used
121        for (String token : startToken) {
122            KNOWN_TOKENS.add(0, new SimpleTokenType(TokenType.functionStart, token));
123        }
124    }
125
126    /**
127     * @see SimpleLanguage#changeFunctionEndToken(String...)
128     */
129    public static void changeFunctionEndToken(String... endToken) {
130        for (SimpleTokenType type : KNOWN_TOKENS) {
131            if (type.getType() == TokenType.functionEnd) {
132                KNOWN_TOKENS.remove(type);
133            }
134        }
135
136        // add after the start tokens
137        int pos = 0;
138        for (SimpleTokenType type : KNOWN_TOKENS) {
139            if (type.getType() == TokenType.functionStart) {
140                pos++;
141            }
142        }
143
144        // add after function start of list as its a more common token to be used
145        for (String token : endToken) {
146            KNOWN_TOKENS.add(pos, new SimpleTokenType(TokenType.functionEnd, token));
147        }
148    }
149
150    /**
151     * Create the next token
152     *
153     * @param expression  the input expression
154     * @param index       the current index
155     * @param allowEscape whether to allow escapes
156     * @param filter      defines the accepted token types to be returned (character is always used as fallback)
157     * @return the created token, will always return a token
158     */
159    public static SimpleToken nextToken(String expression, int index, boolean allowEscape, TokenType... filter) {
160        return doNextToken(expression, index, allowEscape, filter);
161    }
162
163    /**
164     * Create the next token
165     *
166     * @param expression  the input expression
167     * @param index       the current index
168     * @param allowEscape whether to allow escapes
169     * @return the created token, will always return a token
170     */
171    public static SimpleToken nextToken(String expression, int index, boolean allowEscape) {
172        return doNextToken(expression, index, allowEscape);
173    }
174
175    private static SimpleToken doNextToken(String expression, int index, boolean allowEscape, TokenType... filters) {
176
177        boolean numericAllowed = acceptType(TokenType.numericValue, filters);
178        if (numericAllowed) {
179            // is it a numeric value
180            StringBuilder sb = new StringBuilder();
181            boolean digit = true;
182            while (digit && index < expression.length()) {
183                digit = Character.isDigit(expression.charAt(index));
184                if (digit) {
185                    char ch = expression.charAt(index);
186                    sb.append(ch);
187                    index++;
188                    continue;
189                }
190                // is it a dot or comma as part of a floating point number
191                boolean decimalSeparator = '.' == expression.charAt(index) || ',' == expression.charAt(index);
192                if (decimalSeparator && sb.length() > 0) {
193                    char ch = expression.charAt(index);
194                    sb.append(ch);
195                    index++;
196                    // assume its still a digit
197                    digit = true;
198                    continue;
199                }
200            }
201            if (sb.length() > 0) {
202                return new SimpleToken(new SimpleTokenType(TokenType.numericValue, sb.toString()), index);
203            }
204        }
205
206        boolean escapeAllowed = allowEscape && acceptType(TokenType.escape, filters);
207        if (escapeAllowed) {
208            StringBuilder sb = new StringBuilder();
209            char ch = expression.charAt(index);
210            boolean escaped = '\\' == ch;
211            if (escaped && index < expression.length() - 1) {
212                // grab next character to escape
213                char next = expression.charAt(++index);
214                // special for new line, tabs and carriage return
215                boolean special = false;
216                if ('n' == next) {
217                    sb.append("\n");
218                    special = true;
219                } else if ('t' == next) {
220                    sb.append("\t");
221                    special = true;
222                } else if ('r' == next) {
223                    sb.append("\r");
224                    special = true;
225                } else if ('}' == next) {
226                    sb.append("}");
227                    special = true;
228                } else {
229                    // not special just a regular character
230                    sb.append(ch);
231                }
232
233                // force 2 as length if special
234                return new SimpleToken(new SimpleTokenType(TokenType.character, sb.toString()), index, special ? 2 : 1);
235            }
236        }
237
238        // it could be any of the known tokens
239        String text = expression.substring(index);
240        for (SimpleTokenType token : KNOWN_TOKENS) {
241            if (acceptType(token.getType(), filters)) {
242                if (acceptToken(token, text, expression, index)) {
243                    return new SimpleToken(token, index);
244                }
245            }
246        }
247
248        // fallback and create a character token
249        char ch = expression.charAt(index);
250        SimpleToken token = new SimpleToken(new SimpleTokenType(TokenType.character, "" + ch), index);
251        return token;
252    }
253
254    private static boolean acceptType(TokenType type, TokenType... filters) {
255        if (filters == null || filters.length == 0) {
256            return true;
257        }
258        for (TokenType filter : filters) {
259            if (type == filter) {
260                return true;
261            }
262        }
263        return false;
264    }
265
266    private static boolean acceptToken(SimpleTokenType token, String text, String expression, int index) {
267        if (token.isUnary() && text.startsWith(token.getValue())) {
268            SimpleTokenType functionEndToken = getFunctionEndToken();
269            if (functionEndToken != null) {
270                int endLen = functionEndToken.getValue().length();
271
272                // special check for unary as the previous must be a function end, and the next a whitespace
273                // to ensure unary operators is only applied on functions as intended
274                int len = token.getValue().length();
275
276                String previous = "";
277                if (index - endLen >= 0) {
278                    previous = expression.substring(index - endLen, index);
279                }
280                String after = text.substring(len);
281                boolean whiteSpace = ObjectHelper.isEmpty(after) || after.startsWith(" ");
282                boolean functionEnd = previous.equals(functionEndToken.getValue());
283                return functionEnd && whiteSpace;
284            }
285        }
286
287        return text.startsWith(token.getValue());
288    }
289
290    private static SimpleTokenType getFunctionEndToken() {
291        for (SimpleTokenType token : KNOWN_TOKENS) {
292            if (token.isFunctionEnd()) {
293                return token;
294            }
295        }
296        return null;
297    }
298
299}