001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.camel.language.simple;
018    
019    import java.util.List;
020    import java.util.concurrent.CopyOnWriteArrayList;
021    
022    import org.apache.camel.language.simple.types.SimpleToken;
023    import org.apache.camel.language.simple.types.SimpleTokenType;
024    import org.apache.camel.language.simple.types.TokenType;
025    
026    /**
027     * Tokenizer to create {@link SimpleToken} from the input.
028     */
029    public final class SimpleTokenizer {
030    
031        // use CopyOnWriteArrayList so we can modify it in the for loop when changing function start/end tokens
032        private static final List<SimpleTokenType> KNOWN_TOKENS = new CopyOnWriteArrayList<SimpleTokenType>();
033    
034        static {
035            // add known tokens
036            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, " "));
037            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\t"));
038            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\n"));
039            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\r"));
040            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.singleQuote, "'"));
041            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.doubleQuote, "\""));
042            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "${"));
043            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "$simple{"));
044            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionEnd, "}"));
045            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "true"));
046            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "false"));
047            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.nullValue, "null"));
048            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.escape, "\\"));
049    
050            // binary operators
051            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "=="));
052            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">="));
053            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<="));
054            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">"));
055            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<"));
056            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "!="));
057            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not is"));
058            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "is"));
059            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not contains"));
060            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "contains"));
061            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not regex"));
062            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "regex"));
063            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not in"));
064            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "in"));
065            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "range"));
066            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not range"));
067    
068            // unary operators
069            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "++"));
070            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "--"));
071    
072            // logical operators
073            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "&&"));
074            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "||"));
075            // TODO: @deprecated logical operators, to be removed in Camel 3.0
076            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "and"));
077            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "or"));
078        }
079    
080        private SimpleTokenizer() {
081            // static methods
082        }
083    
084    
085        /**
086         * @see SimpleLanguage#changeFunctionStartToken(String...)
087         */
088        public static void changeFunctionStartToken(String... startToken) {
089            for (SimpleTokenType type : KNOWN_TOKENS) {
090                if (type.getType() == TokenType.functionStart) {
091                    KNOWN_TOKENS.remove(type);
092                }
093            }
094    
095            // add in start of list as its a more common token to be used
096            for (String token : startToken) {
097                KNOWN_TOKENS.add(0, new SimpleTokenType(TokenType.functionStart, token));
098            }
099        }
100    
101        /**
102         * @see SimpleLanguage#changeFunctionEndToken(String...)
103         */
104        public static void changeFunctionEndToken(String... endToken) {
105            for (SimpleTokenType type : KNOWN_TOKENS) {
106                if (type.getType() == TokenType.functionEnd) {
107                    KNOWN_TOKENS.remove(type);
108                }
109            }
110    
111            // add in start of list as its a more common token to be used
112            for (String token : endToken) {
113                KNOWN_TOKENS.add(0, new SimpleTokenType(TokenType.functionEnd, token));
114            }
115        }
116    
117        /**
118         * Create the next token
119         *
120         * @param expression  the input expression
121         * @param index       the current index
122         * @param allowEscape whether to allow escapes
123         * @param filter      defines the accepted token types to be returned (character is always used as fallback)
124         * @return the created token, will always return a token
125         */
126        public static SimpleToken nextToken(String expression, int index, boolean allowEscape, TokenType... filter) {
127            return doNextToken(expression, index, allowEscape, filter);
128        }
129    
130        /**
131         * Create the next token
132         *
133         * @param expression  the input expression
134         * @param index       the current index
135         * @param allowEscape whether to allow escapes
136         * @return the created token, will always return a token
137         */
138        public static SimpleToken nextToken(String expression, int index, boolean allowEscape) {
139            return doNextToken(expression, index, allowEscape);
140        }
141    
142        private static SimpleToken doNextToken(String expression, int index, boolean allowEscape, TokenType... filters) {
143    
144            boolean numericAllowed = acceptType(TokenType.numericValue, filters);
145            if (numericAllowed) {
146                // is it a numeric value
147                StringBuilder sb = new StringBuilder();
148                boolean digit = true;
149                while (digit && index < expression.length()) {
150                    digit = Character.isDigit(expression.charAt(index));
151                    if (digit) {
152                        char ch = expression.charAt(index);
153                        sb.append(ch);
154                        index++;
155                        continue;
156                    }
157                    // is it a dot or comma as part of a floating point number
158                    boolean decimalSeparator = '.' == expression.charAt(index) || ',' == expression.charAt(index);
159                    if (decimalSeparator && sb.length() > 0) {
160                        char ch = expression.charAt(index);
161                        sb.append(ch);
162                        index++;
163                        // assume its still a digit
164                        digit = true;
165                        continue;
166                    }
167                }
168                if (sb.length() > 0) {
169                    return new SimpleToken(new SimpleTokenType(TokenType.numericValue, sb.toString()), index);
170                }
171            }
172    
173            boolean escapeAllowed = allowEscape && acceptType(TokenType.escape, filters);
174            if (escapeAllowed) {
175                StringBuilder sb = new StringBuilder();
176                char ch = expression.charAt(index);
177                boolean escaped = '\\' == ch;
178                if (escaped && index < expression.length() - 1) {
179                    // grab next character to escape
180                    char next = expression.charAt(++index);
181                    // special for new line, tabs and carriage return
182                    if ('n' == next) {
183                        sb.append("\n");
184                    } else if ('t' == next) {
185                        sb.append("\t");
186                    } else if ('r' == next) {
187                        sb.append("\r");
188                    } else {
189                        // append the next
190                        sb.append(next);
191                    }
192                    // force 2 as length
193                    return new SimpleToken(new SimpleTokenType(TokenType.character, sb.toString()), index, 2);
194                }
195            }
196    
197            // it could be any of the known tokens
198            String text = expression.substring(index);
199            for (SimpleTokenType token : KNOWN_TOKENS) {
200                if (acceptType(token.getType(), filters)) {
201                    if (text.startsWith(token.getValue())) {
202                        return new SimpleToken(token, index);
203                    }
204                }
205            }
206    
207            // fallback and create a character token
208            char ch = expression.charAt(index);
209            SimpleToken token = new SimpleToken(new SimpleTokenType(TokenType.character, "" + ch), index);
210            return token;
211        }
212    
213        private static boolean acceptType(TokenType type, TokenType... filters) {
214            if (filters == null || filters.length == 0) {
215                return true;
216            }
217            for (TokenType filter : filters) {
218                if (type == filter) {
219                    return true;
220                }
221            }
222            return false;
223        }
224    
225    }