001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.support;
018
019import java.io.Closeable;
020import java.io.IOException;
021import java.io.InputStream;
022import java.util.Iterator;
023import java.util.Scanner;
024
025import org.apache.camel.Exchange;
026import org.apache.camel.InvalidPayloadException;
027import org.apache.camel.language.simple.SimpleLanguage;
028import org.apache.camel.util.IOHelper;
029import org.apache.camel.util.ObjectHelper;
030
031/**
032 * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} body
033 * using an {@link Iterator}, which grabs the content between a start and end token.
034 * <p/>
035 * The message body must be able to convert to {@link InputStream} type which is used as stream
036 * to access the message body.
037 * <p/>
038 * For splitting XML files use {@link org.apache.camel.support.TokenXMLExpressionIterator} instead.
039 */
040public class TokenPairExpressionIterator extends ExpressionAdapter {
041
042    protected final String startToken;
043    protected final String endToken;
044    protected final boolean includeTokens;
045
046    public TokenPairExpressionIterator(String startToken, String endToken, boolean includeTokens) {
047        ObjectHelper.notEmpty(startToken, "startToken");
048        ObjectHelper.notEmpty(endToken, "endToken");
049        this.startToken = startToken;
050        this.endToken = endToken;
051        this.includeTokens = includeTokens;
052    }
053
054    @Override
055    public boolean matches(Exchange exchange) {
056        // as a predicate we must close the stream, as we do not return an iterator that can be used
057        // afterwards to iterate the input stream
058        Object value = doEvaluate(exchange, true);
059        return ObjectHelper.evaluateValuePredicate(value);
060    }
061
062    @Override
063    public Object evaluate(Exchange exchange) {
064        // as we return an iterator to access the input stream, we should not close it
065        return doEvaluate(exchange, false);
066    }
067
068    /**
069     * Strategy to evaluate the exchange
070     *
071     * @param exchange   the exchange
072     * @param closeStream whether to close the stream before returning from this method.
073     * @return the evaluated value
074     */
075    protected Object doEvaluate(Exchange exchange, boolean closeStream) {
076        InputStream in = null;
077        try {
078            in = exchange.getIn().getMandatoryBody(InputStream.class);
079            // we may read from a file, and want to support custom charset defined on the exchange
080            String charset = IOHelper.getCharsetName(exchange);
081            return createIterator(exchange, in, charset);
082        } catch (InvalidPayloadException e) {
083            exchange.setException(e);
084            // must close input stream
085            IOHelper.close(in);
086            return null;
087        } finally {
088            if (closeStream) {
089                IOHelper.close(in);
090            }
091        }
092    }
093
094    /**
095     * Strategy to create the iterator
096     *
097     * @param exchange the exchange
098     * @param in input stream to iterate
099     * @param charset charset
100     * @return the iterator
101     */
102    protected Iterator<?> createIterator(Exchange exchange, InputStream in, String charset) {
103        String start = startToken;
104        if (start != null && SimpleLanguage.hasSimpleFunction(start)) {
105            start = SimpleLanguage.expression(start).evaluate(exchange, String.class);
106        }
107        String end = endToken;
108        if (end != null && SimpleLanguage.hasSimpleFunction(end)) {
109            end = SimpleLanguage.expression(end).evaluate(exchange, String.class);
110        }
111        TokenPairIterator iterator = new TokenPairIterator(start, end, includeTokens, in, charset);
112        iterator.init();
113        return iterator;
114    }
115
116    @Override
117    public String toString() {
118        return "tokenize[body() using tokens: " + startToken + "..." + endToken + "]";
119    }
120
121    /**
122     * Iterator to walk the input stream
123     */
124    static class TokenPairIterator implements Iterator<Object>, Closeable {
125
126        final String startToken;
127        String scanStartToken;
128        final String endToken;
129        String scanEndToken;
130        final boolean includeTokens;
131        final InputStream in;
132        final String charset;
133        Scanner scanner;
134        Object image;
135
136        TokenPairIterator(String startToken, String endToken, boolean includeTokens, InputStream in, String charset) {
137            this.startToken = startToken;
138            this.endToken = endToken;
139            this.includeTokens = includeTokens;
140            this.in = in;
141            this.charset = charset;
142
143            // make sure [ and ] is escaped as we use scanner which is reg exp based
144            // where [ and ] have special meaning
145            scanStartToken = startToken;
146            if (scanStartToken.startsWith("[")) {
147                scanStartToken = "\\" + scanStartToken;
148            }
149            if (scanStartToken.endsWith("]")) {
150                scanStartToken = scanStartToken.substring(0, startToken.length() - 1)  + "\\]";
151            }
152            scanEndToken = endToken;
153            if (scanEndToken.startsWith("[")) {
154                scanEndToken = "\\" + scanEndToken;
155            }
156            if (scanEndToken.endsWith("]")) {
157                scanEndToken = scanEndToken.substring(0, scanEndToken.length() - 1)  + "\\]";
158            }
159        }
160
161        void init() {
162            // use end token as delimiter
163            this.scanner = new Scanner(in, charset).useDelimiter(scanEndToken);
164            // this iterator will do look ahead as we may have data
165            // after the last end token, which the scanner would find
166            // so we need to be one step ahead of the scanner
167            this.image = scanner.hasNext() ? next(true) : null;
168        }
169
170        @Override
171        public boolean hasNext() {
172            return image != null;
173        }
174
175        @Override
176        public Object next() {
177            return next(false);
178        }
179
180        Object next(boolean first) {
181            Object answer = image;
182            // calculate next
183            if (scanner.hasNext()) {
184                image = getNext(first);
185            } else {
186                image = null;
187            }
188
189            if (answer == null) {
190                // first time the image may be null
191                answer = image;
192            }
193            return answer;
194        }
195
196        Object getNext(boolean first) {
197            String next = scanner.next();
198
199            // only grab text after the start token
200            if (next != null && next.contains(startToken)) {
201                next = ObjectHelper.after(next, startToken);
202
203                // include tokens in answer
204                if (next != null && includeTokens) {
205                    StringBuilder sb = new StringBuilder();
206                    next = sb.append(startToken).append(next).append(endToken).toString();
207                }
208            } else {
209                // must have start token, otherwise we have reached beyond last tokens
210                // and should not return more data
211                return null;
212            }
213
214            return next;
215        }
216
217        @Override
218        public void remove() {
219            // noop
220        }
221
222        @Override
223        public void close() throws IOException {
224            scanner.close();
225        }
226    }
227
228}