001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.support;
018
019import java.io.Closeable;
020import java.io.IOException;
021import java.io.InputStream;
022import java.util.Iterator;
023import java.util.Scanner;
024
025import org.apache.camel.Exchange;
026import org.apache.camel.InvalidPayloadException;
027import org.apache.camel.util.IOHelper;
028import org.apache.camel.util.ObjectHelper;
029
030/**
031 * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} body
032 * using an {@link Iterator}, which grabs the content between a start and end token.
033 * <p/>
034 * The message body must be able to convert to {@link InputStream} type which is used as stream
035 * to access the message body.
036 * <p/>
037 * For splitting XML files use {@link org.apache.camel.support.TokenXMLExpressionIterator} instead.
038 */
039public class TokenPairExpressionIterator extends ExpressionAdapter {
040
041    protected final String startToken;
042    protected final String endToken;
043    protected final boolean includeTokens;
044
045    public TokenPairExpressionIterator(String startToken, String endToken, boolean includeTokens) {
046        ObjectHelper.notEmpty(startToken, "startToken");
047        ObjectHelper.notEmpty(endToken, "endToken");
048        this.startToken = startToken;
049        this.endToken = endToken;
050        this.includeTokens = includeTokens;
051    }
052
053    @Override
054    public boolean matches(Exchange exchange) {
055        // as a predicate we must close the stream, as we do not return an iterator that can be used
056        // afterwards to iterate the input stream
057        Object value = doEvaluate(exchange, true);
058        return ObjectHelper.evaluateValuePredicate(value);
059    }
060
061    @Override
062    public Object evaluate(Exchange exchange) {
063        // as we return an iterator to access the input stream, we should not close it
064        return doEvaluate(exchange, false);
065    }
066
067    /**
068     * Strategy to evaluate the exchange
069     *
070     * @param exchange   the exchange
071     * @param closeStream whether to close the stream before returning from this method.
072     * @return the evaluated value
073     */
074    protected Object doEvaluate(Exchange exchange, boolean closeStream) {
075        InputStream in = null;
076        try {
077            in = exchange.getIn().getMandatoryBody(InputStream.class);
078            // we may read from a file, and want to support custom charset defined on the exchange
079            String charset = IOHelper.getCharsetName(exchange);
080            return createIterator(in, charset);
081        } catch (InvalidPayloadException e) {
082            exchange.setException(e);
083            // must close input stream
084            IOHelper.close(in);
085            return null;
086        } finally {
087            if (closeStream) {
088                IOHelper.close(in);
089            }
090        }
091    }
092
093    /**
094     * Strategy to create the iterator
095     *
096     * @param in input stream to iterate
097     * @param charset charset
098     * @return the iterator
099     */
100    protected Iterator<?> createIterator(InputStream in, String charset) {
101        TokenPairIterator iterator = new TokenPairIterator(startToken, endToken, includeTokens, in, charset);
102        iterator.init();
103        return iterator;
104    }
105
106    @Override
107    public String toString() {
108        return "tokenize[body() using tokens: " + startToken + "..." + endToken + "]";
109    }
110
111    /**
112     * Iterator to walk the input stream
113     */
114    static class TokenPairIterator implements Iterator<Object>, Closeable {
115
116        final String startToken;
117        String scanStartToken;
118        final String endToken;
119        String scanEndToken;
120        final boolean includeTokens;
121        final InputStream in;
122        final String charset;
123        Scanner scanner;
124        Object image;
125
126        TokenPairIterator(String startToken, String endToken, boolean includeTokens, InputStream in, String charset) {
127            this.startToken = startToken;
128            this.endToken = endToken;
129            this.includeTokens = includeTokens;
130            this.in = in;
131            this.charset = charset;
132
133            // make sure [ and ] is escaped as we use scanner which is reg exp based
134            // where [ and ] have special meaning
135            scanStartToken = startToken;
136            if (scanStartToken.startsWith("[")) {
137                scanStartToken = "\\" + scanStartToken;
138            }
139            if (scanStartToken.endsWith("]")) {
140                scanStartToken = scanStartToken.substring(0, startToken.length() - 1)  + "\\]";
141            }
142            scanEndToken = endToken;
143            if (scanEndToken.startsWith("[")) {
144                scanEndToken = "\\" + scanEndToken;
145            }
146            if (scanEndToken.endsWith("]")) {
147                scanEndToken = scanEndToken.substring(0, scanEndToken.length() - 1)  + "\\]";
148            }
149        }
150
151        void init() {
152            // use end token as delimiter
153            this.scanner = new Scanner(in, charset).useDelimiter(scanEndToken);
154            // this iterator will do look ahead as we may have data
155            // after the last end token, which the scanner would find
156            // so we need to be one step ahead of the scanner
157            this.image = scanner.hasNext() ? next(true) : null;
158        }
159
160        @Override
161        public boolean hasNext() {
162            return image != null;
163        }
164
165        @Override
166        public Object next() {
167            return next(false);
168        }
169
170        Object next(boolean first) {
171            Object answer = image;
172            // calculate next
173            if (scanner.hasNext()) {
174                image = getNext(first);
175            } else {
176                image = null;
177            }
178
179            if (answer == null) {
180                // first time the image may be null
181                answer = image;
182            }
183            return answer;
184        }
185
186        Object getNext(boolean first) {
187            String next = scanner.next();
188
189            // only grab text after the start token
190            if (next != null && next.contains(startToken)) {
191                next = ObjectHelper.after(next, startToken);
192
193                // include tokens in answer
194                if (next != null && includeTokens) {
195                    StringBuilder sb = new StringBuilder();
196                    next = sb.append(startToken).append(next).append(endToken).toString();
197                }
198            } else {
199                // must have start token, otherwise we have reached beyond last tokens
200                // and should not return more data
201                return null;
202            }
203
204            return next;
205        }
206
207        @Override
208        public void remove() {
209            // noop
210        }
211
212        @Override
213        public void close() throws IOException {
214            scanner.close();
215        }
216    }
217
218}