001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.support;
018
019import java.io.Closeable;
020import java.io.IOException;
021import java.io.InputStream;
022import java.util.Iterator;
023
024import org.apache.camel.Exchange;
025import org.apache.camel.InvalidPayloadException;
026import org.apache.camel.language.simple.SimpleLanguage;
027import org.apache.camel.util.IOHelper;
028import org.apache.camel.util.ObjectHelper;
029import org.apache.camel.util.Scanner;
030import org.apache.camel.util.StringHelper;
031
032/**
033 * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} body
034 * using an {@link Iterator}, which grabs the content between a start and end token.
035 * <p/>
036 * The message body must be able to convert to {@link InputStream} type which is used as stream
037 * to access the message body.
038 * <p/>
039 * For splitting XML files use {@link org.apache.camel.support.TokenXMLExpressionIterator} instead.
040 */
041public class TokenPairExpressionIterator extends ExpressionAdapter {
042
043    protected final String startToken;
044    protected final String endToken;
045    protected final boolean includeTokens;
046
047    public TokenPairExpressionIterator(String startToken, String endToken, boolean includeTokens) {
048        StringHelper.notEmpty(startToken, "startToken");
049        StringHelper.notEmpty(endToken, "endToken");
050        this.startToken = startToken;
051        this.endToken = endToken;
052        this.includeTokens = includeTokens;
053    }
054
055    @Override
056    public boolean matches(Exchange exchange) {
057        // as a predicate we must close the stream, as we do not return an iterator that can be used
058        // afterwards to iterate the input stream
059        Object value = doEvaluate(exchange, true);
060        return ObjectHelper.evaluateValuePredicate(value);
061    }
062
063    @Override
064    public Object evaluate(Exchange exchange) {
065        // as we return an iterator to access the input stream, we should not close it
066        return doEvaluate(exchange, false);
067    }
068
069    /**
070     * Strategy to evaluate the exchange
071     *
072     * @param exchange   the exchange
073     * @param closeStream whether to close the stream before returning from this method.
074     * @return the evaluated value
075     */
076    protected Object doEvaluate(Exchange exchange, boolean closeStream) {
077        InputStream in = null;
078        try {
079            in = exchange.getIn().getMandatoryBody(InputStream.class);
080            // we may read from a file, and want to support custom charset defined on the exchange
081            String charset = IOHelper.getCharsetName(exchange);
082            return createIterator(exchange, in, charset);
083        } catch (InvalidPayloadException e) {
084            exchange.setException(e);
085            // must close input stream
086            IOHelper.close(in);
087            return null;
088        } finally {
089            if (closeStream) {
090                IOHelper.close(in);
091            }
092        }
093    }
094
095    /**
096     * Strategy to create the iterator
097     *
098     * @param exchange the exchange
099     * @param in input stream to iterate
100     * @param charset charset
101     * @return the iterator
102     */
103    protected Iterator<?> createIterator(Exchange exchange, InputStream in, String charset) {
104        String start = startToken;
105        if (start != null && SimpleLanguage.hasSimpleFunction(start)) {
106            start = SimpleLanguage.expression(start).evaluate(exchange, String.class);
107        }
108        String end = endToken;
109        if (end != null && SimpleLanguage.hasSimpleFunction(end)) {
110            end = SimpleLanguage.expression(end).evaluate(exchange, String.class);
111        }
112        TokenPairIterator iterator = new TokenPairIterator(start, end, includeTokens, in, charset);
113        iterator.init();
114        return iterator;
115    }
116
117    @Override
118    public String toString() {
119        return "tokenize[body() using tokens: " + startToken + "..." + endToken + "]";
120    }
121
122    /**
123     * Iterator to walk the input stream
124     */
125    static class TokenPairIterator implements Iterator<Object>, Closeable {
126
127        final String startToken;
128        String scanStartToken;
129        final String endToken;
130        String scanEndToken;
131        final boolean includeTokens;
132        final InputStream in;
133        final String charset;
134        Scanner scanner;
135        Object image;
136
137        TokenPairIterator(String startToken, String endToken, boolean includeTokens, InputStream in, String charset) {
138            this.startToken = startToken;
139            this.endToken = endToken;
140            this.includeTokens = includeTokens;
141            this.in = in;
142            this.charset = charset;
143
144            // make sure [ and ] is escaped as we use scanner which is reg exp based
145            // where [ and ] have special meaning
146            scanStartToken = startToken;
147            if (scanStartToken.startsWith("[")) {
148                scanStartToken = "\\" + scanStartToken;
149            }
150            if (scanStartToken.endsWith("]")) {
151                scanStartToken = scanStartToken.substring(0, startToken.length() - 1)  + "\\]";
152            }
153            scanEndToken = endToken;
154            if (scanEndToken.startsWith("[")) {
155                scanEndToken = "\\" + scanEndToken;
156            }
157            if (scanEndToken.endsWith("]")) {
158                scanEndToken = scanEndToken.substring(0, scanEndToken.length() - 1)  + "\\]";
159            }
160        }
161
162        void init() {
163            // use end token as delimiter
164            this.scanner = new Scanner(in, charset, scanEndToken);
165            // this iterator will do look ahead as we may have data
166            // after the last end token, which the scanner would find
167            // so we need to be one step ahead of the scanner
168            this.image = scanner.hasNext() ? next(true) : null;
169        }
170
171        @Override
172        public boolean hasNext() {
173            return image != null;
174        }
175
176        @Override
177        public Object next() {
178            return next(false);
179        }
180
181        Object next(boolean first) {
182            Object answer = image;
183            // calculate next
184            if (scanner.hasNext()) {
185                image = getNext(first);
186            } else {
187                image = null;
188            }
189
190            if (answer == null) {
191                // first time the image may be null
192                answer = image;
193            }
194            return answer;
195        }
196
197        Object getNext(boolean first) {
198            String next = scanner.next();
199
200            // only grab text after the start token
201            if (next != null && next.contains(startToken)) {
202                next = StringHelper.after(next, startToken);
203
204                // include tokens in answer
205                if (next != null && includeTokens) {
206                    StringBuilder sb = new StringBuilder();
207                    next = sb.append(startToken).append(next).append(endToken).toString();
208                }
209            } else {
210                // must have start token, otherwise we have reached beyond last tokens
211                // and should not return more data
212                return null;
213            }
214
215            return next;
216        }
217
218        @Override
219        public void remove() {
220            // noop
221        }
222
223        @Override
224        public void close() throws IOException {
225            scanner.close();
226        }
227    }
228
229}