001/** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.camel.support; 018 019import java.io.Closeable; 020import java.io.IOException; 021import java.io.InputStream; 022import java.util.Iterator; 023 024import org.apache.camel.Exchange; 025import org.apache.camel.InvalidPayloadException; 026import org.apache.camel.language.simple.SimpleLanguage; 027import org.apache.camel.util.IOHelper; 028import org.apache.camel.util.ObjectHelper; 029import org.apache.camel.util.Scanner; 030import org.apache.camel.util.StringHelper; 031 032/** 033 * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} body 034 * using an {@link Iterator}, which grabs the content between a start and end token. 035 * <p/> 036 * The message body must be able to convert to {@link InputStream} type which is used as stream 037 * to access the message body. 038 * <p/> 039 * For splitting XML files use {@link org.apache.camel.support.TokenXMLExpressionIterator} instead. 040 */ 041public class TokenPairExpressionIterator extends ExpressionAdapter { 042 043 protected final String startToken; 044 protected final String endToken; 045 protected final boolean includeTokens; 046 047 public TokenPairExpressionIterator(String startToken, String endToken, boolean includeTokens) { 048 StringHelper.notEmpty(startToken, "startToken"); 049 StringHelper.notEmpty(endToken, "endToken"); 050 this.startToken = startToken; 051 this.endToken = endToken; 052 this.includeTokens = includeTokens; 053 } 054 055 @Override 056 public boolean matches(Exchange exchange) { 057 // as a predicate we must close the stream, as we do not return an iterator that can be used 058 // afterwards to iterate the input stream 059 Object value = doEvaluate(exchange, true); 060 return ObjectHelper.evaluateValuePredicate(value); 061 } 062 063 @Override 064 public Object evaluate(Exchange exchange) { 065 // as we return an iterator to access the input stream, we should not close it 066 return doEvaluate(exchange, false); 067 } 068 069 /** 070 * Strategy to evaluate the exchange 071 * 072 * @param exchange the exchange 073 * @param closeStream whether to close the stream before returning from this method. 074 * @return the evaluated value 075 */ 076 protected Object doEvaluate(Exchange exchange, boolean closeStream) { 077 InputStream in = null; 078 try { 079 in = exchange.getIn().getMandatoryBody(InputStream.class); 080 // we may read from a file, and want to support custom charset defined on the exchange 081 String charset = IOHelper.getCharsetName(exchange); 082 return createIterator(exchange, in, charset); 083 } catch (InvalidPayloadException e) { 084 exchange.setException(e); 085 // must close input stream 086 IOHelper.close(in); 087 return null; 088 } finally { 089 if (closeStream) { 090 IOHelper.close(in); 091 } 092 } 093 } 094 095 /** 096 * Strategy to create the iterator 097 * 098 * @param exchange the exchange 099 * @param in input stream to iterate 100 * @param charset charset 101 * @return the iterator 102 */ 103 protected Iterator<?> createIterator(Exchange exchange, InputStream in, String charset) { 104 String start = startToken; 105 if (start != null && SimpleLanguage.hasSimpleFunction(start)) { 106 start = SimpleLanguage.expression(start).evaluate(exchange, String.class); 107 } 108 String end = endToken; 109 if (end != null && SimpleLanguage.hasSimpleFunction(end)) { 110 end = SimpleLanguage.expression(end).evaluate(exchange, String.class); 111 } 112 TokenPairIterator iterator = new TokenPairIterator(start, end, includeTokens, in, charset); 113 iterator.init(); 114 return iterator; 115 } 116 117 @Override 118 public String toString() { 119 return "tokenize[body() using tokens: " + startToken + "..." + endToken + "]"; 120 } 121 122 /** 123 * Iterator to walk the input stream 124 */ 125 static class TokenPairIterator implements Iterator<Object>, Closeable { 126 127 final String startToken; 128 String scanStartToken; 129 final String endToken; 130 String scanEndToken; 131 final boolean includeTokens; 132 final InputStream in; 133 final String charset; 134 Scanner scanner; 135 Object image; 136 137 TokenPairIterator(String startToken, String endToken, boolean includeTokens, InputStream in, String charset) { 138 this.startToken = startToken; 139 this.endToken = endToken; 140 this.includeTokens = includeTokens; 141 this.in = in; 142 this.charset = charset; 143 144 // make sure [ and ] is escaped as we use scanner which is reg exp based 145 // where [ and ] have special meaning 146 scanStartToken = startToken; 147 if (scanStartToken.startsWith("[")) { 148 scanStartToken = "\\" + scanStartToken; 149 } 150 if (scanStartToken.endsWith("]")) { 151 scanStartToken = scanStartToken.substring(0, startToken.length() - 1) + "\\]"; 152 } 153 scanEndToken = endToken; 154 if (scanEndToken.startsWith("[")) { 155 scanEndToken = "\\" + scanEndToken; 156 } 157 if (scanEndToken.endsWith("]")) { 158 scanEndToken = scanEndToken.substring(0, scanEndToken.length() - 1) + "\\]"; 159 } 160 } 161 162 void init() { 163 // use end token as delimiter 164 this.scanner = new Scanner(in, charset, scanEndToken); 165 // this iterator will do look ahead as we may have data 166 // after the last end token, which the scanner would find 167 // so we need to be one step ahead of the scanner 168 this.image = scanner.hasNext() ? next(true) : null; 169 } 170 171 @Override 172 public boolean hasNext() { 173 return image != null; 174 } 175 176 @Override 177 public Object next() { 178 return next(false); 179 } 180 181 Object next(boolean first) { 182 Object answer = image; 183 // calculate next 184 if (scanner.hasNext()) { 185 image = getNext(first); 186 } else { 187 image = null; 188 } 189 190 if (answer == null) { 191 // first time the image may be null 192 answer = image; 193 } 194 return answer; 195 } 196 197 Object getNext(boolean first) { 198 String next = scanner.next(); 199 200 // only grab text after the start token 201 if (next != null && next.contains(startToken)) { 202 next = StringHelper.after(next, startToken); 203 204 // include tokens in answer 205 if (next != null && includeTokens) { 206 StringBuilder sb = new StringBuilder(); 207 next = sb.append(startToken).append(next).append(endToken).toString(); 208 } 209 } else { 210 // must have start token, otherwise we have reached beyond last tokens 211 // and should not return more data 212 return null; 213 } 214 215 return next; 216 } 217 218 @Override 219 public void remove() { 220 // noop 221 } 222 223 @Override 224 public void close() throws IOException { 225 scanner.close(); 226 } 227 } 228 229}