/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.camel.support;

import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.camel.Exchange;
import org.apache.camel.InvalidPayloadException;
import org.apache.camel.util.IOHelper;
import org.apache.camel.util.ObjectHelper;

/**
 * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} XML body
 * using an {@link java.util.Iterator}, which grabs the content between a XML start and end token,
 * where the end token corresponds implicitly to either the end tag or the self-closing start tag.
 * <p/>
 * The message body must be able to convert to {@link java.io.InputStream} type which is used as stream
 * to access the message body.
 * <p/>
 * Can be used to split big XML files.
 * <p/>
 * This implementation supports inheriting namespaces from a parent/root tag.
 */
public class TokenXMLExpressionIterator extends ExpressionAdapter {
    // matches a namespace declaration; group 1 is either empty (default namespace) or ":prefix",
    // group 2 is the namespace value including its surrounding quotes.
    // the prefix may contain '.' and '-' (e.g. xmlns:SOAP-ENV) in addition to word characters
    private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:[\\w.-]+|)\\s*=\\s*('[^']+'|\"[^\"]+\")");
    private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "([^:<>]{1,15}?:|)";
    private static final String SCAN_BLOCK_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*)?/>|<{0}(\\s+[^>]*)?>(?:(?!(</{0}\\s*>)).)*</{0}\\s*>";
    private static final String SCAN_PARENT_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*\\s*)?>";
    // special value of the inherit namespace token which selects the "wrap" mode
    private static final String OPTION_WRAP_TOKEN = "<*>";

    protected final String tagToken;
    protected final String inheritNamespaceToken;

    /**
     * Creates the expression.
     *
     * @param tagToken              the XML tag to grab, must start with <tt>&lt;</tt> and end with <tt>&gt;</tt>
     * @param inheritNamespaceToken optional parent/root tag to inherit namespaces from, or <tt>&lt;*&gt;</tt>
     *                              to wrap each token in the text that precedes the first token;
     *                              when given it must start with <tt>&lt;</tt> and end with <tt>&gt;</tt>
     * @throws IllegalArgumentException if a token is not enclosed in <tt>&lt;</tt> and <tt>&gt;</tt>
     */
    public TokenXMLExpressionIterator(String tagToken, String inheritNamespaceToken) {
        ObjectHelper.notEmpty(tagToken, "tagToken");
        this.tagToken = tagToken;
        // namespace token is optional
        this.inheritNamespaceToken = inheritNamespaceToken;

        // must be XML tokens
        if (!tagToken.startsWith("<") || !tagToken.endsWith(">")) {
            throw new IllegalArgumentException("XML Tag token must be a valid XML tag, was: " + tagToken);
        }
        if (inheritNamespaceToken != null && (!inheritNamespaceToken.startsWith("<") || !inheritNamespaceToken.endsWith(">"))) {
            throw new IllegalArgumentException("Namespace token must be a valid XML token, was: " + inheritNamespaceToken);
        }
    }

    /**
     * Creates and initializes the iterator which walks the given stream.
     *
     * @param in      the stream to read the XML from
     * @param charset the name of the charset used to decode the stream
     * @return the initialized iterator
     */
    protected Iterator<?> createIterator(InputStream in, String charset) {
        XMLTokenIterator iterator = new XMLTokenIterator(tagToken, inheritNamespaceToken, in, charset);
        iterator.init();
        return iterator;
    }

    @Override
    public boolean matches(Exchange exchange) {
        // as a predicate we must close the stream, as we do not return an iterator that can be used
        // afterwards to iterate the input stream
        Object value = doEvaluate(exchange, true);
        return ObjectHelper.evaluateValuePredicate(value);
    }

    @Override
    public Object evaluate(Exchange exchange) {
        // as we return an iterator to access the input stream, we should not close it
        return doEvaluate(exchange, false);
    }

    /**
     * Strategy to evaluate the exchange
     *
     * @param exchange   the exchange
     * @param closeStream whether to close the stream before returning from this method.
     * @return the evaluated value, or <tt>null</tt> if the message body could not be converted to a stream
     *         (in which case the exception is set on the exchange)
     */
    protected Object doEvaluate(Exchange exchange, boolean closeStream) {
        InputStream in = null;
        boolean created = false;
        try {
            in = exchange.getIn().getMandatoryBody(InputStream.class);
            // we may read from a file, and want to support custom charset defined on the exchange
            String charset = IOHelper.getCharsetName(exchange);
            Object answer = createIterator(in, charset);
            created = true;
            return answer;
        } catch (InvalidPayloadException e) {
            exchange.setException(e);
            return null;
        } finally {
            // always close when asked to, and also when no iterator was handed out (payload or runtime failure),
            // as then nobody else is left to close the stream
            if (closeStream || !created) {
                IOHelper.close(in);
            }
        }
    }

    /**
     * Iterator to walk the input stream
     */
    static class XMLTokenIterator implements Iterator<Object>, Closeable {
        final String tagToken;
        final InputStream in;
        final String charset;
        Scanner scanner;
        // the look-ahead element which is returned by the next call to next()
        Object image;

        private final Pattern tagTokenPattern;
        private final String inheritNamespaceToken;
        private final boolean wrapToken;
        private Pattern inheritNamespaceTokenPattern;
        // namespace declarations of the parent tag, keyed by "" (default namespace) or ":prefix",
        // the value is the namespace including its quotes; null if there are none to inherit
        private Map<String, String> rootTokenNamespaces;
        private String wrapHead;
        private String wrapTail;

        XMLTokenIterator(String tagToken, String inheritNamespaceToken, InputStream in, String charset) {
            this.tagToken = tagToken;
            this.charset = charset;

            // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use a reg exp patterns
            this.tagTokenPattern =
                Pattern.compile(MessageFormat.format(SCAN_BLOCK_TOKEN_REGEX_TEMPLATE,
                    SCAN_TOKEN_NS_PREFIX_REGEX + tagToken.substring(1, tagToken.length() - 1)),
                    Pattern.MULTILINE | Pattern.DOTALL);

            this.inheritNamespaceToken = inheritNamespaceToken;
            if (OPTION_WRAP_TOKEN.equals(inheritNamespaceToken)) {
                // wrap mode: the stream is recorded so the text before the first token can be retrieved
                this.wrapToken = true;
                this.in = new RecordableInputStream(in, charset);
            } else {
                this.wrapToken = false;
                this.in = in;
                if (inheritNamespaceToken != null) {
                    // the inherit namespace token may itself have a namespace prefix
                    // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines
                    this.inheritNamespaceTokenPattern =
                        Pattern.compile(MessageFormat.format(SCAN_PARENT_TOKEN_REGEX_TEMPLATE,
                            SCAN_TOKEN_NS_PREFIX_REGEX + inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)),
                            Pattern.MULTILINE | Pattern.DOTALL);
                }
            }
        }

        /**
         * Creates the scanner and pre-loads the first token (if any) as the look-ahead image.
         */
        void init() {
            // use a scanner with the default delimiter
            this.scanner = new Scanner(in, charset);
            this.image = scanner.hasNext() ? (String) next(true) : null;
        }

        /**
         * Scans for the next token.
         *
         * @param first whether this is the first scan, in which case the inherited namespaces
         *              (or the wrapping head and tail) are initialized as well
         * @return the next token, possibly enriched with inherited namespaces or wrapped, or <tt>null</tt> if there is none
         */
        String getNext(boolean first) {
            // initialize inherited namespaces on first
            if (first && inheritNamespaceToken != null && !wrapToken) {
                rootTokenNamespaces = parseNamespaceDeclarations(scanner.findWithinHorizon(inheritNamespaceTokenPattern, 0));
            }

            String next = scanner.findWithinHorizon(tagTokenPattern, 0);
            if (next == null) {
                return null;
            }
            if (first && wrapToken) {
                // everything recorded before the first token becomes the head, the tail closes the tags left open by the head
                MatchResult mres = scanner.match();
                wrapHead = ((RecordableInputStream) in).getText(mres.start());
                wrapTail = buildXMLTail(wrapHead);
            }

            // build answer accordingly to whether namespaces should be inherited or not
            if (inheritNamespaceToken != null && rootTokenNamespaces != null) {
                String head = ObjectHelper.before(next, ">");
                boolean empty = false;
                if (head.endsWith("/")) {
                    head = head.substring(0, head.length() - 1);
                    empty = true;
                }
                // grab the text
                String tail = ObjectHelper.after(next, ">");
                // append the root namespaces to the local start token, skipping the ones the child declares itself
                StringBuilder sb = new StringBuilder();
                next = sb.append(head).append(getMissingInheritedNamespaces(head)).append(empty ? "/>" : ">").append(tail).toString();
            } else if (wrapToken) {
                // wrap the token
                StringBuilder sb = new StringBuilder();
                next = sb.append(wrapHead).append(next).append(wrapTail).toString();
            }

            return next;
        }

        /**
         * Builds the namespace declarations to inherit from the parent tag, leaving out every
         * prefix (or default namespace) which the given start tag already declares, as declaring
         * the same attribute twice would make the resulting element malformed.
         *
         * @param head the start tag of the token, without the closing <tt>&gt;</tt>
         * @return the declarations to append, each with a leading space; empty if nothing is missing
         */
        private String getMissingInheritedNamespaces(String head) {
            // only parse the child when it can contain a declaration at all
            Map<String, String> declared = head.contains("xmlns") ? parseNamespaceDeclarations(head) : null;

            StringBuilder sb = new StringBuilder();
            for (Map.Entry<String, String> entry : rootTokenNamespaces.entrySet()) {
                if (declared == null || !declared.containsKey(entry.getKey())) {
                    // the key is "" or ":prefix" and the value is already quoted
                    sb.append(" xmlns").append(entry.getKey()).append("=").append(entry.getValue());
                }
            }
            return sb.toString();
        }

        /**
         * Extracts the namespace declarations from the given text.
         *
         * @param text the text to search, typically a start tag; may be <tt>null</tt>
         * @return the declarations in order of appearance, keyed by <tt>""</tt> for the default namespace
         *         or <tt>":prefix"</tt>, with the quoted namespace as value;
         *         <tt>null</tt> if the text is <tt>null</tt> or has no declarations
         */
        private Map<String, String> parseNamespaceDeclarations(String text) {
            if (text == null) {
                return null;
            }

            // find namespaces (there can be attributes mixed, so we should only grab the namespaces)
            Map<String, String> namespaces = new LinkedHashMap<String, String>();
            Matcher matcher = NAMESPACE_PATTERN.matcher(text);
            while (matcher.find()) {
                namespaces.put(matcher.group(1), matcher.group(2));
            }

            // did we find any namespaces
            return namespaces.isEmpty() ? null : namespaces;
        }

        @Override
        public boolean hasNext() {
            return image != null;
        }

        @Override
        public Object next() {
            return next(false);
        }

        /**
         * Returns the current look-ahead image and scans for the following one.
         *
         * @param first whether this is the initial call made from {@link #init()}
         * @return the current element, or <tt>null</tt> if there are no more elements
         */
        Object next(boolean first) {
            Object answer = image;
            // calculate next
            if (scanner.hasNext()) {
                image = getNext(first);
            } else {
                image = null;
            }

            if (answer == null) {
                // first time the image may be null
                answer = image;
            }
            return answer;
        }

        @Override
        public void remove() {
            // noop
        }

        @Override
        public void close() throws IOException {
            if (scanner != null) {
                scanner.close();
            } else if (in != null) {
                // init() was never called so there is no scanner to close the stream for us
                in.close();
            }
        }

    }

    /**
     * Builds the closing tags for all the elements which are left open in the given XML fragment.
     * <p/>
     * Processing instructions, comments, CDATA sections, other <tt>&lt;!</tt> declarations (such as DOCTYPE)
     * and self-closing elements do not open an element and are skipped.
     * The fragment is assumed to be a portion of a well-formed XML; scanning stops at a construct
     * which is cut off by the end of the fragment.
     *
     * @param xmlhead the text which precedes the first token
     * @return the closing tags, innermost element first; empty if no element is left open
     */
    private static String buildXMLTail(String xmlhead) {
        // assume the input text is a portion of a well-formed xml
        List<String> tags = new ArrayList<String>();
        final int length = xmlhead.length();
        int p = 0;
        while (p < length) {
            p = xmlhead.indexOf('<', p);
            if (p < 0 || p + 1 >= length) {
                // no more markup, or a dangling < at the very end
                break;
            }
            final char nc = xmlhead.charAt(p + 1);
            if (nc == '?') {
                // processing instruction or xml declaration
                p++;
            } else if (nc == '!') {
                if (xmlhead.startsWith("<!--", p)) {
                    // a comment may contain markup characters, so skip it as a whole
                    final int end = xmlhead.indexOf("-->", p + 4);
                    if (end < 0) {
                        break;
                    }
                    p = end + 3;
                } else if (xmlhead.startsWith("<![CDATA[", p)) {
                    // same for a CDATA section
                    final int end = xmlhead.indexOf("]]>", p + 9);
                    if (end < 0) {
                        break;
                    }
                    p = end + 3;
                } else {
                    // DOCTYPE and similar declarations
                    p++;
                }
            } else if (nc == '/') {
                // an end tag closes the most recently opened element
                p++;
                if (!tags.isEmpty()) {
                    tags.remove(tags.size() - 1);
                }
            } else {
                final int ep = xmlhead.indexOf('>', p);
                if (ep < 0) {
                    // start tag is cut off
                    break;
                }
                if (xmlhead.charAt(ep - 1) == '/') {
                    // self-closing element
                    p++;
                    continue;
                }
                // the name ends at the first whitespace (attributes may follow on a new line) or at the >
                int nameEnd = p + 1;
                while (nameEnd < ep && !Character.isWhitespace(xmlhead.charAt(nameEnd))) {
                    nameEnd++;
                }
                tags.add(xmlhead.substring(p + 1, nameEnd));
                p = ep;
            }
        }
        StringBuilder sb = new StringBuilder();
        for (int i = tags.size() - 1; i >= 0; i--) {
            sb.append("</").append(tags.get(i)).append(">");
        }
        return sb.toString();
    }
}