/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.camel.support;

import java.io.InputStream;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.camel.util.ObjectHelper;

/**
 * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} XML body
 * using an {@link java.util.Iterator}, which grabs the content between a XML start and end token.
 * <p/>
 * The message body must be able to convert to {@link java.io.InputStream} type which is used as stream
 * to access the message body.
 * <p/>
 * Can be used to split big XML files.
 * <p/>
 * This implementation supports inheriting namespaces from a parent/root tag. Namespaces
 * which the split tag already declares itself are not inherited again, so the
 * produced start tag never carries the same namespace declaration twice.
 */
public class TokenXMLPairExpressionIterator extends TokenPairExpressionIterator {

    private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)=\\\"(.*?)\\\"");
    private static final String SCAN_TOKEN_REGEX = "(\\s+.*?|)>";
    private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "(.{1,15}?:|)";
    // map key used for the default (un-prefixed) namespace declaration
    private static final String DEFAULT_NAMESPACE_KEY = "_DEFAULT_";
    protected final String inheritNamespaceToken;

    /**
     * Creates the expression.
     *
     * @param startToken            the XML start token, must start with <tt>&lt;</tt> and end with <tt>&gt;</tt>
     * @param endToken              the XML end token, must start with <tt>&lt;</tt> and end with <tt>&gt;</tt>
     * @param inheritNamespaceToken optional XML token of the parent/root tag to inherit namespaces from,
     *                              may be <tt>null</tt>; otherwise it must start with <tt>&lt;</tt> and end with <tt>&gt;</tt>
     * @throws IllegalArgumentException if any of the given tokens is not enclosed in <tt>&lt;</tt> and <tt>&gt;</tt>
     */
    public TokenXMLPairExpressionIterator(String startToken, String endToken, String inheritNamespaceToken) {
        super(startToken, endToken, true);
        // namespace token is optional
        this.inheritNamespaceToken = inheritNamespaceToken;

        // must be XML tokens
        if (!startToken.startsWith("<") || !startToken.endsWith(">")) {
            throw new IllegalArgumentException("Start token must be a valid XML token, was: " + startToken);
        }
        if (!endToken.startsWith("<") || !endToken.endsWith(">")) {
            throw new IllegalArgumentException("End token must be a valid XML token, was: " + endToken);
        }
        if (inheritNamespaceToken != null && (!inheritNamespaceToken.startsWith("<") || !inheritNamespaceToken.endsWith(">"))) {
            throw new IllegalArgumentException("Namespace token must be a valid XML token, was: " + inheritNamespaceToken);
        }
    }

    /**
     * Creates and initializes the iterator which walks the given stream.
     *
     * @param in      the stream with the XML content
     * @param charset the charset handed to the {@link Scanner} reading the stream
     * @return the initialized iterator
     */
    @Override
    protected Iterator<?> createIterator(InputStream in, String charset) {
        XMLTokenPairIterator iterator = new XMLTokenPairIterator(startToken, endToken, inheritNamespaceToken, in, charset);
        iterator.init();
        return iterator;
    }

    /**
     * Iterator to walk the input stream
     */
    static class XMLTokenPairIterator extends TokenPairIterator {

        private final Pattern startTokenPattern;
        private final String scanEndToken;
        private final String inheritNamespaceToken;
        private Pattern inheritNamespaceTokenPattern;
        // namespaces declared on the parent/root tag, keyed by prefix in declaration order;
        // null when none were found (or inheriting is not enabled)
        private Map<String, String> rootTokenNamespaces;

        XMLTokenPairIterator(String startToken, String endToken, String inheritNamespaceToken, InputStream in, String charset) {
            super(startToken, endToken, true, in, charset);

            // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use a reg exp patterns
            // NOTE: the token text is inserted as-is into the pattern, so any reg exp meta characters in it are interpreted as such
            StringBuilder tokenSb = new StringBuilder("<").append(SCAN_TOKEN_NS_PREFIX_REGEX).
                    append(startToken.substring(1, startToken.length() - 1)).append(SCAN_TOKEN_REGEX);
            this.startTokenPattern = Pattern.compile(tokenSb.toString());

            // the end token is expected to begin with </ hence we skip the first two characters
            tokenSb = new StringBuilder("</").append(SCAN_TOKEN_NS_PREFIX_REGEX).
                    append(endToken.substring(2, endToken.length() - 1)).append(SCAN_TOKEN_REGEX);
            this.scanEndToken = tokenSb.toString();

            this.inheritNamespaceToken = inheritNamespaceToken;
            if (inheritNamespaceToken != null) {
                // the inherit namespace token may itself have a namespace prefix
                tokenSb = new StringBuilder("<").append(SCAN_TOKEN_NS_PREFIX_REGEX).
                        append(inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)).append(SCAN_TOKEN_REGEX);
                // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines
                this.inheritNamespaceTokenPattern = Pattern.compile(tokenSb.toString(), Pattern.MULTILINE | Pattern.DOTALL);
            }
        }

        @Override
        void init() {
            // use scan end token as delimiter which supports attributes/namespaces
            this.scanner = new Scanner(in, charset).useDelimiter(scanEndToken);
            // this iterator will do look ahead as we may have data
            // after the last end token, which the scanner would find
            // so we need to be one step ahead of the scanner
            this.image = scanner.hasNext() ? (String) next(true) : null;
        }

        /**
         * Reads the next chunk from the scanner and turns it into a complete XML element.
         *
         * @param first whether this is the first chunk, in which case the namespaces
         *              of the parent/root tag are looked up (if inheriting is enabled)
         * @return the element including its start and end tag, or <tt>null</tt> if the
         *         chunk does not contain the start token
         */
        @Override
        String getNext(boolean first) {
            String next = scanner.next();

            // initialize inherited namespaces on first
            if (first && inheritNamespaceToken != null) {
                rootTokenNamespaces = getNamespacesFromNamespaceToken(next);
            }

            // make sure next is positioned at start token as we can have leading data
            // or we reached EOL and there is no more start tags
            Matcher matcher = startTokenPattern.matcher(next);
            if (!matcher.find()) {
                return null;
            }
            next = next.substring(matcher.start());

            // make sure the end tag matches the begin tag if the tag has a namespace prefix
            // the tag name ends at the first whitespace (space, tab, line break ...), as
            // attributes can be separated from the name by any kind of whitespace
            String tag = ObjectHelper.before(next, ">");
            int nameEnd = indexOfWhitespace(tag);
            String tagName = nameEnd > 0 ? tag.substring(1, nameEnd) : tag.substring(1);
            String endTag = "</" + tagName + ">";

            // build answer accordingly to whether namespaces should be inherited or not
            if (inheritNamespaceToken != null && rootTokenNamespaces != null) {
                // grab the text after the start tag
                String text = ObjectHelper.after(next, ">");
                // build result with the inherited namespaces appended to the local start tag
                StringBuilder sb = new StringBuilder();
                sb.append(tag);
                appendMissingNamespaces(sb, tag);
                return sb.append(">").append(text).append(endTag).toString();
            }

            return next + endTag;
        }

        /**
         * Appends the root namespace declarations to the given builder, skipping those
         * whose prefix (or the default namespace) is already declared on the tag itself,
         * as declaring the same namespace attribute twice is not well-formed XML.
         *
         * @param sb  the builder holding the start tag being built
         * @param tag the original start tag (without the ending <tt>&gt;</tt>)
         */
        private void appendMissingNamespaces(StringBuilder sb, String tag) {
            Map<String, String> declared = parseNamespaces(tag);
            for (Map.Entry<String, String> entry : rootTokenNamespaces.entrySet()) {
                String prefix = entry.getKey();
                if (declared.containsKey(prefix)) {
                    // the local declaration takes precedence
                    continue;
                }
                if (DEFAULT_NAMESPACE_KEY.equals(prefix)) {
                    sb.append(" xmlns=\"").append(entry.getValue()).append("\"");
                } else {
                    sb.append(" xmlns:").append(prefix).append("=\"").append(entry.getValue()).append("\"");
                }
            }
        }

        /**
         * Finds the parent/root tag in the given text and extracts its namespace declarations.
         *
         * @param text the text to search
         * @return the namespaces keyed by prefix, or <tt>null</tt> if the tag was not found
         *         or it declares no namespaces
         */
        private Map<String, String> getNamespacesFromNamespaceToken(String text) {
            if (text == null) {
                return null;
            }

            // grab the namespace tag
            Matcher mat = inheritNamespaceTokenPattern.matcher(text);
            if (!mat.find()) {
                // cannot find namespace tag
                return null;
            }

            // find namespaces (there can be attributes mixed, so we should only grab the namespaces)
            Map<String, String> namespaces = parseNamespaces(mat.group(0));

            // did we find any namespaces
            return namespaces.isEmpty() ? null : namespaces;
        }

        /**
         * Extracts all namespace declarations matched by {@link #NAMESPACE_PATTERN} from the text.
         *
         * @param text the text (usually a start tag) to parse
         * @return the namespace uris keyed by prefix in declaration order, where the default
         *         namespace uses a reserved key; never <tt>null</tt>
         */
        private static Map<String, String> parseNamespaces(String text) {
            Map<String, String> namespaces = new LinkedHashMap<String, String>();
            Matcher matcher = NAMESPACE_PATTERN.matcher(text);
            while (matcher.find()) {
                String prefix = matcher.group(1);
                String url = matcher.group(2);
                if (ObjectHelper.isEmpty(prefix)) {
                    prefix = DEFAULT_NAMESPACE_KEY;
                } else {
                    // skip leading :
                    prefix = prefix.substring(1);
                }
                namespaces.put(prefix, url);
            }
            return namespaces;
        }

        /**
         * Returns the index of the first whitespace character in the text, or <tt>-1</tt> if there is none.
         */
        private static int indexOfWhitespace(String text) {
            for (int i = 0; i < text.length(); i++) {
                if (Character.isWhitespace(text.charAt(i))) {
                    return i;
                }
            }
            return -1;
        }
    }

}