001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.camel.support;
018    
019    import java.io.InputStream;
020    import java.util.Iterator;
021    import java.util.LinkedHashMap;
022    import java.util.Map;
023    import java.util.Scanner;
024    import java.util.regex.Matcher;
025    import java.util.regex.Pattern;
026    
027    import org.apache.camel.util.ObjectHelper;
028    
029    /**
030     * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} XML body
031     * using an {@link java.util.Iterator}, which grabs the content between a XML start and end token.
032     * <p/>
033     * The message body must be able to convert to {@link java.io.InputStream} type which is used as stream
034     * to access the message body.
035     * <p/>
036     * Can be used to split big XML files.
037     * <p/>
038     * This implementation supports inheriting namespaces from a parent/root tag.
039     */
040    public class TokenXMLPairExpressionIterator extends TokenPairExpressionIterator {
041    
042        private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)=\\\"(.*?)\\\"");
043        private static final String SCAN_TOKEN_REGEX = "(\\s+.*?|)>";
044        private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "(.{1,15}?:|)";
045        protected final String inheritNamespaceToken;
046    
047        public TokenXMLPairExpressionIterator(String startToken, String endToken, String inheritNamespaceToken) {
048            super(startToken, endToken, true);
049            // namespace token is optional
050            this.inheritNamespaceToken = inheritNamespaceToken;
051    
052            // must be XML tokens
053            if (!startToken.startsWith("<") || !startToken.endsWith(">")) {
054                throw new IllegalArgumentException("Start token must be a valid XML token, was: " + startToken);
055            }
056            if (!endToken.startsWith("<") || !endToken.endsWith(">")) {
057                throw new IllegalArgumentException("End token must be a valid XML token, was: " + endToken);
058            }
059            if (inheritNamespaceToken != null && (!inheritNamespaceToken.startsWith("<") || !inheritNamespaceToken.endsWith(">"))) {
060                throw new IllegalArgumentException("Namespace token must be a valid XML token, was: " + inheritNamespaceToken);
061            }
062        }
063    
064        @Override
065        protected Iterator<?> createIterator(InputStream in, String charset) {
066            XMLTokenPairIterator iterator = new XMLTokenPairIterator(startToken, endToken, inheritNamespaceToken, in, charset);
067            iterator.init();
068            return iterator;
069        }
070    
071        /**
072         * Iterator to walk the input stream
073         */
074        static class XMLTokenPairIterator extends TokenPairIterator {
075    
076            private final Pattern startTokenPattern;
077            private final String scanEndToken;
078            private final String inheritNamespaceToken;
079            private Pattern inheritNamespaceTokenPattern;
080            private String rootTokenNamespaces;
081    
082            XMLTokenPairIterator(String startToken, String endToken, String inheritNamespaceToken, InputStream in, String charset) {
083                super(startToken, endToken, true, in, charset);
084    
085                // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use a reg exp patterns
086                StringBuilder tokenSb = new StringBuilder("<").append(SCAN_TOKEN_NS_PREFIX_REGEX).
087                                    append(startToken.substring(1, startToken.length() - 1)).append(SCAN_TOKEN_REGEX);
088                this.startTokenPattern = Pattern.compile(tokenSb.toString());
089                
090                tokenSb = new StringBuilder("</").append(SCAN_TOKEN_NS_PREFIX_REGEX).
091                                    append(endToken.substring(2, endToken.length() - 1)).append(SCAN_TOKEN_REGEX);
092                this.scanEndToken = tokenSb.toString();
093                
094                this.inheritNamespaceToken = inheritNamespaceToken;
095                if (inheritNamespaceToken != null) {
096                    // the inherit namespace token may itself have a namespace prefix
097                    tokenSb = new StringBuilder("<").append(SCAN_TOKEN_NS_PREFIX_REGEX).
098                                    append(inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)).append(SCAN_TOKEN_REGEX);  
099                    // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines
100                    this.inheritNamespaceTokenPattern = Pattern.compile(tokenSb.toString(), Pattern.MULTILINE | Pattern.DOTALL);
101                }
102            }
103    
104            @Override
105            void init() {
106                // use scan end token as delimiter which supports attributes/namespaces
107                this.scanner = new Scanner(in, charset).useDelimiter(scanEndToken);
108                // this iterator will do look ahead as we may have data
109                // after the last end token, which the scanner would find
110                // so we need to be one step ahead of the scanner
111                this.image = scanner.hasNext() ? (String) next(true) : null;
112            }
113    
114            @Override
115            String getNext(boolean first) {
116                String next = scanner.next();
117                if (next == null) {
118                    return null;
119                }
120    
121                // initialize inherited namespaces on first
122                if (first && inheritNamespaceToken != null) {
123                    rootTokenNamespaces = getNamespacesFromNamespaceToken(next);
124                }
125    
126                // make sure next is positioned at start token as we can have leading data
127                // or we reached EOL and there is no more start tags
128                Matcher matcher = startTokenPattern.matcher(next);
129                if (!matcher.find()) {
130                    return null;
131                } else {
132                    int index = matcher.start();
133                    next = next.substring(index);
134                }
135    
136                // make sure the end tag matches the begin tag if the tag has a namespace prefix
137                String tag = ObjectHelper.before(next, ">");
138                StringBuilder endTagSb = new StringBuilder("</");
139                int firstSpaceIndex = tag.indexOf(" ");
140                if (firstSpaceIndex > 0) {
141                    endTagSb.append(tag.substring(1, firstSpaceIndex)).append(">");
142                } else {
143                    endTagSb.append(tag.substring(1, tag.length())).append(">");
144                }
145                
146                // build answer accordingly to whether namespaces should be inherited or not
147                StringBuilder sb = new StringBuilder();
148                if (inheritNamespaceToken != null && rootTokenNamespaces != null) {
149                    // append root namespaces to local start token
150                    // grab the text
151                    String text = ObjectHelper.after(next, ">");
152                    // build result with inherited namespaces
153                    next = sb.append(tag).append(rootTokenNamespaces).append(">").append(text).append(endTagSb.toString()).toString();
154                } else {
155                    next = sb.append(next).append(endTagSb.toString()).toString();
156                }
157    
158                return next;
159            }
160    
161            private String getNamespacesFromNamespaceToken(String text) {
162                if (text == null) {
163                    return null;
164                }
165    
166                // grab the namespace tag
167                Matcher mat = inheritNamespaceTokenPattern.matcher(text);
168                if (mat.find()) {
169                    text = mat.group(0);
170                } else {
171                    // cannot find namespace tag
172                    return null;
173                }
174    
175                // find namespaces (there can be attributes mixed, so we should only grab the namespaces)
176                Map<String, String> namespaces = new LinkedHashMap<String, String>();
177                Matcher matcher = NAMESPACE_PATTERN.matcher(text);
178                while (matcher.find()) {
179                    String prefix = matcher.group(1);
180                    String url = matcher.group(2);
181                    if (ObjectHelper.isEmpty(prefix)) {
182                        prefix = "_DEFAULT_";
183                    } else {
184                        // skip leading :
185                        prefix = prefix.substring(1);
186                    }
187                    namespaces.put(prefix, url);
188                }
189    
190                // did we find any namespaces
191                if (namespaces.isEmpty()) {
192                    return null;
193                }
194    
195                // build namespace String
196                StringBuilder sb = new StringBuilder();
197                for (Map.Entry<String, String> entry : namespaces.entrySet()) {
198                    String key = entry.getKey();
199                    String value = entry.getValue();
200                    if ("_DEFAULT_".equals(key)) {
201                        sb.append(" xmlns=\"").append(value).append("\"");
202                    } else {
203                        sb.append(" xmlns:").append(key).append("=\"").append(value).append("\"");
204                    }
205                }
206    
207                return sb.toString();
208            }
209        }
210    
211    }