/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
017package org.apache.camel.support;
018
019import java.io.Closeable;
020import java.io.IOException;
021import java.io.InputStream;
022import java.io.InputStreamReader;
023import java.io.Reader;
024import java.io.UnsupportedEncodingException;
025import java.util.ArrayList;
026import java.util.HashMap;
027import java.util.HashSet;
028import java.util.Iterator;
029import java.util.List;
030import java.util.Map;
031import java.util.Map.Entry;
032import java.util.Set;
033import java.util.regex.Matcher;
034import java.util.regex.Pattern;
035
036import javax.xml.namespace.QName;
037import javax.xml.stream.XMLStreamConstants;
038import javax.xml.stream.XMLStreamException;
039import javax.xml.stream.XMLStreamReader;
040
041import org.apache.camel.Exchange;
042import org.apache.camel.InvalidPayloadException;
043import org.apache.camel.converter.jaxp.StaxConverter;
044import org.apache.camel.spi.NamespaceAware;
045import org.apache.camel.util.IOHelper;
046import org.apache.camel.util.ObjectHelper;
047import org.slf4j.Logger;
048import org.slf4j.LoggerFactory;
049
050/**
051 * An {@link org.apache.camel.language.tokenizer.XMLTokenizeLanguage} based iterator.
052 */
053public class XMLTokenExpressionIterator extends ExpressionAdapter implements NamespaceAware {
054    protected final String path;
055    protected char mode;
056    protected int group;
057    protected Map<String, String> nsmap;
058
059    public XMLTokenExpressionIterator(String path, char mode) {
060        this(path, mode, 1);
061    }
062
063    public XMLTokenExpressionIterator(String path, char mode, int group) {
064        ObjectHelper.notEmpty(path, "path");
065        this.path = path;
066        this.mode = mode;
067        this.group = group > 1 ? group : 1;
068    }
069
070    @Override
071    public void setNamespaces(Map<String, String> nsmap) {
072        this.nsmap = nsmap;
073    }
074
075    public void setMode(char mode) {
076        this.mode = mode;
077    }
078
079    public void setMode(String mode) {
080        this.mode = mode != null ? mode.charAt(0) : 0;
081    }
082    
083    public int getGroup() {
084        return group;
085    }
086
087    public void setGroup(int group) {
088        this.group = group;
089    }
090
091    protected Iterator<?> createIterator(InputStream in, String charset) throws XMLStreamException, UnsupportedEncodingException {
092        return new XMLTokenIterator(path, nsmap, mode, group, in, charset);
093    }
094
095    protected Iterator<?> createIterator(Reader in) throws XMLStreamException {
096        return new XMLTokenIterator(path, nsmap, mode, group, in);
097    }
098
099    @Override
100    public boolean matches(Exchange exchange) {
101        // as a predicate we must close the stream, as we do not return an iterator that can be used
102        // afterwards to iterate the input stream
103        Object value = doEvaluate(exchange, true);
104        return ObjectHelper.evaluateValuePredicate(value);
105    }
106
107    @Override
108    public Object evaluate(Exchange exchange) {
109        // as we return an iterator to access the input stream, we should not close it
110        return doEvaluate(exchange, false);
111    }
112
113    /**
114     * Strategy to evaluate the exchange
115     *
116     * @param exchange   the exchange
117     * @param closeStream whether to close the stream before returning from this method.
118     * @return the evaluated value
119     */
120    protected Object doEvaluate(Exchange exchange, boolean closeStream) {
121        InputStream in = null;
122        try {
123            in = exchange.getIn().getMandatoryBody(InputStream.class);
124            String charset = IOHelper.getCharsetName(exchange);
125            return createIterator(in, charset);
126        } catch (InvalidPayloadException e) {
127            exchange.setException(e);
128            // must close input stream
129            IOHelper.close(in);
130            return null;
131        } catch (XMLStreamException e) {
132            exchange.setException(e);
133            // must close input stream
134            IOHelper.close(in);
135            return null;
136        } catch (UnsupportedEncodingException e) {
137            exchange.setException(e);
138            // must close input stream
139            IOHelper.close(in);
140            return null;
141        } finally {
142            if (closeStream) {
143                IOHelper.close(in);
144            }
145        }
146    }
147    
148
149    static class XMLTokenIterator implements Iterator<Object>, Closeable {
150        private static final Logger LOG = LoggerFactory.getLogger(XMLTokenIterator.class);
151        private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)\\s*=\\s*('[^']*'|\"[^\"]*\")");
152
153        private transient InputStream originalInputStream;
154
155        private AttributedQName[] splitpath;
156        private int index;
157        private char mode;
158        private int group;
159        private RecordableReader in;
160        private XMLStreamReader reader;
161        private List<QName> path;
162        private List<Map<String, String>> namespaces;
163        private List<String> segments;
164        private List<QName> segmentlog;
165        private List<String> tokens;
166        private int code;
167        private int consumed;
168        private boolean backtrack;
169        private int trackdepth = -1;
170        private int depth;
171        private boolean compliant;
172
173        private Object nextToken;
174        
175        XMLTokenIterator(String path, Map<String, String> nsmap, char mode, InputStream in, String charset) 
176            throws XMLStreamException, UnsupportedEncodingException {
177            // woodstox's getLocation().etCharOffset() does not return the offset correctly for InputStream, so use Reader instead.
178            this(path, nsmap, mode, 1, new InputStreamReader(in, charset));
179            this.originalInputStream = in;
180        }
181
182        XMLTokenIterator(String path, Map<String, String> nsmap, char mode, int group, InputStream in, String charset) 
183            throws XMLStreamException, UnsupportedEncodingException {
184            // woodstox's getLocation().etCharOffset() does not return the offset correctly for InputStream, so use Reader instead.
185            this(path, nsmap, mode, group, new InputStreamReader(in, charset));
186            this.originalInputStream = in;
187        }
188
189        XMLTokenIterator(String path, Map<String, String> nsmap, char mode, Reader in) throws XMLStreamException {
190            this(path, nsmap, mode, 1, in);
191        }
192
193        XMLTokenIterator(String path, Map<String, String> nsmap, char mode, int group, Reader in) throws XMLStreamException {
194            final String[] sl = path.substring(1).split("/");
195            this.splitpath = new AttributedQName[sl.length];
196            for (int i = 0; i < sl.length; i++) {
197                String s = sl[i];
198                if (s.length() > 0) {
199                    int d = s.indexOf(':');
200                    String pfx = d > 0 ? s.substring(0, d) : "";
201                    this.splitpath[i] = 
202                        new AttributedQName(
203                            "*".equals(pfx) ? "*" : nsmap == null ? "" : nsmap.get(pfx), d > 0 ? s.substring(d + 1) : s, pfx);
204                }
205            }
206            
207            this.mode = mode != 0 ? mode : 'i';
208            this.group = group > 0 ? group : 1;
209            this.in = new RecordableReader(in);
210            this.reader = new StaxConverter().createXMLStreamReader(this.in);
211
212            LOG.trace("reader.class: {}", reader.getClass());
213            // perform the first offset compliance test
214            int coff = reader.getLocation().getCharacterOffset();
215            if (coff != 0) {
216                LOG.error("XMLStreamReader {} not supporting Location");
217                throw new XMLStreamException("reader not supporting Location");
218            }
219
220            this.path = new ArrayList<QName>();
221            
222            // wrapped mode needs the segments and the injected mode needs the namespaces
223            if (this.mode == 'w') {
224                this.segments = new ArrayList<String>();
225                this.segmentlog = new ArrayList<QName>();
226            } else if (this.mode == 'i') {
227                this.namespaces = new ArrayList<Map<String, String>>();
228            }
229            // when grouping the tokens, allocate the storage to temporarily store tokens. 
230            if (this.group > 1) {
231                this.tokens = new ArrayList<String>();
232            }       
233            this.nextToken = getNextToken();
234        }
235        
236        private boolean isDoS() {
237            return splitpath[index] == null;
238        }
239        
240        private AttributedQName current() {
241            return splitpath[index + (isDoS() ? 1 : 0)];
242        }
243        
244        private AttributedQName ancestor() {
245            return index == 0 ? null : splitpath[index - 1];
246        }
247
248        private void down() {
249            if (isDoS()) {
250                index++;
251            }
252            index++;
253        }
254        
255        private void up() {
256            index--;
257        }
258        
259        private boolean isBottom() {
260            return index == splitpath.length - (isDoS() ? 2 : 1);
261        }
262        
263        private boolean isTop() {
264            return index == 0;
265        }
266        
267        private int readNext() throws XMLStreamException {
268            int c = code;
269            if (c > 0) {
270                code = 0;
271            } else {
272                c = reader.next();
273            }
274            return c;
275        }
276        
277        private String getCurrentText() {
278            int pos = reader.getLocation().getCharacterOffset();
279            String txt = in.getText(pos - consumed);
280            consumed = pos;
281            // keep recording
282            in.record();
283            return txt;
284        }
285
286        private void pushName(QName name) {
287            path.add(name);
288        }
289
290        private QName popName() {
291            return path.remove(path.size() - 1);
292        }
293
294        private void pushSegment(QName qname, String token) {
295            segments.add(token);
296            segmentlog.add(qname);
297        }
298
299        private String popSegment() {
300            return segments.remove(segments.size() - 1);
301        }
302        
303        private QName peekLog() {
304            return segmentlog.get(segmentlog.size() - 1);
305        }
306        
307        private QName popLog() {
308            return segmentlog.remove(segmentlog.size() - 1);
309        }
310
311        private void pushNamespaces(XMLStreamReader reader) {
312            Map<String, String> m = new HashMap<String, String>();
313            if (namespaces.size() > 0) {
314                m.putAll(namespaces.get(namespaces.size() - 1));
315            }
316            for (int i = 0; i < reader.getNamespaceCount(); i++) {
317                m.put(reader.getNamespacePrefix(i), reader.getNamespaceURI(i));
318            }
319            namespaces.add(m);
320        }
321
322        private void popNamespaces() {
323            namespaces.remove(namespaces.size() - 1);
324        }
325
326        private Map<String, String> getCurrentNamespaceBindings() {
327            return namespaces.get(namespaces.size() - 1);
328        }
329
330        private void readCurrent(boolean incl) throws XMLStreamException {
331            int d = depth;
332            while (d <= depth) {
333                int code = reader.next();
334                if (code == XMLStreamConstants.START_ELEMENT) {
335                    depth++;
336                } else if (code == XMLStreamConstants.END_ELEMENT) {
337                    depth--;
338                }
339            }
340            // either look ahead to the next token or stay at the end element token
341            if (incl) {
342                code = reader.next();
343            } else {
344                code = reader.getEventType();
345                if (code == XMLStreamConstants.END_ELEMENT) {
346                    // revert the depth count to avoid double counting the up event
347                    depth++;
348                }
349            }
350        }
351
352        private String getCurrentToken() throws XMLStreamException {
353            readCurrent(true);
354            popName();
355            
356            String token = createContextualToken(getCurrentText());
357            if (mode == 'i') {
358                popNamespaces();
359            }
360            
361            return token;
362        }
363
364        private String createContextualToken(String token) {
365            StringBuilder sb = new StringBuilder();
366            if (mode == 'w' && group == 1) {
367                for (int i = 0; i < segments.size(); i++) {
368                    sb.append(segments.get(i));
369                }
370                sb.append(token);
371                for (int i = path.size() - 1; i >= 0; i--) {
372                    QName q = path.get(i);
373                    sb.append("</").append(makeName(q)).append(">");
374                }
375
376            } else if (mode == 'i') {
377                final String stag = token.substring(0, token.indexOf('>') + 1);
378                Set<String> skip = new HashSet<String>();
379                Matcher matcher = NAMESPACE_PATTERN.matcher(stag);
380                char quote = 0;
381                while (matcher.find()) {
382                    String prefix = matcher.group(1);
383                    if (prefix.length() > 0) {
384                        prefix = prefix.substring(1);
385                    }
386                    skip.add(prefix);
387                    if (quote == 0) {
388                        quote = matcher.group(2).charAt(0);
389                    }
390                }
391                if (quote == 0) {
392                    quote = '"';
393                }
394                boolean empty = stag.endsWith("/>"); 
395                sb.append(token.substring(0, stag.length() - (empty ? 2 : 1)));
396                for (Entry<String, String> e : getCurrentNamespaceBindings().entrySet()) {
397                    if (!skip.contains(e.getKey())) {
398                        sb.append(e.getKey().length() == 0 ? " xmlns" : " xmlns:")
399                            .append(e.getKey()).append("=").append(quote).append(e.getValue()).append(quote);
400                    }
401                }
402                sb.append(token.substring(stag.length() - (empty ? 2 : 1)));
403            } else if (mode == 'u') {
404                int bp = token.indexOf(">");
405                int ep = token.lastIndexOf("</");
406                if (bp > 0 && ep > 0) {
407                    sb.append(token.substring(bp + 1, ep));
408                }
409            } else if (mode == 't') {
410                int bp = 0;
411                for (;;) {
412                    int ep = token.indexOf('>', bp);
413                    bp = token.indexOf('<', ep);
414                    if (bp < 0) {
415                        break;
416                    }
417                    sb.append(token.substring(ep + 1, bp));
418                }
419            } else {
420                return token;
421            }
422
423            return sb.toString();
424        }
425
426        private String getGroupedToken() {
427            StringBuilder sb = new StringBuilder();
428            if (mode == 'w') {
429                 // for wrapped
430                for (int i = 0; i < segments.size(); i++) {
431                    sb.append(segments.get(i));
432                }
433                for (String s : tokens) {
434                    sb.append(s);
435                }
436                for (int i = path.size() - 1; i >= 0; i--) {
437                    QName q = path.get(i);
438                    sb.append("</").append(makeName(q)).append(">");
439                }
440            } else {
441                // for injected, unwrapped, text
442                sb.append("<group>");
443                for (String s : tokens) {
444                    sb.append(s);
445                }
446                sb.append("</group>");
447            }
448            tokens.clear();
449            return sb.toString();
450        }
451        
452        private String getNextToken() throws XMLStreamException {
453            int xcode = 0;
454            while (xcode != XMLStreamConstants.END_DOCUMENT) {
455                xcode = readNext();
456
457                switch (xcode) {
458                case XMLStreamConstants.START_ELEMENT:
459                    depth++;
460                    QName name = reader.getName();
461                    if (LOG.isTraceEnabled()) {
462                        LOG.trace("se={}; depth={}; trackdepth={}", new Object[]{name, depth, trackdepth});
463                    }
464                    
465                    String token = getCurrentText();
466                    // perform the second compliance test
467                    if (!compliant) {
468                        if (token != null && token.startsWith("<") && !token.startsWith("<?")) {
469                            LOG.error("XMLStreamReader {} not supporting Location");
470                            throw new XMLStreamException("reader not supporting Location");
471                        }
472                        compliant = true;
473                    }
474
475                    LOG.trace("token={}", token);
476                    if (!backtrack && mode == 'w') {
477                        pushSegment(name, token);
478                    }
479                    pushName(name);
480                    if (mode == 'i') {
481                        pushNamespaces(reader);
482                    }
483                    backtrack = false;
484                    if (current().matches(name)) {
485                        // mark the position of the match in the segments list
486                        if (isBottom()) {
487                            // final match
488                            token = getCurrentToken();
489                            backtrack = true;
490                            trackdepth = depth;
491                            if (group > 1) {
492                                tokens.add(token);
493                                if (group == tokens.size()) {
494                                    return getGroupedToken();
495                                }
496                            } else {
497                                return token;    
498                            }
499                        } else {
500                            // intermediary match
501                            down();
502                        }
503                    } else if (isDoS()) {
504                        // continue
505                    } else {
506                        // skip
507                        readCurrent(false);
508                    }
509                    break;
510                case XMLStreamConstants.END_ELEMENT:
511                    if ((backtrack || (trackdepth > 0 && depth == trackdepth))
512                        && (mode == 'w' && group > 1 && tokens.size() > 0)) {
513                        // flush the left over using the current context
514                        code = XMLStreamConstants.END_ELEMENT;
515                        return getGroupedToken();
516                    }
517
518                    depth--;
519                    QName endname = reader.getName();
520                    LOG.trace("ee={}", endname);
521                    
522                    popName();
523                    if (mode == 'i') {
524                        popNamespaces();
525                    }
526                    
527                    int pc = 0;
528                    if (backtrack || (trackdepth > 0 && depth == trackdepth - 1)) {
529                        // reactive backtrack if not backtracking and update the track depth
530                        backtrack = true;
531                        trackdepth--;
532                        if (mode == 'w') {
533                            while (!endname.equals(peekLog())) {
534                                pc++;
535                                popLog();
536                            }
537                        }
538                    }
539
540                    if (backtrack) {
541                        if (mode == 'w') {
542                            for (int i = 0; i < pc; i++) {
543                                popSegment();
544                            }
545                        }
546
547                        if ((ancestor() == null && !isTop())
548                            || (ancestor() != null && ancestor().matches(endname))) {
549                            up();
550                        }
551                    }
552                    break;
553                case XMLStreamConstants.END_DOCUMENT:
554                    LOG.trace("depth={}", depth);
555                    if (group > 1 && tokens.size() > 0) {
556                        // flush the left over before really going EoD
557                        code = XMLStreamConstants.END_DOCUMENT;
558                        return getGroupedToken();
559                    }
560                    break;
561                default:
562                    break;
563                }
564            }
565            return null;
566        }
567
568        private static String makeName(QName qname) {
569            String pfx = qname.getPrefix();
570            return pfx.length() == 0 ? qname.getLocalPart() : qname.getPrefix() + ":" + qname.getLocalPart();
571        }
572
573        @Override
574        public boolean hasNext() {
575            return nextToken != null;
576        }
577
578        @Override
579        public Object next() {
580            Object o = nextToken;
581            try {
582                nextToken = getNextToken();
583            } catch (XMLStreamException e) {
584                nextToken = null;
585                throw new RuntimeException(e);
586            }
587            return o;
588        }
589
590        @Override
591        public void remove() {
592            // noop
593        }
594
595        @Override
596        public void close() throws IOException {
597            try {
598                reader.close();
599            } catch (Exception e) {
600                // ignore
601            }
602            // need to close the original input stream as well as the reader do not delegate close it
603            if (originalInputStream != null) {
604                IOHelper.close(originalInputStream);
605            }
606        }
607    }
608
609    static class AttributedQName extends QName {
610        private static final long serialVersionUID = 9878370226894144L;
611        private Pattern lcpattern;
612        private boolean nsany;
613        
614        AttributedQName(String localPart) {
615            super(localPart);
616            checkWildcard("", localPart);
617        }
618
619        AttributedQName(String namespaceURI, String localPart, String prefix) {
620            super(namespaceURI, localPart, prefix);
621            checkWildcard(namespaceURI, localPart);
622        }
623
624        AttributedQName(String namespaceURI, String localPart) {
625            super(namespaceURI, localPart);
626            checkWildcard(namespaceURI, localPart);
627        }
628
629        public boolean matches(QName qname) {
630            return (nsany || getNamespaceURI().equals(qname.getNamespaceURI()))
631                && (lcpattern != null 
632                ? lcpattern.matcher(qname.getLocalPart()).matches() 
633                : getLocalPart().equals(qname.getLocalPart()));
634        }
635        
636        private void checkWildcard(String nsa, String lcp) {
637            nsany = "*".equals(nsa);
638            boolean wc = false;
639            for (int i = 0; i < lcp.length(); i++) {
640                char c = lcp.charAt(i);
641                if (c == '?' || c == '*') {
642                    wc = true;
643                    break;
644                }
645            }
646            if (wc) {
647                StringBuilder sb = new StringBuilder();
648                for (int i = 0; i < lcp.length(); i++) {
649                    char c = lcp.charAt(i);
650                    switch (c) {
651                    case '.':
652                        sb.append("\\.");
653                        break;
654                    case '*':
655                        sb.append(".*");
656                        break;
657                    case '?':
658                        sb.append('.');
659                        break;
660                    default:
661                        sb.append(c);
662                        break;
663                    }
664                }
665                lcpattern = Pattern.compile(sb.toString());
666            }
667        }
668    }
669}