001/** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.camel.support; 018 019import java.io.Closeable; 020import java.io.IOException; 021import java.io.InputStream; 022import java.io.InputStreamReader; 023import java.io.Reader; 024import java.io.UnsupportedEncodingException; 025import java.util.ArrayList; 026import java.util.HashMap; 027import java.util.HashSet; 028import java.util.Iterator; 029import java.util.List; 030import java.util.Map; 031import java.util.Map.Entry; 032import java.util.Set; 033import java.util.regex.Matcher; 034import java.util.regex.Pattern; 035 036import javax.xml.namespace.QName; 037import javax.xml.stream.XMLStreamConstants; 038import javax.xml.stream.XMLStreamException; 039import javax.xml.stream.XMLStreamReader; 040 041import org.apache.camel.Exchange; 042import org.apache.camel.InvalidPayloadException; 043import org.apache.camel.converter.jaxp.StaxConverter; 044import org.apache.camel.spi.NamespaceAware; 045import org.apache.camel.util.IOHelper; 046import org.apache.camel.util.ObjectHelper; 047import org.slf4j.Logger; 048import org.slf4j.LoggerFactory; 049 050/** 051 * An {@link org.apache.camel.language.tokenizer.XMLTokenizeLanguage} based iterator. 052 */ 053public class XMLTokenExpressionIterator extends ExpressionAdapter implements NamespaceAware { 054 protected final String path; 055 protected char mode; 056 protected int group; 057 protected Map<String, String> nsmap; 058 059 public XMLTokenExpressionIterator(String path, char mode) { 060 this(path, mode, 1); 061 } 062 063 public XMLTokenExpressionIterator(String path, char mode, int group) { 064 ObjectHelper.notEmpty(path, "path"); 065 this.path = path; 066 this.mode = mode; 067 this.group = group > 1 ? group : 1; 068 } 069 070 @Override 071 public void setNamespaces(Map<String, String> nsmap) { 072 this.nsmap = nsmap; 073 } 074 075 public void setMode(char mode) { 076 this.mode = mode; 077 } 078 079 public void setMode(String mode) { 080 this.mode = mode != null ? mode.charAt(0) : 0; 081 } 082 083 public int getGroup() { 084 return group; 085 } 086 087 public void setGroup(int group) { 088 this.group = group; 089 } 090 091 protected Iterator<?> createIterator(InputStream in, String charset) throws XMLStreamException, UnsupportedEncodingException { 092 return new XMLTokenIterator(path, nsmap, mode, group, in, charset); 093 } 094 095 protected Iterator<?> createIterator(Reader in) throws XMLStreamException { 096 return new XMLTokenIterator(path, nsmap, mode, group, in); 097 } 098 099 @Override 100 public boolean matches(Exchange exchange) { 101 // as a predicate we must close the stream, as we do not return an iterator that can be used 102 // afterwards to iterate the input stream 103 Object value = doEvaluate(exchange, true); 104 return ObjectHelper.evaluateValuePredicate(value); 105 } 106 107 @Override 108 public Object evaluate(Exchange exchange) { 109 // as we return an iterator to access the input stream, we should not close it 110 return doEvaluate(exchange, false); 111 } 112 113 /** 114 * Strategy to evaluate the exchange 115 * 116 * @param exchange the exchange 117 * @param closeStream whether to close the stream before returning from this method. 118 * @return the evaluated value 119 */ 120 protected Object doEvaluate(Exchange exchange, boolean closeStream) { 121 InputStream in = null; 122 try { 123 in = exchange.getIn().getMandatoryBody(InputStream.class); 124 String charset = IOHelper.getCharsetName(exchange); 125 return createIterator(in, charset); 126 } catch (InvalidPayloadException e) { 127 exchange.setException(e); 128 // must close input stream 129 IOHelper.close(in); 130 return null; 131 } catch (XMLStreamException e) { 132 exchange.setException(e); 133 // must close input stream 134 IOHelper.close(in); 135 return null; 136 } catch (UnsupportedEncodingException e) { 137 exchange.setException(e); 138 // must close input stream 139 IOHelper.close(in); 140 return null; 141 } finally { 142 if (closeStream) { 143 IOHelper.close(in); 144 } 145 } 146 } 147 148 149 static class XMLTokenIterator implements Iterator<Object>, Closeable { 150 private static final Logger LOG = LoggerFactory.getLogger(XMLTokenIterator.class); 151 private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)\\s*=\\s*('[^']*'|\"[^\"]*\")"); 152 153 private transient InputStream originalInputStream; 154 155 private AttributedQName[] splitpath; 156 private int index; 157 private char mode; 158 private int group; 159 private RecordableReader in; 160 private XMLStreamReader reader; 161 private List<QName> path; 162 private List<Map<String, String>> namespaces; 163 private List<String> segments; 164 private List<QName> segmentlog; 165 private List<String> tokens; 166 private int code; 167 private int consumed; 168 private boolean backtrack; 169 private int trackdepth = -1; 170 private int depth; 171 private boolean compliant; 172 173 private Object nextToken; 174 175 XMLTokenIterator(String path, Map<String, String> nsmap, char mode, InputStream in, String charset) 176 throws XMLStreamException, UnsupportedEncodingException { 177 // woodstox's getLocation().etCharOffset() does not return the offset correctly for InputStream, so use Reader instead. 178 this(path, nsmap, mode, 1, new InputStreamReader(in, charset)); 179 this.originalInputStream = in; 180 } 181 182 XMLTokenIterator(String path, Map<String, String> nsmap, char mode, int group, InputStream in, String charset) 183 throws XMLStreamException, UnsupportedEncodingException { 184 // woodstox's getLocation().etCharOffset() does not return the offset correctly for InputStream, so use Reader instead. 185 this(path, nsmap, mode, group, new InputStreamReader(in, charset)); 186 this.originalInputStream = in; 187 } 188 189 XMLTokenIterator(String path, Map<String, String> nsmap, char mode, Reader in) throws XMLStreamException { 190 this(path, nsmap, mode, 1, in); 191 } 192 193 XMLTokenIterator(String path, Map<String, String> nsmap, char mode, int group, Reader in) throws XMLStreamException { 194 final String[] sl = path.substring(1).split("/"); 195 this.splitpath = new AttributedQName[sl.length]; 196 for (int i = 0; i < sl.length; i++) { 197 String s = sl[i]; 198 if (s.length() > 0) { 199 int d = s.indexOf(':'); 200 String pfx = d > 0 ? s.substring(0, d) : ""; 201 this.splitpath[i] = 202 new AttributedQName( 203 "*".equals(pfx) ? "*" : nsmap == null ? "" : nsmap.get(pfx), d > 0 ? s.substring(d + 1) : s, pfx); 204 } 205 } 206 207 this.mode = mode != 0 ? mode : 'i'; 208 this.group = group > 0 ? group : 1; 209 this.in = new RecordableReader(in); 210 this.reader = new StaxConverter().createXMLStreamReader(this.in); 211 212 LOG.trace("reader.class: {}", reader.getClass()); 213 // perform the first offset compliance test 214 int coff = reader.getLocation().getCharacterOffset(); 215 if (coff != 0) { 216 LOG.error("XMLStreamReader {} not supporting Location"); 217 throw new XMLStreamException("reader not supporting Location"); 218 } 219 220 this.path = new ArrayList<QName>(); 221 222 // wrapped mode needs the segments and the injected mode needs the namespaces 223 if (this.mode == 'w') { 224 this.segments = new ArrayList<String>(); 225 this.segmentlog = new ArrayList<QName>(); 226 } else if (this.mode == 'i') { 227 this.namespaces = new ArrayList<Map<String, String>>(); 228 } 229 // when grouping the tokens, allocate the storage to temporarily store tokens. 230 if (this.group > 1) { 231 this.tokens = new ArrayList<String>(); 232 } 233 this.nextToken = getNextToken(); 234 } 235 236 private boolean isDoS() { 237 return splitpath[index] == null; 238 } 239 240 private AttributedQName current() { 241 return splitpath[index + (isDoS() ? 1 : 0)]; 242 } 243 244 private AttributedQName ancestor() { 245 return index == 0 ? null : splitpath[index - 1]; 246 } 247 248 private void down() { 249 if (isDoS()) { 250 index++; 251 } 252 index++; 253 } 254 255 private void up() { 256 index--; 257 } 258 259 private boolean isBottom() { 260 return index == splitpath.length - (isDoS() ? 2 : 1); 261 } 262 263 private boolean isTop() { 264 return index == 0; 265 } 266 267 private int readNext() throws XMLStreamException { 268 int c = code; 269 if (c > 0) { 270 code = 0; 271 } else { 272 c = reader.next(); 273 } 274 return c; 275 } 276 277 private String getCurrentText() { 278 int pos = reader.getLocation().getCharacterOffset(); 279 String txt = in.getText(pos - consumed); 280 consumed = pos; 281 // keep recording 282 in.record(); 283 return txt; 284 } 285 286 private void pushName(QName name) { 287 path.add(name); 288 } 289 290 private QName popName() { 291 return path.remove(path.size() - 1); 292 } 293 294 private void pushSegment(QName qname, String token) { 295 segments.add(token); 296 segmentlog.add(qname); 297 } 298 299 private String popSegment() { 300 return segments.remove(segments.size() - 1); 301 } 302 303 private QName peekLog() { 304 return segmentlog.get(segmentlog.size() - 1); 305 } 306 307 private QName popLog() { 308 return segmentlog.remove(segmentlog.size() - 1); 309 } 310 311 private void pushNamespaces(XMLStreamReader reader) { 312 Map<String, String> m = new HashMap<String, String>(); 313 if (namespaces.size() > 0) { 314 m.putAll(namespaces.get(namespaces.size() - 1)); 315 } 316 for (int i = 0; i < reader.getNamespaceCount(); i++) { 317 m.put(reader.getNamespacePrefix(i), reader.getNamespaceURI(i)); 318 } 319 namespaces.add(m); 320 } 321 322 private void popNamespaces() { 323 namespaces.remove(namespaces.size() - 1); 324 } 325 326 private Map<String, String> getCurrentNamespaceBindings() { 327 return namespaces.get(namespaces.size() - 1); 328 } 329 330 private void readCurrent(boolean incl) throws XMLStreamException { 331 int d = depth; 332 while (d <= depth) { 333 int code = reader.next(); 334 if (code == XMLStreamConstants.START_ELEMENT) { 335 depth++; 336 } else if (code == XMLStreamConstants.END_ELEMENT) { 337 depth--; 338 } 339 } 340 // either look ahead to the next token or stay at the end element token 341 if (incl) { 342 code = reader.next(); 343 } else { 344 code = reader.getEventType(); 345 if (code == XMLStreamConstants.END_ELEMENT) { 346 // revert the depth count to avoid double counting the up event 347 depth++; 348 } 349 } 350 } 351 352 private String getCurrentToken() throws XMLStreamException { 353 readCurrent(true); 354 popName(); 355 356 String token = createContextualToken(getCurrentText()); 357 if (mode == 'i') { 358 popNamespaces(); 359 } 360 361 return token; 362 } 363 364 private String createContextualToken(String token) { 365 StringBuilder sb = new StringBuilder(); 366 if (mode == 'w' && group == 1) { 367 for (int i = 0; i < segments.size(); i++) { 368 sb.append(segments.get(i)); 369 } 370 sb.append(token); 371 for (int i = path.size() - 1; i >= 0; i--) { 372 QName q = path.get(i); 373 sb.append("</").append(makeName(q)).append(">"); 374 } 375 376 } else if (mode == 'i') { 377 final String stag = token.substring(0, token.indexOf('>') + 1); 378 Set<String> skip = new HashSet<String>(); 379 Matcher matcher = NAMESPACE_PATTERN.matcher(stag); 380 char quote = 0; 381 while (matcher.find()) { 382 String prefix = matcher.group(1); 383 if (prefix.length() > 0) { 384 prefix = prefix.substring(1); 385 } 386 skip.add(prefix); 387 if (quote == 0) { 388 quote = matcher.group(2).charAt(0); 389 } 390 } 391 if (quote == 0) { 392 quote = '"'; 393 } 394 boolean empty = stag.endsWith("/>"); 395 sb.append(token.substring(0, stag.length() - (empty ? 2 : 1))); 396 for (Entry<String, String> e : getCurrentNamespaceBindings().entrySet()) { 397 if (!skip.contains(e.getKey())) { 398 sb.append(e.getKey().length() == 0 ? " xmlns" : " xmlns:") 399 .append(e.getKey()).append("=").append(quote).append(e.getValue()).append(quote); 400 } 401 } 402 sb.append(token.substring(stag.length() - (empty ? 2 : 1))); 403 } else if (mode == 'u') { 404 int bp = token.indexOf(">"); 405 int ep = token.lastIndexOf("</"); 406 if (bp > 0 && ep > 0) { 407 sb.append(token.substring(bp + 1, ep)); 408 } 409 } else if (mode == 't') { 410 int bp = 0; 411 for (;;) { 412 int ep = token.indexOf('>', bp); 413 bp = token.indexOf('<', ep); 414 if (bp < 0) { 415 break; 416 } 417 sb.append(token.substring(ep + 1, bp)); 418 } 419 } else { 420 return token; 421 } 422 423 return sb.toString(); 424 } 425 426 private String getGroupedToken() { 427 StringBuilder sb = new StringBuilder(); 428 if (mode == 'w') { 429 // for wrapped 430 for (int i = 0; i < segments.size(); i++) { 431 sb.append(segments.get(i)); 432 } 433 for (String s : tokens) { 434 sb.append(s); 435 } 436 for (int i = path.size() - 1; i >= 0; i--) { 437 QName q = path.get(i); 438 sb.append("</").append(makeName(q)).append(">"); 439 } 440 } else { 441 // for injected, unwrapped, text 442 sb.append("<group>"); 443 for (String s : tokens) { 444 sb.append(s); 445 } 446 sb.append("</group>"); 447 } 448 tokens.clear(); 449 return sb.toString(); 450 } 451 452 private String getNextToken() throws XMLStreamException { 453 int xcode = 0; 454 while (xcode != XMLStreamConstants.END_DOCUMENT) { 455 xcode = readNext(); 456 457 switch (xcode) { 458 case XMLStreamConstants.START_ELEMENT: 459 depth++; 460 QName name = reader.getName(); 461 if (LOG.isTraceEnabled()) { 462 LOG.trace("se={}; depth={}; trackdepth={}", new Object[]{name, depth, trackdepth}); 463 } 464 465 String token = getCurrentText(); 466 // perform the second compliance test 467 if (!compliant) { 468 if (token != null && token.startsWith("<") && !token.startsWith("<?")) { 469 LOG.error("XMLStreamReader {} not supporting Location"); 470 throw new XMLStreamException("reader not supporting Location"); 471 } 472 compliant = true; 473 } 474 475 LOG.trace("token={}", token); 476 if (!backtrack && mode == 'w') { 477 pushSegment(name, token); 478 } 479 pushName(name); 480 if (mode == 'i') { 481 pushNamespaces(reader); 482 } 483 backtrack = false; 484 if (current().matches(name)) { 485 // mark the position of the match in the segments list 486 if (isBottom()) { 487 // final match 488 token = getCurrentToken(); 489 backtrack = true; 490 trackdepth = depth; 491 if (group > 1) { 492 tokens.add(token); 493 if (group == tokens.size()) { 494 return getGroupedToken(); 495 } 496 } else { 497 return token; 498 } 499 } else { 500 // intermediary match 501 down(); 502 } 503 } else if (isDoS()) { 504 // continue 505 } else { 506 // skip 507 readCurrent(false); 508 } 509 break; 510 case XMLStreamConstants.END_ELEMENT: 511 if ((backtrack || (trackdepth > 0 && depth == trackdepth)) 512 && (mode == 'w' && group > 1 && tokens.size() > 0)) { 513 // flush the left over using the current context 514 code = XMLStreamConstants.END_ELEMENT; 515 return getGroupedToken(); 516 } 517 518 depth--; 519 QName endname = reader.getName(); 520 LOG.trace("ee={}", endname); 521 522 popName(); 523 if (mode == 'i') { 524 popNamespaces(); 525 } 526 527 int pc = 0; 528 if (backtrack || (trackdepth > 0 && depth == trackdepth - 1)) { 529 // reactive backtrack if not backtracking and update the track depth 530 backtrack = true; 531 trackdepth--; 532 if (mode == 'w') { 533 while (!endname.equals(peekLog())) { 534 pc++; 535 popLog(); 536 } 537 } 538 } 539 540 if (backtrack) { 541 if (mode == 'w') { 542 for (int i = 0; i < pc; i++) { 543 popSegment(); 544 } 545 } 546 547 if ((ancestor() == null && !isTop()) 548 || (ancestor() != null && ancestor().matches(endname))) { 549 up(); 550 } 551 } 552 break; 553 case XMLStreamConstants.END_DOCUMENT: 554 LOG.trace("depth={}", depth); 555 if (group > 1 && tokens.size() > 0) { 556 // flush the left over before really going EoD 557 code = XMLStreamConstants.END_DOCUMENT; 558 return getGroupedToken(); 559 } 560 break; 561 default: 562 break; 563 } 564 } 565 return null; 566 } 567 568 private static String makeName(QName qname) { 569 String pfx = qname.getPrefix(); 570 return pfx.length() == 0 ? qname.getLocalPart() : qname.getPrefix() + ":" + qname.getLocalPart(); 571 } 572 573 @Override 574 public boolean hasNext() { 575 return nextToken != null; 576 } 577 578 @Override 579 public Object next() { 580 Object o = nextToken; 581 try { 582 nextToken = getNextToken(); 583 } catch (XMLStreamException e) { 584 nextToken = null; 585 throw new RuntimeException(e); 586 } 587 return o; 588 } 589 590 @Override 591 public void remove() { 592 // noop 593 } 594 595 @Override 596 public void close() throws IOException { 597 try { 598 reader.close(); 599 } catch (Exception e) { 600 // ignore 601 } 602 // need to close the original input stream as well as the reader do not delegate close it 603 if (originalInputStream != null) { 604 IOHelper.close(originalInputStream); 605 } 606 } 607 } 608 609 static class AttributedQName extends QName { 610 private static final long serialVersionUID = 9878370226894144L; 611 private Pattern lcpattern; 612 private boolean nsany; 613 614 AttributedQName(String localPart) { 615 super(localPart); 616 checkWildcard("", localPart); 617 } 618 619 AttributedQName(String namespaceURI, String localPart, String prefix) { 620 super(namespaceURI, localPart, prefix); 621 checkWildcard(namespaceURI, localPart); 622 } 623 624 AttributedQName(String namespaceURI, String localPart) { 625 super(namespaceURI, localPart); 626 checkWildcard(namespaceURI, localPart); 627 } 628 629 public boolean matches(QName qname) { 630 return (nsany || getNamespaceURI().equals(qname.getNamespaceURI())) 631 && (lcpattern != null 632 ? lcpattern.matcher(qname.getLocalPart()).matches() 633 : getLocalPart().equals(qname.getLocalPart())); 634 } 635 636 private void checkWildcard(String nsa, String lcp) { 637 nsany = "*".equals(nsa); 638 boolean wc = false; 639 for (int i = 0; i < lcp.length(); i++) { 640 char c = lcp.charAt(i); 641 if (c == '?' || c == '*') { 642 wc = true; 643 break; 644 } 645 } 646 if (wc) { 647 StringBuilder sb = new StringBuilder(); 648 for (int i = 0; i < lcp.length(); i++) { 649 char c = lcp.charAt(i); 650 switch (c) { 651 case '.': 652 sb.append("\\."); 653 break; 654 case '*': 655 sb.append(".*"); 656 break; 657 case '?': 658 sb.append('.'); 659 break; 660 default: 661 sb.append(c); 662 break; 663 } 664 } 665 lcpattern = Pattern.compile(sb.toString()); 666 } 667 } 668 } 669}