/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.camel.util;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.Stack;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;

import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * An XML parser that uses SAX to include line and column number for each XML element in the parsed Document.
 * <p>
 * The line number and column number can be obtained from a Node/Element using
 * <pre>
 *   String lineNumber = (String) node.getUserData(XmlLineNumberParser.LINE_NUMBER);
 *   String lineNumberEnd = (String) node.getUserData(XmlLineNumberParser.LINE_NUMBER_END);
 *   String columnNumber = (String) node.getUserData(XmlLineNumberParser.COLUMN_NUMBER);
 *   String columnNumberEnd = (String) node.getUserData(XmlLineNumberParser.COLUMN_NUMBER_END);
 * </pre>
 * The values are stored as strings and are the positions reported by the SAX {@link Locator}
 * at the time the start tag (respectively end tag) event of the element is delivered.
 */
public final class XmlLineNumberParser {

    public static final String LINE_NUMBER = "lineNumber";
    public static final String COLUMN_NUMBER = "colNumber";
    public static final String LINE_NUMBER_END = "lineNumberEnd";
    public static final String COLUMN_NUMBER_END = "colNumberEnd";

    /**
     * Allows to plugin a custom text transformer in the parser, that can transform all the text content
     */
    public interface XmlTextTransformer {

        /**
         * Transforms the given text.
         *
         * @param text an attribute name, an attribute value, or the text content of an element
         * @return the text to store in the DOM model
         */
        String transform(String text);

    }

    private XmlLineNumberParser() {
    }

    /**
     * Parses the XML.
     *
     * @param is the XML content as an input stream
     * @return the DOM model
     * @throws Exception is thrown if error parsing
     */
    public static Document parseXml(final InputStream is) throws Exception {
        return parseXml(is, null);
    }

    /**
     * Parses the XML.
     *
     * @param is the XML content as an input stream
     * @param xmlTransformer the XML transformer
     * @return the DOM model
     * @throws Exception is thrown if error parsing
     */
    public static Document parseXml(final InputStream is, final XmlTextTransformer xmlTransformer) throws Exception {
        return parseXml(is, xmlTransformer, null, null);
    }

    /**
     * Parses the XML.
     * <p>
     * When root names are given, the first element whose qualified name matches one of them becomes the
     * root of the returned document. Everything before that element, and everything after it has been
     * closed, is ignored.
     *
     * @param is the XML content as an input stream
     * @param xmlTransformer the XML transformer, or <tt>null</tt> to keep all text as-is
     * @param rootNames one or more root names that is used as baseline for beginning the parsing, for example camelContext to start parsing
     *                  when Camel is discovered. Multiple names can be defined separated by comma
     * @param forceNamespace an optional namespaces to force assign to each node. This may be needed for JAXB unmarshalling from XML -> POJO.
     * @return the DOM model
     * @throws Exception is thrown if error parsing
     */
    public static Document parseXml(final InputStream is, XmlTextTransformer xmlTransformer, String rootNames, final String forceNamespace) throws Exception {
        ObjectHelper.notNull(is, "is");

        final XmlTextTransformer transformer = xmlTransformer == null ? new NoopTransformer() : xmlTransformer;
        // split the root names only once instead of for every element being parsed
        final String[] roots = splitRootNames(rootNames);

        final SAXParserFactory factory = SAXParserFactory.newInstance();
        final SAXParser parser = factory.newSAXParser();

        // this factory is only used for creating the empty target document; the actual parsing is done by the SAX parser,
        // which is kept from loading external DTDs/entities by the resolveEntity method of the handler below
        final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        // turn off validator and loading external dtd
        dbf.setValidating(false);
        dbf.setNamespaceAware(true);
        dbf.setFeature("http://xml.org/sax/features/namespaces", false);
        dbf.setFeature("http://xml.org/sax/features/validation", false);
        dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
        dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        dbf.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        dbf.setFeature("http://xml.org/sax/features/external-general-entities", false);
        final DocumentBuilder docBuilder = dbf.newDocumentBuilder();
        final Document doc = docBuilder.newDocument();

        // the elements that are currently open, the innermost element on top
        final Stack<Element> elementStack = new Stack<>();
        // the raw text seen since the last start/end tag of the element on top of the stack
        final StringBuilder textBuffer = new StringBuilder();
        final DefaultHandler handler = new DefaultHandler() {
            private Locator locator;
            // whether the element to use as root of the document has been seen
            private boolean found;
            // whether the root element has been closed and added to the document
            private boolean completed;

            @Override
            public void setDocumentLocator(final Locator locator) {
                this.locator = locator; // Save the locator, so that it can be used later for line tracking when traversing nodes.
                // without root names the very first element is the root
                this.found = roots == null;
            }

            private boolean isRootName(String qName) {
                for (String root : roots) {
                    if (qName.equals(root)) {
                        return true;
                    }
                }
                return false;
            }

            @Override
            public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) throws SAXException {
                if (completed) {
                    // a document can only have one root element, so anything after the root is skipped
                    return;
                }

                addTextIfNeeded();

                if (!found && roots != null && isRootName(qName)) {
                    found = true;
                }
                if (!found) {
                    return;
                }

                final Element el;
                if (forceNamespace != null) {
                    el = doc.createElementNS(forceNamespace, qName);
                } else {
                    el = doc.createElement(qName);
                }

                for (int i = 0; i < attributes.getLength(); i++) {
                    el.setAttribute(transformer.transform(attributes.getQName(i)), transformer.transform(attributes.getValue(i)));
                }

                el.setUserData(LINE_NUMBER, String.valueOf(this.locator.getLineNumber()), null);
                el.setUserData(COLUMN_NUMBER, String.valueOf(this.locator.getColumnNumber()), null);
                elementStack.push(el);
            }

            @Override
            public void endElement(final String uri, final String localName, final String qName) {
                if (!found || completed) {
                    return;
                }

                addTextIfNeeded();

                if (elementStack.isEmpty()) {
                    return;
                }

                final Element closedEl = elementStack.pop();
                if (elementStack.isEmpty()) {
                    // no parent left so this is the root element, and we are done
                    doc.appendChild(closedEl);
                    completed = true;
                } else {
                    final Element parentEl = elementStack.peek();
                    parentEl.appendChild(closedEl);
                }

                closedEl.setUserData(LINE_NUMBER_END, String.valueOf(this.locator.getLineNumber()), null);
                closedEl.setUserData(COLUMN_NUMBER_END, String.valueOf(this.locator.getColumnNumber()), null);
            }

            @Override
            public void characters(final char[] ch, final int start, final int length) throws SAXException {
                if (elementStack.isEmpty()) {
                    // text outside the elements being captured must not end up as content of the root element
                    return;
                }
                // only buffer here, as the parser may deliver the text of an element in multiple chunks
                textBuffer.append(ch, start, length);
            }

            @Override
            public InputSource resolveEntity(String publicId, String systemId) throws IOException, SAXException {
                // do not resolve external dtd
                return new InputSource(new StringReader(""));
            }

            // Outputs text accumulated under the current node
            private void addTextIfNeeded() {
                if (textBuffer.length() == 0) {
                    return;
                }

                // transform the complete text so the transformer never sees only a part of it
                final String text = transformer.transform(textBuffer.toString());
                textBuffer.setLength(0);

                if (text != null && !text.isEmpty() && !elementStack.isEmpty()) {
                    final Node textNode = doc.createTextNode(text);
                    elementStack.peek().appendChild(textNode);
                }
            }
        };
        parser.parse(is, handler);

        return doc;
    }

    /**
     * Splits the comma separated root names into the individual names, with surrounding whitespace removed.
     *
     * @param rootNames the root names separated by comma, may be <tt>null</tt>
     * @return the individual names, or <tt>null</tt> if no root names was given
     */
    private static String[] splitRootNames(String rootNames) {
        if (rootNames == null) {
            return null;
        }
        final String[] names = rootNames.split(",");
        for (int i = 0; i < names.length; i++) {
            names[i] = names[i].trim();
        }
        return names;
    }

    /**
     * The default transformer which returns the text as-is.
     */
    private static final class NoopTransformer implements XmlTextTransformer {

        @Override
        public String transform(String text) {
            return text;
        }

    }

}