001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.camel.util;
018
019import java.io.IOException;
020import java.io.InputStream;
021import java.io.StringReader;
022import java.util.Stack;
023import javax.xml.parsers.DocumentBuilder;
024import javax.xml.parsers.DocumentBuilderFactory;
025import javax.xml.parsers.SAXParser;
026import javax.xml.parsers.SAXParserFactory;
027
028import org.w3c.dom.Document;
029import org.w3c.dom.Element;
030import org.w3c.dom.Node;
031import org.xml.sax.Attributes;
032import org.xml.sax.InputSource;
033import org.xml.sax.Locator;
034import org.xml.sax.SAXException;
035import org.xml.sax.helpers.DefaultHandler;
036
037/**
038 * An XML parser that uses SAX to include line and column number for each XML element in the parsed Document.
039 * <p>
040 * The line number and column number can be obtained from a Node/Element using
041 * <pre>
042 *   String lineNumber = (String) node.getUserData(XmlLineNumberParser.LINE_NUMBER);
043 *   String lineNumberEnd = (String) node.getUserData(XmlLineNumberParser.LINE_NUMBER_END);
044 *   String columnNumber = (String) node.getUserData(XmlLineNumberParser.COLUMN_NUMBER);
045 *   String columnNumberEnd = (String) node.getUserData(XmlLineNumberParser.COLUMN_NUMBER_END);
046 * </pre>
047 */
048public final class XmlLineNumberParser {
049
050    public static final String LINE_NUMBER = "lineNumber";
051    public static final String COLUMN_NUMBER = "colNumber";
052    public static final String LINE_NUMBER_END = "lineNumberEnd";
053    public static final String COLUMN_NUMBER_END = "colNumberEnd";
054
055    /**
056     * Allows to plugin a custom text transformer in the parser, that can transform all the text content
057     */
058    public interface XmlTextTransformer {
059
060        String transform(String text);
061
062    }
063
064    private XmlLineNumberParser() {
065    }
066
067    /**
068     * Parses the XML.
069     *
070     * @param is the XML content as an input stream
071     * @return the DOM model
072     * @throws Exception is thrown if error parsing
073     */
074    public static Document parseXml(final InputStream is) throws Exception {
075        return parseXml(is, new NoopTransformer());
076    }
077
078    /**
079     * Parses the XML.
080     *
081     * @param is the XML content as an input stream
082     * @return the DOM model
083     * @throws Exception is thrown if error parsing
084     */
085    public static Document parseXml(final InputStream is, final XmlTextTransformer transformer) throws Exception {
086        ObjectHelper.notNull(is, "is");
087        ObjectHelper.notNull(transformer, "transformer");
088
089        final Document doc;
090        SAXParser parser;
091        final SAXParserFactory factory = SAXParserFactory.newInstance();
092        parser = factory.newSAXParser();
093        final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
094        // turn off validator and loading external dtd
095        dbf.setValidating(false);
096        dbf.setNamespaceAware(true);
097        dbf.setFeature("http://xml.org/sax/features/namespaces", false);
098        dbf.setFeature("http://xml.org/sax/features/validation", false);
099        dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
100        dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
101        dbf.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
102        dbf.setFeature("http://xml.org/sax/features/external-general-entities", false);
103        final DocumentBuilder docBuilder = dbf.newDocumentBuilder();
104        doc = docBuilder.newDocument();
105
106        final Stack<Element> elementStack = new Stack<Element>();
107        final StringBuilder textBuffer = new StringBuilder();
108        final DefaultHandler handler = new DefaultHandler() {
109            private Locator locator;
110            private boolean found;
111
112            @Override
113            public void setDocumentLocator(final Locator locator) {
114                this.locator = locator; // Save the locator, so that it can be used later for line tracking when traversing nodes.
115            }
116
117            @Override
118            public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) throws SAXException {
119                addTextIfNeeded();
120
121                Element el = doc.createElement(qName);
122
123                for (int i = 0; i < attributes.getLength(); i++) {
124                    el.setAttribute(transformer.transform(attributes.getQName(i)), transformer.transform(attributes.getValue(i)));
125                }
126
127                el.setUserData(LINE_NUMBER, String.valueOf(this.locator.getLineNumber()), null);
128                el.setUserData(COLUMN_NUMBER, String.valueOf(this.locator.getColumnNumber()), null);
129                elementStack.push(el);
130            }
131
132            @Override
133            public void endElement(final String uri, final String localName, final String qName) {
134                addTextIfNeeded();
135
136                final Element closedEl = elementStack.isEmpty() ? null : elementStack.pop();
137                if (closedEl != null) {
138                    if (elementStack.isEmpty()) {
139                        // Is this the root element?
140                        doc.appendChild(closedEl);
141                    } else {
142                        final Element parentEl = elementStack.peek();
143                        parentEl.appendChild(closedEl);
144                    }
145
146                    closedEl.setUserData(LINE_NUMBER_END, String.valueOf(this.locator.getLineNumber()), null);
147                    closedEl.setUserData(COLUMN_NUMBER_END, String.valueOf(this.locator.getColumnNumber()), null);
148                }
149            }
150
151            @Override
152            public void characters(final char ch[], final int start, final int length) throws SAXException {
153                char[] chars = new char[length];
154                System.arraycopy(ch, start, chars, 0, length);
155                String s = new String(chars);
156                s = transformer.transform(s);
157                textBuffer.append(s);
158            }
159
160            @Override
161            public InputSource resolveEntity(String publicId, String systemId) throws IOException, SAXException {
162                // do not resolve external dtd
163                return new InputSource(new StringReader(""));
164            }
165
166            // Outputs text accumulated under the current node
167            private void addTextIfNeeded() {
168                if (textBuffer.length() > 0) {
169                    final Element el = elementStack.isEmpty() ? null : elementStack.peek();
170                    if (el != null) {
171                        final Node textNode = doc.createTextNode(textBuffer.toString());
172                        el.appendChild(textNode);
173                        textBuffer.delete(0, textBuffer.length());
174                    }
175                }
176            }
177        };
178        parser.parse(is, handler);
179
180        return doc;
181    }
182
183    private static final class NoopTransformer implements XmlTextTransformer {
184
185        @Override
186        public String transform(String text) {
187            return text;
188        }
189
190    }
191
192}