1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46 package groovy.util;
47
48 import groovy.xml.QName;
49
50 import java.io.File;
51 import java.io.FileInputStream;
52 import java.io.IOException;
53 import java.io.InputStream;
54 import java.io.Reader;
55 import java.io.StringReader;
56 import java.security.AccessController;
57 import java.security.PrivilegedActionException;
58 import java.security.PrivilegedExceptionAction;
59 import java.util.ArrayList;
60 import java.util.HashMap;
61 import java.util.List;
62 import java.util.Map;
63
64 import javax.xml.parsers.ParserConfigurationException;
65 import javax.xml.parsers.SAXParser;
66 import javax.xml.parsers.SAXParserFactory;
67
68 import org.xml.sax.*;
69
70 /***
71 * A helper class for parsing XML into a tree of Node instances for
72 * a simple way of processing XML. This parser does not preserve the
73 * XML InfoSet - if thats what you need try using W3C DOM, dom4j, JDOM, XOM etc.
74 * This parser ignores comments and processing instructions and converts the
75 * XML into a Node for each element in the XML with attributes
76 * and child Nodes and Strings. This simple model is sufficient for
77 * most simple use cases of processing XML.
78 *
79 * @author <a href="mailto:james@coredevelopers.net">James Strachan</a>
80 * @version $Revision: 1.5 $
81 */
82 public class XmlParser implements ContentHandler {
83
84 private StringBuffer bodyText = new StringBuffer();
85 private List stack = new ArrayList();
86 private Locator locator;
87 private XMLReader reader;
88 private Node parent;
89 private boolean trimWhitespace = true;
90
91 public XmlParser() throws ParserConfigurationException, SAXException {
92 this(false, true);
93 }
94
95 public XmlParser(boolean validating, boolean namespaceAware) throws ParserConfigurationException, SAXException {
96 SAXParserFactory factory = null;
97 try {
98 factory = (SAXParserFactory) AccessController.doPrivileged(new PrivilegedExceptionAction() {
99 public Object run() throws ParserConfigurationException {
100 return SAXParserFactory.newInstance();
101 }
102 });
103 } catch (PrivilegedActionException pae) {
104 Exception e = pae.getException();
105 if (e instanceof ParserConfigurationException) {
106 throw (ParserConfigurationException) e;
107 } else {
108 throw new RuntimeException(e);
109 }
110 }
111 factory.setNamespaceAware(namespaceAware);
112 factory.setValidating(validating);
113
114 SAXParser parser = factory.newSAXParser();
115 reader = parser.getXMLReader();
116 }
117
118 public XmlParser(XMLReader reader) {
119 this.reader = reader;
120 }
121
122 public XmlParser(SAXParser parser) throws SAXException {
123 reader = parser.getXMLReader();
124 }
125
126
127 /***
128 * Parses the content of the given file as XML turning it into a tree
129 * of Nodes
130 */
131 public Node parse(File file) throws IOException, SAXException {
132
133 InputSource input = new InputSource(new FileInputStream(file));
134 input.setSystemId("file://" + file.getAbsolutePath());
135 getXMLReader().parse(input);
136 return parent;
137
138 }
139
140 /***
141 * Parse the content of the specified input source into a tree of Nodes.
142 */
143 public Node parse(InputSource input) throws IOException, SAXException {
144 getXMLReader().parse(input);
145 return parent;
146 }
147
148 /***
149 * Parse the content of the specified input stream into a tree of Nodes.
150 * Note that using this method will not provide the parser with any URI
151 * for which to find DTDs etc
152 */
153 public Node parse(InputStream input) throws IOException, SAXException {
154 InputSource is = new InputSource(input);
155 getXMLReader().parse(is);
156 return parent;
157 }
158
159 /***
160 * Parse the content of the specified reader into a tree of Nodes.
161 * Note that using this method will not provide the parser with any URI
162 * for which to find DTDs etc
163 */
164 public Node parse(Reader in) throws IOException, SAXException {
165 InputSource is = new InputSource(in);
166 getXMLReader().parse(is);
167 return parent;
168 }
169
170 /***
171 * Parse the content of the specified URI into a tree of Nodes
172 */
173 public Node parse(String uri) throws IOException, SAXException {
174 InputSource is = new InputSource(uri);
175 getXMLReader().parse(is);
176 return parent;
177 }
178
179 /***
180 * A helper method to parse the given text as XML
181 *
182 * @param text
183 * @return
184 */
185 public Node parseText(String text) throws IOException, SAXException {
186 return parse(new StringReader(text));
187 }
188
189
190
191
192
193
194 public DTDHandler getDTDHandler() {
195 return this.reader.getDTDHandler();
196 }
197
198
199
200
201 public EntityResolver getEntityResolver() {
202 return this.reader.getEntityResolver();
203 }
204
205
206
207
208 public ErrorHandler getErrorHandler() {
209 return this.reader.getErrorHandler();
210 }
211
212
213
214
215 public boolean getFeature(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
216 return this.reader.getFeature(uri);
217 }
218
219
220
221
222 public Object getProperty(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
223 return this.reader.getProperty(uri);
224 }
225
226
227
228
229 public void setDTDHandler(final DTDHandler dtdHandler) {
230 this.reader.setDTDHandler(dtdHandler);
231 }
232
233
234
235
236 public void setEntityResolver(final EntityResolver entityResolver) {
237 this.reader.setEntityResolver(entityResolver);
238 }
239
240
241
242
243 public void setErrorHandler(final ErrorHandler errorHandler) {
244 this.reader.setErrorHandler(errorHandler);
245 }
246
247
248
249
250 public void setFeature(final String uri, final boolean value) throws SAXNotRecognizedException, SAXNotSupportedException {
251 this.reader.setFeature(uri, value);
252 }
253
254
255
256
257 public void setProperty(final String uri, final Object value) throws SAXNotRecognizedException, SAXNotSupportedException {
258 this.reader.setProperty(uri, value);
259 }
260
261
262
263 public void startDocument() throws SAXException {
264 parent = null;
265 }
266
267 public void endDocument() throws SAXException {
268 stack.clear();
269 }
270
271 public void startElement(String namespaceURI, String localName, String qName, Attributes list)
272 throws SAXException {
273 addTextToNode();
274
275 Object name = getElementName(namespaceURI, localName, qName);
276
277 int size = list.getLength();
278 Map attributes = new HashMap(size);
279 for (int i = 0; i < size; i++) {
280 Object attributeName = getElementName(list.getURI(i), list.getLocalName(i), list.getQName(i));
281 String value = list.getValue(i);
282 attributes.put(attributeName, value);
283 }
284 parent = new Node(parent, name, attributes, new ArrayList());
285 stack.add(parent);
286 }
287
288 public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
289 addTextToNode();
290
291 if (!stack.isEmpty()) {
292 stack.remove(stack.size() - 1);
293 if (!stack.isEmpty()) {
294 parent = (Node) stack.get(stack.size() - 1);
295 }
296 }
297 }
298
299 public void characters(char buffer[], int start, int length) throws SAXException {
300 bodyText.append(buffer, start, length);
301 }
302
303 public void startPrefixMapping(String prefix, String namespaceURI) throws SAXException {
304 }
305
306 public void endPrefixMapping(String prefix) throws SAXException {
307 }
308
309 public void ignorableWhitespace(char buffer[], int start, int len) throws SAXException {
310 }
311
312 public void processingInstruction(String target, String data) throws SAXException {
313 }
314
315 public Locator getDocumentLocator() {
316 return locator;
317 }
318
319 public void setDocumentLocator(Locator locator) {
320 this.locator = locator;
321 }
322
323 public void skippedEntity(String name) throws SAXException {
324 }
325
326
327
328 protected XMLReader getXMLReader() {
329 reader.setContentHandler(this);
330 return reader;
331 }
332
333 protected void addTextToNode() {
334 String text = bodyText.toString();
335 if (trimWhitespace) {
336 text = text.trim();
337 }
338 if (text.length() > 0) {
339 parent.children().add(text);
340 }
341 bodyText = new StringBuffer();
342 }
343
344 protected Object getElementName(String namespaceURI, String localName, String qName) throws SAXException {
345 String name = localName;
346 if ((name == null) || (name.length() < 1)) {
347 name = qName;
348 }
349 if (namespaceURI == null || namespaceURI.length() <= 0) {
350 return name;
351 }
352 else {
353 return new QName(namespaceURI, name, qName);
354 }
355 }
356 }