@ -30,59 +30,43 @@ import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler ;
import org.xml.sax.helpers.XMLReaderFactory ;
import javax.xml.parsers.SAXParserFactory ;
/ * *
* < p >
* < code > SAXReader < / code > creates a DOM4J tree from SAX parsing events .
* < / p >
*
* < p >
* The actual SAX parser that is used by this class is configurable so you can
* use your favourite SAX parser if you wish . DOM4J comes configured with its
* own SAX parser so you do not need to worry about configuring the SAX parser .
* < / p >
*
* < p >
* To explicitly configure the SAX parser that is used via Java code you can use
* a constructor or use the { @link # setXMLReader ( XMLReader ) } or { @link
* # setXMLReaderClassName ( String ) } methods .
* < / p >
*
* < p >
* If the parser is not specified explicitly then the standard SAX policy of
* using the < code > org . xml . sax . driver < / code > system property is used to
* determine the implementation class of { @link XMLReader } .
* < / p >
*
* < p >
* If the < code > org . xml . sax . driver < / code > system property is not defined then
* JAXP is used via reflection ( so that DOM4J is not explicitly dependent on the
* JAXP classes ) to load the JAXP configured SAXParser . If there is any error
* creating a JAXP SAXParser an informational message is output and then the
* default ( Aelfred ) SAX parser is used instead .
* < / p >
*
* < p >
* If you are trying to use JAXP to explicitly set your SAX parser and are
* experiencing problems , you can turn on verbose error reporting by defining
* the system property < code > org . dom4j . verbose < / code > to be "true" which will
* output a more detailed description of why JAXP could not find a SAX parser
* < / p >
*
* < p >
* For more information on JAXP please go to < a
* href = "http://java.sun.com/xml/" > Sun ' s Java & amp ; XML site < / a >
* < / p >
*
* @author < a href = "mailto:james.strachan@metastuff.com" > James Strachan < / a >
* @version $Revision$
* @version $Revision : 1 . 58 $
* /
public class SAXReader {
private static final String SAX_STRING_INTERNING =
"http://xml.org/sax/features/string-interning" ;
private static final String SAX_NAMESPACE_PREFIXES =
"http://xml.org/sax/features/namespace-prefixes" ;
private static final String SAX_NAMESPACES =
"http://xml.org/sax/features/namespaces" ;
private static final String SAX_DECL_HANDLER =
"http://xml.org/sax/properties/declaration-handler" ;
private static final String SAX_LEXICAL_HANDLER =
@ -90,62 +74,151 @@ public class SAXReader {
private static final String SAX_LEXICALHANDLER =
"http://xml.org/sax/handlers/LexicalHandler" ;
/** <code>DocumentFactory</code> used to create new document objects */
/ * *
* < code > DocumentFactory < / code > used to create new document objects
* /
private DocumentFactory factory ;
/** <code>XMLReader</code> used to parse the SAX events */
/ * *
* < code > XMLReader < / code > used to parse the SAX events
* /
private XMLReader xmlReader ;
/** Whether validation should occur */
/ * *
* Whether validation should occur
* /
private boolean validating ;
/** DispatchHandler to call when each <code>Element</code> is encountered */
/ * *
* DispatchHandler to call when each < code > Element < / code > is encountered
* /
private DispatchHandler dispatchHandler ;
/** ErrorHandler class to use */
/ * *
* ErrorHandler class to use
* /
private ErrorHandler errorHandler ;
/** The entity resolver */
/ * *
* The entity resolver
* /
private EntityResolver entityResolver ;
/** Should element & attribute names and namespace URIs be interned? */
/ * *
* Should element & attribute names and namespace URIs be interned ?
* /
private boolean stringInternEnabled = true ;
/** Should internal DTD declarations be expanded into a List in the DTD */
/ * *
* Should internal DTD declarations be expanded into a List in the DTD
* /
private boolean includeInternalDTDDeclarations = false ;
/** Should external DTD declarations be expanded into a List in the DTD */
/ * *
* Should external DTD declarations be expanded into a List in the DTD
* /
private boolean includeExternalDTDDeclarations = false ;
/** Whether adjacent text nodes should be merged */
/ * *
* Whether adjacent text nodes should be merged
* /
private boolean mergeAdjacentText = false ;
/** Holds value of property stripWhitespaceText. */
/ * *
* Holds value of property stripWhitespaceText .
* /
private boolean stripWhitespaceText = false ;
/** Should we ignore comments */
/ * *
* Should we ignore comments
* /
private boolean ignoreComments = false ;
/** Encoding of InputSource - null means system default encoding */
/ * *
* Encoding of InputSource - null means system default encoding
* /
private String encoding = null ;
// private boolean includeExternalGeneralEntities = false;
// private boolean includeExternalParameterEntities = false;
/** The SAX filter used to filter SAX events */
/ * *
* The SAX filter used to filter SAX events
*
* @since 2 . 1 . 2
* /
private XMLFilter xmlFilter ;
/**
 * Creates a <code>SAXReader</code> on which loading of external DTDs and
 * expansion of external general and parameter entities have been switched
 * off, on a best-effort basis.
 *
 * <p>
 * Each feature is applied on its own: a parser that rejects one of them
 * (for instance the Xerces-specific <code>load-external-dtd</code> feature)
 * still gets the remaining ones. A feature the underlying parser does not
 * recognise or support is skipped silently, so callers that require the
 * hardening to be in effect must verify it on the reader themselves.
 * </p>
 *
 * @return a new reader with as many of the hardening features disabled as
 *         the underlying parser accepts
 */
public static SAXReader createDefault() {
    // Features that let a document pull in external DTDs / entities (XXE).
    final String[] hardeningFeatures = {
        "http://apache.org/xml/features/nonvalidating/load-external-dtd",
        "http://xml.org/sax/features/external-general-entities",
        "http://xml.org/sax/features/external-parameter-entities"
    };

    SAXReader reader = new SAXReader();
    for (String feature : hardeningFeatures) {
        try {
            reader.setFeature(feature, false);
        } catch (SAXException ignored) {
            // Deliberately ignored: the parser is incompatible with this
            // feature. Keep going so that one unsupported feature does not
            // prevent the others from being applied.
        }
    }
    return reader;
}
/ * *
* This method internally calls { @link SAXParserFactory } { @code . newInstance ( ) . newSAXParser ( ) . getXMLReader ( ) } or { @link XMLReaderFactory # createXMLReader ( ) } .
* Be sure to configure returned reader if the default configuration does not suit you . Consider setting the following properties :
*
* < pre >
* reader . setFeature ( "http://apache.org/xml/features/nonvalidating/load-external-dtd" , false ) ;
* reader . setFeature ( "http://xml.org/sax/features/external-general-entities" , false ) ;
* reader . setFeature ( "http://xml.org/sax/features/external-parameter-entities" , false ) ;
* < / pre >
* /
public SAXReader ( ) {
// Intentionally empty: every field keeps its declared initial value
// (factory and xmlReader stay null, validating stays false).
}
/ * *
* This method internally calls { @link SAXParserFactory } { @code . newInstance ( ) . newSAXParser ( ) . getXMLReader ( ) } or { @link XMLReaderFactory # createXMLReader ( ) } .
* Be sure to configure returned reader if the default configuration does not suit you . Consider setting the following properties :
*
* < pre >
* reader . setFeature ( "http://apache.org/xml/features/nonvalidating/load-external-dtd" , false ) ;
* reader . setFeature ( "http://xml.org/sax/features/external-general-entities" , false ) ;
* reader . setFeature ( "http://xml.org/sax/features/external-parameter-entities" , false ) ;
* < / pre >
*
* @param validating
* /
public SAXReader ( boolean validating ) {
// Only records the validation flag; no XMLReader is created here.
this . validating = validating ;
}
/ * *
* This method internally calls { @link SAXParserFactory } { @code . newInstance ( ) . newSAXParser ( ) . getXMLReader ( ) } or { @link XMLReaderFactory # createXMLReader ( ) } .
* Be sure to configure returned reader if the default configuration does not suit you . Consider setting the following properties :
*
* < pre >
* reader . setFeature ( "http://apache.org/xml/features/nonvalidating/load-external-dtd" , false ) ;
* reader . setFeature ( "http://xml.org/sax/features/external-general-entities" , false ) ;
* reader . setFeature ( "http://xml.org/sax/features/external-parameter-entities" , false ) ;
* < / pre >
*
* @param factory
* /
public SAXReader ( DocumentFactory factory ) {
// Stores the factory used to create new document objects. The argument
// is not checked, so a null factory is stored as-is.
this . factory = factory ;
}
/ * *
* This method internally calls { @link SAXParserFactory } { @code . newInstance ( ) . newSAXParser ( ) . getXMLReader ( ) } or { @link XMLReaderFactory # createXMLReader ( ) } .
* Be sure to configure returned reader if the default configuration does not suit you . Consider setting the following properties :
*
* < pre >
* reader . setFeature ( "http://apache.org/xml/features/nonvalidating/load-external-dtd" , false ) ;
* reader . setFeature ( "http://xml.org/sax/features/external-general-entities" , false ) ;
* reader . setFeature ( "http://xml.org/sax/features/external-parameter-entities" , false ) ;
* < / pre >
*
* @param factory
* @param validating
* /
public SAXReader ( DocumentFactory factory , boolean validating ) {
this . factory = factory ;
this . validating = validating ;
@ -185,13 +258,9 @@ public class SAXReader {
* this method is to correctly configure an XMLReader object instance and
* call the { @link # setXMLReader ( XMLReader ) } method
*
* @param name
* is the SAX property name
* @param value
* is the value of the SAX property
*
* @throws SAXException
* if the XMLReader could not be created or the property could
* @param name is the SAX property name
* @param value is the value of the SAX property
* @throws SAXException if the XMLReader could not be created or the property could
* not be changed .
* /
public void setProperty ( String name , Object value ) throws SAXException {
@ -205,13 +274,9 @@ public class SAXReader {
* calling this method is to correctly configure an XMLReader object
* instance and call the { @link # setXMLReader ( XMLReader ) } method
*
* @param name
* is the SAX feature name
* @param value
* is the value of the SAX feature
*
* @throws SAXException
* if the XMLReader could not be created or the feature could
* @param name is the SAX feature name
* @param value is the value of the SAX feature
* @throws SAXException if the XMLReader could not be created or the feature could
* not be changed .
* /
public void setFeature ( String name , boolean value ) throws SAXException {
@ -223,13 +288,9 @@ public class SAXReader {
* Reads a Document from the given < code > File < / code >
* < / p >
*
* @param file
* is the < code > File < / code > to read from .
*
* @param file is the < code > File < / code > to read from .
* @return the newly created Document instance
*
* @throws DocumentException
* if an error occurs during parsing .
* @throws DocumentException if an error occurs during parsing .
* /
public Document read ( File file ) throws DocumentException {
try {
@ -272,13 +333,9 @@ public class SAXReader {
* Reads a Document from the given < code > URL < / code > using SAX
* < / p >
*
* @param url
* < code > URL < / code > to read from .
*
* @param url < code > URL < / code > to read from .
* @return the newly created Document instance
*
* @throws DocumentException
* if an error occurs during parsing .
* @throws DocumentException if an error occurs during parsing .
* /
public Document read ( URL url ) throws DocumentException {
String systemID = url . toExternalForm ( ) ;
@ -304,13 +361,9 @@ public class SAXReader {
* String } to denote the source of the document .
* < / p >
*
* @param systemId
* is a URL for a document or a file name .
*
* @param systemId is a URL for a document or a file name .
* @return the newly created Document instance
*
* @throws DocumentException
* if an error occurs during parsing .
* @throws DocumentException if an error occurs during parsing .
* /
public Document read ( String systemId ) throws DocumentException {
InputSource source = new InputSource ( systemId ) ;
@ -326,13 +379,9 @@ public class SAXReader {
* Reads a Document from the given stream using SAX
* < / p >
*
* @param in
* < code > InputStream < / code > to read from .
*
* @param in < code > InputStream < / code > to read from .
* @return the newly created Document instance
*
* @throws DocumentException
* if an error occurs during parsing .
* @throws DocumentException if an error occurs during parsing .
* /
public Document read ( InputStream in ) throws DocumentException {
InputSource source = new InputSource ( in ) ;
@ -344,17 +393,11 @@ public class SAXReader {
}
/ * *
* < p >
* Reads a Document from the given < code > Reader < / code > using SAX
* < / p >
*
* @param reader
* is the reader for the input
*
* @param reader is the reader for the input
* @return the newly created Document instance
*
* @throws DocumentException
* if an error occurs during parsing .
* @throws DocumentException if an error occurs during parsing .
* /
public Document read ( Reader reader ) throws DocumentException {
InputSource source = new InputSource ( reader ) ;
@ -370,15 +413,10 @@ public class SAXReader {
* Reads a Document from the given stream using SAX
* < / p >
*
* @param in
* < code > InputStream < / code > to read from .
* @param systemId
* is the URI for the input
*
* @param in < code > InputStream < / code > to read from .
* @param systemId is the URI for the input
* @return the newly created Document instance
*
* @throws DocumentException
* if an error occurs during parsing .
* @throws DocumentException if an error occurs during parsing .
* /
public Document read ( InputStream in , String systemId )
throws DocumentException {
@ -396,15 +434,10 @@ public class SAXReader {
* Reads a Document from the given < code > Reader < / code > using SAX
* < / p >
*
* @param reader
* is the reader for the input
* @param systemId
* is the URI for the input
*
* @param reader is the reader for the input
* @param systemId is the URI for the input
* @return the newly created Document instance
*
* @throws DocumentException
* if an error occurs during parsing .
* @throws DocumentException if an error occurs during parsing .
* /
public Document read ( Reader reader , String systemId )
throws DocumentException {
@ -422,13 +455,9 @@ public class SAXReader {
* Reads a Document from the given < code > InputSource < / code > using SAX
* < / p >
*
* @param in
* < code > InputSource < / code > to read from .
*
* @param in < code > InputSource < / code > to read from .
* @return the newly created Document instance
*
* @throws DocumentException
* if an error occurs during parsing .
* @throws DocumentException if an error occurs during parsing .
* /
public Document read ( InputSource in ) throws DocumentException {
try {
@ -502,8 +531,7 @@ public class SAXReader {
/ * *
* Sets the validation mode .
*
* @param validation
* indicates whether or not validation should occur .
* @param validation indicates whether or not validation should occur .
* /
public void setValidation ( boolean validation ) {
this . validating = validation ;
@ -523,8 +551,7 @@ public class SAXReader {
* Sets whether internal DTD declarations should be expanded into the
* DocumentType object or not .
*
* @param include
* whether or not DTD declarations should be expanded and
* @param include whether or not DTD declarations should be expanded and
* included into the DocumentType object .
* /
public void setIncludeInternalDTDDeclarations ( boolean include ) {
@ -545,8 +572,7 @@ public class SAXReader {
* Sets whether DTD external declarations should be expanded into the
* DocumentType object or not .
*
* @param include
* whether or not DTD declarations should be expanded and
* @param include whether or not DTD declarations should be expanded and
* included into the DocumentType object .
* /
public void setIncludeExternalDTDDeclarations ( boolean include ) {
@ -554,7 +580,7 @@ public class SAXReader {
}
/ * *
* Sets whether String interning is enabled or disabled for element &
* Sets whether String interning is enabled or disabled for element & amp ;
* attribute names and namespace URIs . This property is enabled by default .
*
* @return DOCUMENT ME !
@ -564,11 +590,10 @@ public class SAXReader {
}
/ * *
* Sets whether String interning is enabled or disabled for element &
* Sets whether String interning is enabled or disabled for element & amp ;
* attribute names and namespace URIs
*
* @param stringInternEnabled
* DOCUMENT ME !
* @param stringInternEnabled DOCUMENT ME !
* /
public void setStringInternEnabled ( boolean stringInternEnabled ) {
this . stringInternEnabled = stringInternEnabled ;
@ -587,8 +612,7 @@ public class SAXReader {
* Sets whether or not adjacent text nodes should be merged together when
* parsing .
*
* @param mergeAdjacentText
* New value of property mergeAdjacentText .
* @param mergeAdjacentText New value of property mergeAdjacentText .
* /
public void setMergeAdjacentText ( boolean mergeAdjacentText ) {
this . mergeAdjacentText = mergeAdjacentText ;
@ -608,8 +632,7 @@ public class SAXReader {
* Sets whether whitespace between element start and end tags should be
* ignored .
*
* @param stripWhitespaceText
* New value of property stripWhitespaceText .
* @param stripWhitespaceText New value of property stripWhitespaceText .
* /
public void setStripWhitespaceText ( boolean stripWhitespaceText ) {
this . stripWhitespaceText = stripWhitespaceText ;
@ -627,8 +650,7 @@ public class SAXReader {
/ * *
* Sets whether we should ignore comments or not .
*
* @param ignoreComments
* whether we should ignore comments or not .
* @param ignoreComments whether we should ignore comments or not .
* /
public void setIgnoreComments ( boolean ignoreComments ) {
this . ignoreComments = ignoreComments ;
@ -656,8 +678,7 @@ public class SAXReader {
* { @link DocumentFactory }
* < / p >
*
* @param documentFactory
* < code > DocumentFactory < / code > used to create DOM4J objects
* @param documentFactory < code > DocumentFactory < / code > used to create DOM4J objects
* /
public void setDocumentFactory ( DocumentFactory documentFactory ) {
this . factory = documentFactory ;
@ -676,8 +697,7 @@ public class SAXReader {
* Sets the < code > ErrorHandler < / code > used by the SAX
* < code > XMLReader < / code > .
*
* @param errorHandler
* is the < code > ErrorHandler < / code > used by SAX
* @param errorHandler is the < code > ErrorHandler < / code > used by SAX
* /
public void setErrorHandler ( ErrorHandler errorHandler ) {
this . errorHandler = errorHandler ;
@ -695,8 +715,7 @@ public class SAXReader {
/ * *
* Sets the entity resolver used to resolve entities .
*
* @param entityResolver
* DOCUMENT ME !
* @param entityResolver DOCUMENT ME !
* /
public void setEntityResolver ( EntityResolver entityResolver ) {
this . entityResolver = entityResolver ;
@ -706,9 +725,7 @@ public class SAXReader {
* DOCUMENT ME !
*
* @return the < code > XMLReader < / code > used to parse SAX events
*
* @throws SAXException
* DOCUMENT ME !
* @throws SAXException DOCUMENT ME !
* /
public XMLReader getXMLReader ( ) throws SAXException {
if ( xmlReader = = null ) {
@ -721,8 +738,7 @@ public class SAXReader {
/ * *
* Sets the < code > XMLReader < / code > used to parse SAX events
*
* @param reader
* is the < code > XMLReader < / code > to parse SAX events
* @param reader is the < code > XMLReader < / code > to parse SAX events
* /
public void setXMLReader ( XMLReader reader ) {
this . xmlReader = reader ;
@ -733,7 +749,6 @@ public class SAXReader {
* encoding )
*
* @return encoding used for InputSource
*
* /
public String getEncoding ( ) {
return encoding ;
@ -742,8 +757,7 @@ public class SAXReader {
/ * *
* Sets encoding used for InputSource ( null means system default encoding )
*
* @param encoding
* is encoding used for InputSource
* @param encoding is encoding used for InputSource
* /
public void setEncoding ( String encoding ) {
this . encoding = encoding ;
@ -753,12 +767,9 @@ public class SAXReader {
* Sets the class name of the < code > XMLReader < / code > to be used to parse
* SAX events .
*
* @param xmlReaderClassName
* is the class name of the < code > XMLReader < / code > to parse SAX
* @param xmlReaderClassName is the class name of the < code > XMLReader < / code > to parse SAX
* events
*
* @throws SAXException
* DOCUMENT ME !
* @throws SAXException DOCUMENT ME !
* /
public void setXMLReaderClassName ( String xmlReaderClassName )
throws SAXException {
@ -769,10 +780,8 @@ public class SAXReader {
* Adds the < code > ElementHandler < / code > to be called when the specified
* path is encountered .
*
* @param path
* is the path to be handled
* @param handler
* is the < code > ElementHandler < / code > to be called by the event
* @param path is the path to be handled
* @param handler is the < code > ElementHandler < / code > to be called by the event
* based processor .
* /
public void addHandler ( String path , ElementHandler handler ) {
@ -783,8 +792,7 @@ public class SAXReader {
* Removes the < code > ElementHandler < / code > from the event based processor ,
* for the specified path .
*
* @param path
* is the path to remove the < code > ElementHandler < / code > for .
* @param path is the path to remove the < code > ElementHandler < / code > for .
* /
public void removeHandler ( String path ) {
getDispatchHandler ( ) . removeHandler ( path ) ;
@ -795,8 +803,7 @@ public class SAXReader {
* registered , this will set a default < code > ElementHandler < / code > to be
* called for any path which does < b > NOT < / b > have a handler registered .
*
* @param handler
* is the < code > ElementHandler < / code > to be called by the event
* @param handler is the < code > ElementHandler < / code > to be called by the event
* based processor .
* /
public void setDefaultHandler ( ElementHandler handler ) {
@ -824,8 +831,7 @@ public class SAXReader {
/ * *
* Sets the SAX filter to be used when filtering SAX events
*
* @param filter
* is the SAX filter to use or null to disable filtering
* @param filter is the SAX filter to use or null to disable filtering
* /
public void setXMLFilter ( XMLFilter filter ) {
this . xmlFilter = filter ;
@ -838,9 +844,7 @@ public class SAXReader {
* Installs any XMLFilter objects required to allow the SAX event stream to
* be filtered and preprocessed before it gets to dom4j .
*
* @param reader
* DOCUMENT ME !
*
* @param reader DOCUMENT ME !
* @return the new XMLFilter if applicable or the original XMLReader if no
* filter is being used .
* /
@ -886,9 +890,7 @@ public class SAXReader {
* XMLReader objects
*
* @return DOCUMENT ME !
*
* @throws SAXException
* DOCUMENT ME !
* @throws SAXException DOCUMENT ME !
* /
protected XMLReader createXMLReader ( ) throws SAXException {
return SAXHelper . createXMLReader ( isValidating ( ) ) ;
@ -897,13 +899,9 @@ public class SAXReader {
/ * *
* Configures the XMLReader before use
*
* @param reader
* DOCUMENT ME !
* @param handler
* DOCUMENT ME !
*
* @throws DocumentException
* DOCUMENT ME !
* @param reader DOCUMENT ME !
* @param handler DOCUMENT ME !
* @throws DocumentException DOCUMENT ME !
* /
protected void configureReader ( XMLReader reader , DefaultHandler handler )
throws DocumentException {
@ -918,27 +916,10 @@ public class SAXReader {
SAXHelper . setParserProperty ( reader , SAX_DECL_HANDLER , handler ) ;
}
// configure namespace support
SAXHelper . setParserFeature ( reader , SAX_NAMESPACES , true ) ;
SAXHelper . setParserFeature ( reader , SAX_NAMESPACE_PREFIXES , false ) ;
// string interning
SAXHelper . setParserFeature ( reader , SAX_STRING_INTERNING ,
isStringInternEnabled ( ) ) ;
// external entities
/ *
* SAXHelper . setParserFeature ( reader ,
* "http://xml.org/sax/properties/external-general-entities" ,
* includeExternalGeneralEntities ) ; SAXHelper . setParserFeature ( reader ,
* "http://xml.org/sax/properties/external-parameter-entities" ,
* includeExternalParameterEntities ) ;
* /
// use Locator2 if possible
SAXHelper . setParserFeature ( reader ,
"http://xml.org/sax/features/use-locator2" , true ) ;
try {
// configure validation support
reader . setFeature ( "http://xml.org/sax/features/validation" ,
@ -960,9 +941,7 @@ public class SAXReader {
/ * *
* Factory Method to allow user derived SAXContentHandler objects to be used
*
* @param reader
* DOCUMENT ME !
*
* @param reader DOCUMENT ME !
* @return DOCUMENT ME !
* /
protected SAXContentHandler createContentHandler ( XMLReader reader ) {