Java : DocumentBuilder (XML) with Examples
DocumentBuilder (Java SE 22 & JDK 22) with Examples.
This page provides code examples for most DocumentBuilder methods.
Summary
Defines the API to obtain DOM Document instances from an XML document. Using this class, an application programmer can obtain a Document from XML.
// Parse a small in-memory XML document, then look up two elements by tag name.
final var source = """
<root>
<child-a>AAA</child-a>
<child-b>BBB</child-b>
</root>
""";

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
final var doc = parser.parse(new ByteArrayInputStream(source.getBytes()));

// For each element, print the node itself and then its text content.
final var first = doc.getElementsByTagName("child-a").item(0);
System.out.println(first); // [child-a: null]
System.out.println(first.getTextContent()); // AAA

final var second = doc.getElementsByTagName("child-b").item(0);
System.out.println(second); // [child-b: null]
System.out.println(second.getTextContent()); // BBB
Code examples on this page use the printDocument method below.
/**
 * Serializes a DOM document to indented XML without an XML declaration and
 * prints the result to standard output.
 *
 * <p>When the document has a document type, its public and system identifiers
 * are forwarded to the transformer (each only if non-null).
 *
 * @param document the DOM document to print
 * @throws TransformerException if the transformer cannot be created or the
 *     transformation fails
 */
public void printDocument(Document document) throws TransformerException {
    final var serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");

    // Forward the DOCTYPE identifiers, if any, as output properties.
    final var doctype = document.getDoctype();
    if (doctype != null) {
        final var pubId = doctype.getPublicId();
        if (pubId != null) {
            serializer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, pubId);
        }

        final var sysId = doctype.getSystemId();
        if (sysId != null) {
            serializer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, sysId);
        }
    }

    // Transform into an in-memory buffer, then print the buffered text.
    final var output = new StreamResult(new StringWriter());
    final var input = new DOMSource(document);
    serializer.transform(input, output);
    System.out.print(output.getWriter());
}
// Build a document from scratch: a root element, one child element, one text node.
final var doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();

final var rootElement = doc.createElement("root");
System.out.println(rootElement); // [root: null]
doc.appendChild(rootElement);

final var childElement = doc.createElement("child");
System.out.println(childElement); // [child: null]
rootElement.appendChild(childElement);

final var textNode = doc.createTextNode("abcd");
System.out.println(textNode); // [#text: abcd]
childElement.appendChild(textNode);

// Printed result:
//<root>
//    <child>abcd</child>
//</root>
printDocument(doc);
Constructors
DocumentBuilder ()
Protected constructor
This constructor is protected. Creating a subclass of this class is rare, so the code example is omitted.
Methods
abstract DOMImplementation getDOMImplementation ()
Obtain an instance of a DOMImplementation object.
// Obtain the DOMImplementation from a builder and use it to create a
// document that carries a DOCTYPE with a system identifier.
final var domImpl = DocumentBuilderFactory.newInstance()
        .newDocumentBuilder()
        .getDOMImplementation();

final var doctype = domImpl.createDocumentType(
        "root", null, "file:///R:/java-work/sample.dtd");
System.out.println(doctype); // [root: null]

final var doc = domImpl.createDocument(null, "root", doctype);

// Printed result:
//<!DOCTYPE root SYSTEM "file:///R:/java-work/sample.dtd">
//<root/>
printDocument(doc);
Schema getSchema ()
Get a reference to the Schema being used by the XML processor.
// Schema under which <root> may contain only a string (xsd:string).
final var schemaText = """
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<xsd:element name="root" type="xsd:string"/>
</xsd:schema>
""";

final var schemaSource = new StreamSource(new ByteArrayInputStream(schemaText.getBytes()));
final var schema = SchemaFactory.newDefaultInstance().newSchema(schemaSource);

// Every builder created by this factory is associated with the schema.
final var factory = DocumentBuilderFactory.newInstance();
factory.setSchema(schema);

// Reports recoverable errors on standard output.
final var reporter = new DefaultHandler() {
    @Override
    public void error(SAXParseException e) {
        System.out.println("-- ErrorHandler error --");
        System.out.println(e);
    }
};

// Case 1: the document conforms to the schema.
{
    final var validXml = """
        <root>abcd</root>
        """;

    final var parser = factory.newDocumentBuilder();
    System.out.println(parser.getSchema().equals(schema)); // true
    parser.setErrorHandler(reporter);

    final var doc = parser.parse(new ByteArrayInputStream(validXml.getBytes()));

    //<root>abcd</root>
    printDocument(doc);
}

// Case 2: <root> contains a child element, so the handler receives an error.
{
    final var invalidXml = """
        <root><child>abcd</child></root>
        """;

    final var parser = factory.newDocumentBuilder();
    System.out.println(parser.getSchema().equals(schema)); // true
    parser.setErrorHandler(reporter);

    final var doc = parser.parse(new ByteArrayInputStream(invalidXml.getBytes()));

    // Result
    // ↓
    //-- ErrorHandler error --
    //org.xml.sax.SAXParseException; lineNumber: 1; columnNumber: 33; cvc-type.3.1.2:
    // Element 'root' is a simple type, so it must have no element information item [children].
}
abstract boolean isNamespaceAware ()
Indicates whether or not this parser is configured to understand namespaces.
// A document whose elements live in the "sample" namespace.
final var source = """
<ns:root xmlns:ns="sample">
<ns:child/>
</ns:root>
""";

// Create the builder through newNSInstance() and check its namespace setting.
final var parser = DocumentBuilderFactory.newNSInstance().newDocumentBuilder();
System.out.println(parser.isNamespaceAware()); // true

final var doc = parser.parse(new ByteArrayInputStream(source.getBytes()));

// Look the element up by namespace URI and local name.
final var nsChild = doc.getElementsByTagNameNS("sample", "child").item(0);
System.out.println(nsChild); // [ns:child: null]
abstract boolean isValidating ()
Indicates whether or not this parser is configured to validate XML documents.
// The document below deliberately violates its own DTD:
// <root> must contain <child-a>, but it contains <child-z>.
final var source = """
<!DOCTYPE root [
<!ELEMENT root (child-a)>
]>
<root><child-z/></root>
""";

// Enable validation on the factory before creating the builder.
final var validatingFactory = DocumentBuilderFactory.newInstance();
validatingFactory.setValidating(true);

final var parser = validatingFactory.newDocumentBuilder();
System.out.println(parser.isValidating()); // true

// Print each error reported to the handler.
final var reporter = new DefaultHandler() {
    @Override
    public void error(SAXParseException e) {
        System.out.println("-- ErrorHandler error --");
        System.out.println(e);
    }
};
parser.setErrorHandler(reporter);

final var doc = parser.parse(new ByteArrayInputStream(source.getBytes()));

// Result
// ↓
//-- ErrorHandler error --
//org.xml.sax.SAXParseException; lineNumber: 4; columnNumber: 17;
// Element type "child-z" must be declared.
//-- ErrorHandler error --
//org.xml.sax.SAXParseException; lineNumber: 4; columnNumber: 24;
// The content of element type "root" must match "(child-a)".
boolean isXIncludeAware ()
Get the XInclude processing mode for this parser.
// Write the fragment that the main document will pull in via xi:include.
final var includedFile = Path.of("R:", "java-work", "sample.xml");
System.out.println(includedFile); // R:\java-work\sample.xml

Files.writeString(includedFile, """
<child>abcd</child>
""");

// The main document references the file written above.
final var source = """
<root xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:include href="file:///R:/java-work/sample.xml" parse="xml" />
</root>
""";

// Turn on XInclude processing on a factory obtained from newNSInstance().
final var xiFactory = DocumentBuilderFactory.newNSInstance();
xiFactory.setXIncludeAware(true);

final var parser = xiFactory.newDocumentBuilder();
System.out.println(parser.isXIncludeAware()); // true

final var doc = parser.parse(new ByteArrayInputStream(source.getBytes()));

// The included <child> element is now part of the parsed document.
final var included = doc.getElementsByTagName("child").item(0);
System.out.println(included); // [child: null]
System.out.println(included.getTextContent()); // abcd
abstract Document newDocument ()
Obtain a new instance of a DOM Document object to build a DOM tree with.
// Create an empty document and assemble <root><child aa="AA">abcd</child></root>.
final var doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();

final var rootElement = doc.createElement("root");
doc.appendChild(rootElement);

final var childElement = doc.createElement("child");
rootElement.appendChild(childElement);
childElement.setAttribute("aa", "AA");

final var textNode = doc.createTextNode("abcd");
childElement.appendChild(textNode);

// Printed result:
//<root>
//    <child aa="AA">abcd</child>
//</root>
printDocument(doc);
Document parse (File f)
Parse the content of the given file as an XML document and return a new DOM Document object.
// Write a sample XML file, then parse it through the File overload.
final var xmlPath = Path.of("R:", "java-work", "sample.xml");
System.out.println(xmlPath); // R:\java-work\sample.xml

Files.writeString(xmlPath, """
<root><child>abcd</child></root>
""");

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
final var xmlFile = xmlPath.toFile();
final var doc = parser.parse(xmlFile);

// Printed result:
//<root>
//    <child>abcd</child>
//</root>
printDocument(doc);
Document parse (InputStream is)
Parse the content of the given InputStream as an XML document and return a new DOM Document object.
// Parse XML supplied as an in-memory byte stream.
final var source = """
<root><child>abcd</child></root>
""";

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
final var stream = new ByteArrayInputStream(source.getBytes());
final var doc = parser.parse(stream);

// Printed result:
//<root>
//    <child>abcd</child>
//</root>
printDocument(doc);
Document parse (InputStream is, String systemId)
Parse the content of the given InputStream as an XML document and return a new DOM Document object.
// Parse an in-memory stream while supplying a system ID.
final var source = """
<root><child>abcd</child></root>
""";

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

final var base = Path.of("R:", "java-work").toUri();
System.out.println(base); // file:///R:/java-work/

final var stream = new ByteArrayInputStream(source.getBytes());
final var doc = parser.parse(stream, base.toString());

// The system ID passed to parse() is reported as the document's base URI.
System.out.println(doc.getBaseURI()); // file:///R:/java-work/

// Printed result:
//<root>
//    <child>abcd</child>
//</root>
printDocument(doc);
Document parse (String uri)
Parse the content of the given URI as an XML document and return a new DOM Document object.
// Write a sample XML file, then parse it by passing its URI as a string.
final var xmlPath = Path.of("R:", "java-work", "sample.xml");
System.out.println(xmlPath); // R:\java-work\sample.xml

Files.writeString(xmlPath, """
<root><child>abcd</child></root>
""");

final var location = xmlPath.toUri();
System.out.println(location); // file:///R:/java-work/sample.xml

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
final var doc = parser.parse(location.toString());

// Printed result:
//<root>
//    <child>abcd</child>
//</root>
printDocument(doc);
abstract Document parse (InputSource is)
Parse the content of the given input source as an XML document and return a new DOM Document object.
// Parse XML through an InputSource backed by a character reader.
final var source = """
<root><child>abcd</child></root>
""";

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

// The reader is closed automatically when the try block exits.
try (final var chars = new StringReader(source)) {
    final var doc = parser.parse(new InputSource(chars));

    // Printed result:
    //<root>
    //    <child>abcd</child>
    //</root>
    printDocument(doc);
}
void reset ()
Reset this DocumentBuilder to its original configuration.
// Two DTD files that define the same entity name with different values.
final var bbbDtd = Path.of("R:", "java-work", "bbb.dtd");
System.out.println(bbbDtd); // R:\java-work\bbb.dtd
Files.writeString(bbbDtd, """
<!ENTITY aaa "bbb">
""");

final var cccDtd = Path.of("R:", "java-work", "ccc.dtd");
System.out.println(cccDtd); // R:\java-work\ccc.dtd
Files.writeString(cccDtd, """
<!ENTITY aaa "ccc">
""");

// The document itself refers to bbb.dtd.
final var source = """
<!DOCTYPE root SYSTEM "file:///R:/java-work/bbb.dtd">
<root>&aaa;</root>
""";

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

// Redirect requests for bbb.dtd to ccc.dtd; a null return leaves any other
// system ID to the parser.
parser.setEntityResolver((_, systemId) ->
        "file:///R:/java-work/bbb.dtd".equals(systemId)
                ? new InputSource(cccDtd.toUri().toString())
                : null);

// First parse: the resolver is installed, so &aaa; comes from ccc.dtd.
final var beforeReset = parser.parse(new ByteArrayInputStream(source.getBytes()));

//<!DOCTYPE root SYSTEM "file:///R:/java-work/bbb.dtd">
//<root>ccc</root>
printDocument(beforeReset);

// Second parse, after reset(): &aaa; comes from bbb.dtd again.
parser.reset();

final var afterReset = parser.parse(new ByteArrayInputStream(source.getBytes()));

//<!DOCTYPE root SYSTEM "file:///R:/java-work/bbb.dtd">
//<root>bbb</root>
printDocument(afterReset);
abstract void setEntityResolver (EntityResolver er)
Specify the EntityResolver to be used to resolve entities present in the XML document to be parsed.
// The document refers to an external DTD by system ID.
final var source = """
<!DOCTYPE root SYSTEM "file:///R:/java-work/xxx.dtd">
<root>&aaa;</root>
""";

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

// Supply the DTD content from memory when xxx.dtd is requested; a null return
// leaves any other system ID to the parser.
parser.setEntityResolver((_, systemId) -> {
    if (!"file:///R:/java-work/xxx.dtd".equals(systemId)) {
        return null;
    }

    final var dtdText = """
        <!ENTITY aaa "bbb">
        """;
    return new InputSource(new ByteArrayInputStream(dtdText.getBytes()));
});

final var doc = parser.parse(new ByteArrayInputStream(source.getBytes()));

// Printed result:
//<!DOCTYPE root SYSTEM "file:///R:/java-work/xxx.dtd">
//<root>bbb</root>
printDocument(doc);
abstract void setErrorHandler (ErrorHandler eh)
Specify the ErrorHandler to be used by the parser.
// The DTD requires <root> to contain <child-a>; the document intentionally
// contains <child-z> instead, so validation reports errors.
final var source = """
<!DOCTYPE root [
<!ELEMENT root (child-a)>
]>
<root><child-z/></root>
""";

final var validatingFactory = DocumentBuilderFactory.newInstance();
validatingFactory.setValidating(true);

final var parser = validatingFactory.newDocumentBuilder();
System.out.println(parser.isValidating()); // true

// Install a handler that prints every error passed to error().
final var reporter = new DefaultHandler() {
    @Override
    public void error(SAXParseException e) {
        System.out.println("-- ErrorHandler error --");
        System.out.println(e);
    }
};
parser.setErrorHandler(reporter);

final var doc = parser.parse(new ByteArrayInputStream(source.getBytes()));

// Result
// ↓
//-- ErrorHandler error --
//org.xml.sax.SAXParseException; lineNumber: 4; columnNumber: 17;
// Element type "child-z" must be declared.
//-- ErrorHandler error --
//org.xml.sax.SAXParseException; lineNumber: 4; columnNumber: 24;
// The content of element type "root" must match "(child-a)".