Java : DocumentBuilder (XML) with Examples

DocumentBuilder (Java SE 22 & JDK 22) with Examples.
This page provides code examples for most DocumentBuilder methods.


Summary

Defines the API to obtain DOM Document instances from an XML document. Using this class, an application programmer can obtain a Document from XML.

Class diagram

// Source XML held in a text block; both child elements are looked up by tag name below.
final var source = """
        <root>
            <child-a>AAA</child-a>
            <child-b>BBB</child-b>
        </root>
        """;

// Factory and builder are obtained in a single chained call.
final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

final var input = new ByteArrayInputStream(source.getBytes());
final var doc = parser.parse(input);

// item(0) picks the first element with the given tag name.
final var firstA = doc.getElementsByTagName("child-a").item(0);
System.out.println(firstA); // [child-a: null]
System.out.println(firstA.getTextContent()); // AAA

final var firstB = doc.getElementsByTagName("child-b").item(0);
System.out.println(firstB); // [child-b: null]
System.out.println(firstB.getTextContent()); // BBB

Code examples on this page use the printDocument method below.

/**
 * Serializes the given DOM document and prints the result to standard output.
 *
 * <p>The output is indented and written without an XML declaration. If the document
 * has a doctype, its public and system identifiers (when non-null) are handed to the
 * transformer as output properties.
 *
 * @param document the DOM document to print
 * @throws TransformerException if the transformer cannot be created or the
 *         transformation fails
 */
public void printDocument(Document document) throws TransformerException {
    final var serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");

    // Both identifiers stay null when the document has no doctype at all.
    final var docType = document.getDoctype();
    final var publicId = (docType == null) ? null : docType.getPublicId();
    final var systemId = (docType == null) ? null : docType.getSystemId();

    if (publicId != null) {
        serializer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, publicId);
    }
    if (systemId != null) {
        serializer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, systemId);
    }

    // Serialize into memory first, then print the collected text in one go.
    final var buffer = new StringWriter();
    serializer.transform(new DOMSource(document), new StreamResult(buffer));

    System.out.print(buffer);
}
// Example: start from an empty document and assemble <root><child>abcd</child></root> by hand.
final var factory = DocumentBuilderFactory.newInstance();
final var doc = factory.newDocumentBuilder().newDocument();

// Create all three nodes first; they are not part of the tree until appended.
final var rootElement = doc.createElement("root");
final var childElement = doc.createElement("child");
final var textNode = doc.createTextNode("abcd");

System.out.println(rootElement); // [root: null]
System.out.println(childElement); // [child: null]
System.out.println(textNode); // [#text: abcd]

// Attach from the top down: root to the document, child to root, text to child.
doc.appendChild(rootElement);
rootElement.appendChild(childElement);
childElement.appendChild(textNode);

//<root>
//    <child>abcd</child>
//</root>
printDocument(doc);

Constructors

DocumentBuilder ()

Protected constructor

This constructor is protected. Creating a subclass of this class is rare, so the code example is omitted.

Methods

abstract DOMImplementation getDOMImplementation ()

Obtain an instance of a DOMImplementation object.

// Reach the DOMImplementation through a freshly created builder.
final var implementation = DocumentBuilderFactory.newInstance()
        .newDocumentBuilder()
        .getDOMImplementation();

// Doctype named "root" with no public ID and a system ID pointing at a DTD file.
final var rootDocType = implementation.createDocumentType(
        "root", null, "file:///R:/java-work/sample.dtd");
System.out.println(rootDocType); // [root: null]

// No namespace URI; the root element is "root" and the doctype above is attached.
final var doc = implementation.createDocument(null, "root", rootDocType);

//<!DOCTYPE root SYSTEM "file:///R:/java-work/sample.dtd">
//<root/>
printDocument(doc);

Schema getSchema ()

Get a reference to the Schema being used by the XML processor.

// A minimal schema: <root> is declared as xsd:string, so it may hold text but no child elements.
final var schemaSource = """
        <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
            <xsd:element name="root" type="xsd:string"/>
        </xsd:schema>
        """;

final var schema = SchemaFactory.newDefaultInstance().newSchema(
        new StreamSource(new ByteArrayInputStream(schemaSource.getBytes())));

// Prints each recoverable error the parser reports.
final var reportingHandler = new DefaultHandler() {
    @Override
    public void error(SAXParseException e) {
        System.out.println("-- ErrorHandler error --");
        System.out.println(e);
    }
};

// Builders created from this factory expose the schema through getSchema().
final var factory = DocumentBuilderFactory.newInstance();
factory.setSchema(schema);

{
    // Valid case: <root> contains only text.
    final var validXml = """
            <root>abcd</root>
            """;

    final var builder = factory.newDocumentBuilder();
    builder.setErrorHandler(reportingHandler);
    System.out.println(builder.getSchema().equals(schema)); // true

    final var input = new ByteArrayInputStream(validXml.getBytes());

    //<root>abcd</root>
    printDocument(builder.parse(input));
}

{
    // Invalid case: <root> contains a child element, which the schema does not allow.
    final var invalidXml = """
            <root><child>abcd</child></root>
            """;

    final var builder = factory.newDocumentBuilder();
    builder.setErrorHandler(reportingHandler);
    System.out.println(builder.getSchema().equals(schema)); // true

    // The parsed document is not needed here; only the reported error matters.
    builder.parse(new ByteArrayInputStream(invalidXml.getBytes()));

    // Result
    // ↓
    //-- ErrorHandler error --
    //org.xml.sax.SAXParseException; lineNumber: 1; columnNumber: 33; cvc-type.3.1.2:
    // Element 'root' is a simple type, so it must have no element information item [children].
}

abstract boolean isNamespaceAware ()

Indicates whether or not this parser is configured to understand namespaces.

// Both elements live in the "sample" namespace, bound to the "ns" prefix.
final var namespacedXml = """
        <ns:root xmlns:ns="sample">
            <ns:child/>
        </ns:root>
        """;

// newNSInstance() yields a factory whose builders report isNamespaceAware() == true.
final var parser = DocumentBuilderFactory.newNSInstance().newDocumentBuilder();
System.out.println(parser.isNamespaceAware()); // true

final var input = new ByteArrayInputStream(namespacedXml.getBytes());
final var doc = parser.parse(input);

// Look the element up by namespace URI and local name rather than by prefixed name.
final var nsChild = doc.getElementsByTagNameNS("sample", "child").item(0);
System.out.println(nsChild); // [ns:child: null]

abstract boolean isValidating ()

Indicates whether or not this parser is configured to validate XML documents.

// The DTD allows only <child-a> inside <root>, so <child-z> below is deliberately invalid.
final var invalidXml = """
        <!DOCTYPE root [
            <!ELEMENT root (child-a)>
        ]>
        <root><child-z/></root>
        """;

final var validatingFactory = DocumentBuilderFactory.newInstance();
validatingFactory.setValidating(true);

final var parser = validatingFactory.newDocumentBuilder();
System.out.println(parser.isValidating()); // true

// Prints each validation error the parser reports.
final var reporter = new DefaultHandler() {
    @Override
    public void error(SAXParseException e) {
        System.out.println("-- ErrorHandler error --");
        System.out.println(e);
    }
};
parser.setErrorHandler(reporter);

// The parsed document is not used; the point is the errors printed during parsing.
parser.parse(new ByteArrayInputStream(invalidXml.getBytes()));

// Result
// ↓
//-- ErrorHandler error --
//org.xml.sax.SAXParseException; lineNumber: 4; columnNumber: 17;
// Element type "child-z" must be declared.
//-- ErrorHandler error --
//org.xml.sax.SAXParseException; lineNumber: 4; columnNumber: 24;
// The content of element type "root" must match "(child-a)".

boolean isXIncludeAware ()

Get the XInclude processing mode for this parser.

// File that will be pulled into the main document through xi:include.
final var sampleFile = Path.of("R:", "java-work", "sample.xml");
System.out.println(sampleFile); // R:\java-work\sample.xml

Files.writeString(sampleFile, """
        <child>abcd</child>
        """);

// The href is derived from sampleFile so the included location cannot drift from
// the file written above (on the setup shown it is file:///R:/java-work/sample.xml).
final var xml = """
        <root xmlns:xi="http://www.w3.org/2001/XInclude">
            <xi:include href="%s" parse="xml" />
        </root>
        """.formatted(sampleFile.toUri());

// XInclude processing is switched on at the factory; the builder then reports it.
final var factory = DocumentBuilderFactory.newNSInstance();
factory.setXIncludeAware(true);

final var builder = factory.newDocumentBuilder();
System.out.println(builder.isXIncludeAware()); // true

final var document = builder.parse(new ByteArrayInputStream(xml.getBytes()));

// The <child> element comes from the included file, not from the xml text block.
final var child = document.getElementsByTagName("child").item(0);
System.out.println(child); // [child: null]
System.out.println(child.getTextContent()); // abcd

abstract Document newDocument ()

Obtain a new instance of a DOM Document object to build a DOM tree with.

// Create an empty document, then build <root><child aa="AA">abcd</child></root> on it.
final var doc = DocumentBuilderFactory.newInstance()
        .newDocumentBuilder()
        .newDocument();

final var rootElement = doc.createElement("root");
final var childElement = doc.createElement("child");

// Configure the child (attribute and text content) before wiring up the tree.
childElement.setAttribute("aa", "AA");
childElement.appendChild(doc.createTextNode("abcd"));

rootElement.appendChild(childElement);
doc.appendChild(rootElement);

//<root>
//    <child aa="AA">abcd</child>
//</root>
printDocument(doc);

Document parse (File f)

Parse the content of the given file as an XML document and return a new DOM Document object.

// Write a small XML file to disk, then parse it through the File overload.
final var xmlPath = Path.of("R:", "java-work", "sample.xml");
System.out.println(xmlPath); // R:\java-work\sample.xml

Files.writeString(xmlPath, """
        <root><child>abcd</child></root>
        """);

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

// parse(File) takes a java.io.File, so the Path is converted first.
final var xmlFile = xmlPath.toFile();
final var doc = parser.parse(xmlFile);

//<root>
//    <child>abcd</child>
//</root>
printDocument(doc);

Document parse (InputStream is)

Parse the content of the given InputStream as an XML document and return a new DOM Document object.

// XML to parse, supplied to the builder as a stream of bytes.
final var source = """
        <root><child>abcd</child></root>
        """;

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

final var input = new ByteArrayInputStream(source.getBytes());
final var doc = parser.parse(input);

//<root>
//    <child>abcd</child>
//</root>
printDocument(doc);

Document parse (InputStream is, String systemId)

Parse the content of the given InputStream as an XML document and return a new DOM Document object.

// The stream carries the XML; the system ID passed alongside it shows up as the base URI.
final var source = """
        <root><child>abcd</child></root>
        """;

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

final var baseUri = Path.of("R:", "java-work").toUri();
System.out.println(baseUri); // file:///R:/java-work/

final var input = new ByteArrayInputStream(source.getBytes());
final var systemId = baseUri.toString();

final var doc = parser.parse(input, systemId);
System.out.println(doc.getBaseURI()); // file:///R:/java-work/

//<root>
//    <child>abcd</child>
//</root>
printDocument(doc);

Document parse (String uri)

Parse the content of the given URI as an XML document and return a new DOM Document object.

// Write a small XML file, then parse it by handing the builder its URI as a string.
final var xmlPath = Path.of("R:", "java-work", "sample.xml");
System.out.println(xmlPath); // R:\java-work\sample.xml

Files.writeString(xmlPath, """
        <root><child>abcd</child></root>
        """);

final var xmlUri = xmlPath.toUri();
System.out.println(xmlUri); // file:///R:/java-work/sample.xml

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

// parse(String) expects the URI in string form.
final var location = xmlUri.toString();
final var doc = parser.parse(location);

//<root>
//    <child>abcd</child>
//</root>
printDocument(doc);

abstract Document parse (InputSource is)

Parse the content of the given input source as an XML document and return a new DOM Document object.

// XML to parse, supplied as characters through an InputSource wrapping a Reader.
final var source = """
        <root><child>abcd</child></root>
        """;

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

// The reader is closed automatically once parsing and printing are done.
try (final var characters = new StringReader(source)) {
    final var doc = parser.parse(new InputSource(characters));

    //<root>
    //    <child>abcd</child>
    //</root>
    printDocument(doc);
}

void reset ()

Reset this DocumentBuilder to its original configuration.

// Two DTD files that define the same entity "aaa" with different replacement text.
final var dtdB = Path.of("R:", "java-work", "bbb.dtd");
System.out.println(dtdB); // R:\java-work\bbb.dtd

Files.writeString(dtdB, """
        <!ENTITY aaa "bbb">
        """);

final var dtdC = Path.of("R:", "java-work", "ccc.dtd");
System.out.println(dtdC); // R:\java-work\ccc.dtd

Files.writeString(dtdC, """
        <!ENTITY aaa "ccc">
        """);

// The system ID is derived from dtdB once and reused for both the DOCTYPE and the
// resolver check, so the two cannot drift apart
// (on the setup shown it is file:///R:/java-work/bbb.dtd).
final var dtdBSystemId = dtdB.toUri().toString();

final var xml = """
        <!DOCTYPE root SYSTEM "%s">
        <root>&aaa;</root>
        """.formatted(dtdBSystemId);

final var factory = DocumentBuilderFactory.newInstance();
final var builder = factory.newDocumentBuilder();

// Redirect requests for bbb.dtd to ccc.dtd; returning null leaves other entities alone.
builder.setEntityResolver((_, systemId) -> {
    if (dtdBSystemId.equals(systemId)) {
        return new InputSource(dtdC.toUri().toString());
    } else {
        return null;
    }
});

// With the resolver installed, &aaa; takes its value from ccc.dtd.
final var document1 = builder.parse(new ByteArrayInputStream(xml.getBytes()));

//<!DOCTYPE root SYSTEM "file:///R:/java-work/bbb.dtd">
//<root>ccc</root>
printDocument(document1);

// reset() restores the original configuration, so the resolver above no longer applies.
builder.reset();

// The same XML now resolves &aaa; from bbb.dtd, as named in the DOCTYPE.
final var document2 = builder.parse(new ByteArrayInputStream(xml.getBytes()));

//<!DOCTYPE root SYSTEM "file:///R:/java-work/bbb.dtd">
//<root>bbb</root>
printDocument(document2);

abstract void setEntityResolver (EntityResolver er)

Specify the EntityResolver to be used to resolve entities present in the XML document to be parsed.

// The DOCTYPE names an external DTD; the resolver below supplies its content from memory.
final var source = """
        <!DOCTYPE root SYSTEM "file:///R:/java-work/xxx.dtd">
        <root>&aaa;</root>
        """;

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

parser.setEntityResolver((_, systemId) -> {
    // Anything other than xxx.dtd is not handled here.
    if (!"file:///R:/java-work/xxx.dtd".equals(systemId)) {
        return null;
    }

    // Serve the DTD from an in-memory string.
    final var inMemoryDtd = """
            <!ENTITY aaa "bbb">
            """;
    return new InputSource(new ByteArrayInputStream(inMemoryDtd.getBytes()));
});

final var input = new ByteArrayInputStream(source.getBytes());
final var doc = parser.parse(input);

//<!DOCTYPE root SYSTEM "file:///R:/java-work/xxx.dtd">
//<root>bbb</root>
printDocument(doc);

abstract void setErrorHandler (ErrorHandler eh)

Specify the ErrorHandler to be used by the parser.

// The DTD allows only <child-a> inside <root>, so <child-z> below is deliberately invalid.
final var invalidXml = """
        <!DOCTYPE root [
            <!ELEMENT root (child-a)>
        ]>
        <root><child-z/></root>
        """;

final var validatingFactory = DocumentBuilderFactory.newInstance();
validatingFactory.setValidating(true);

final var parser = validatingFactory.newDocumentBuilder();
System.out.println(parser.isValidating()); // true

// Handler that prints every error the parser reports while validating.
final var printingHandler = new DefaultHandler() {
    @Override
    public void error(SAXParseException e) {
        System.out.println("-- ErrorHandler error --");
        System.out.println(e);
    }
};
parser.setErrorHandler(printingHandler);

// The parsed document is not used; the point is the handler output during parsing.
parser.parse(new ByteArrayInputStream(invalidXml.getBytes()));

// Result
// ↓
//-- ErrorHandler error --
//org.xml.sax.SAXParseException; lineNumber: 4; columnNumber: 17;
// Element type "child-z" must be declared.
//-- ErrorHandler error --
//org.xml.sax.SAXParseException; lineNumber: 4; columnNumber: 24;
// The content of element type "root" must match "(child-a)".

Related posts

To top of page