Java : DocumentBuilder (XML) 示例

DocumentBuilder (Java SE 22 & JDK 22) 示例。
您将在大多数 DocumentBuilder 方法中找到代码示例。

注解 :

  • 本文可能使用了翻译软件以方便阅读。另请参阅英文原文。

简介

定义从 XML 文档获取 DOM Document 实例的 API。使用此类，应用程序开发人员可以从 XML 获取 Document。 (机器翻译)

Class diagram

// Source XML: a root element holding two text-only children.
final var xml = """
        <root>
            <child-a>AAA</child-a>
            <child-b>BBB</child-b>
        </root>
        """;

// Get a builder from the default factory and parse the XML bytes into a DOM tree.
final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
final var xmlStream = new ByteArrayInputStream(xml.getBytes());
final var dom = parser.parse(xmlStream);

// Look up each child element by tag name, then print the node and its text.
final var firstChild = dom.getElementsByTagName("child-a").item(0);
System.out.println(firstChild); // [child-a: null]
System.out.println(firstChild.getTextContent()); // AAA

final var secondChild = dom.getElementsByTagName("child-b").item(0);
System.out.println(secondChild); // [child-b: null]
System.out.println(secondChild.getTextContent()); // BBB

Code examples on this page use the printDocument method below.

/**
 * Serializes the given DOM document to text and prints it to standard output.
 *
 * <p>The output is indented and has no XML declaration. When the document has a
 * document type, its public and system identifiers (if non-null) are carried
 * over so that the DOCTYPE appears in the output.
 *
 * @param document the DOM document to print
 * @throws TransformerException if the transformer cannot be created or the transformation fails
 */
public void printDocument(Document document) throws TransformerException {
    final var serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");

    // Forward the DOCTYPE identifiers to the serializer; each one is optional.
    final var doctype = document.getDoctype();
    if (doctype != null) {
        if (doctype.getPublicId() != null) {
            serializer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, doctype.getPublicId());
        }
        if (doctype.getSystemId() != null) {
            serializer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, doctype.getSystemId());
        }
    }

    // Render into an in-memory buffer first, then print the buffer's contents.
    final var buffer = new StringWriter();
    serializer.transform(new DOMSource(document), new StreamResult(buffer));

    System.out.print(buffer);
}
// Start from an empty document and assemble the tree node by node.
final var document = DocumentBuilderFactory.newInstance()
        .newDocumentBuilder()
        .newDocument();

final var rootElement = document.createElement("root");
System.out.println(rootElement); // [root: null]
document.appendChild(rootElement);

final var childElement = document.createElement("child");
System.out.println(childElement); // [child: null]
rootElement.appendChild(childElement);

final var textNode = document.createTextNode("abcd");
System.out.println(textNode); // [#text: abcd]
childElement.appendChild(textNode);

//<root>
//    <child>abcd</child>
//</root>
printDocument(document);

Constructors

DocumentBuilder ()

受保护的构造函数 (机器翻译)

This constructor is protected. Creating a subclass of DocumentBuilder is rare, so the code example is omitted.

Methods

abstract DOMImplementation getDOMImplementation ()

获取 DOMImplementation 对象的实例。 (机器翻译)

// The DOMImplementation is obtained from a builder created by the default factory.
final var implementation = DocumentBuilderFactory.newInstance()
        .newDocumentBuilder()
        .getDOMImplementation();

// Document type named "root" with a system identifier only (public identifier is null).
final var doctype = implementation.createDocumentType(
        "root", null, "file:///R:/java-work/sample.dtd");
System.out.println(doctype); // [root: null]

// New document with a <root> document element, no namespace, and the doctype above.
final var dom = implementation.createDocument(null, "root", doctype);

//<!DOCTYPE root SYSTEM "file:///R:/java-work/sample.dtd">
//<root/>
printDocument(dom);

Schema getSchema ()

获取对 XML 处理器正在使用的 Schema 的引用。 (机器翻译)

// Schema that declares <root> as a plain string element.
final var xsd = """
        <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
            <xsd:element name="root" type="xsd:string"/>
        </xsd:schema>
        """;

final var schemaFactory = SchemaFactory.newDefaultInstance();
final var schema = schemaFactory.newSchema(
        new StreamSource(new ByteArrayInputStream(xsd.getBytes())));

// Builders created by this factory carry the schema set here.
final var factory = DocumentBuilderFactory.newInstance();
factory.setSchema(schema);

// Prints every error reported through ErrorHandler.error().
final var errorHandler = new DefaultHandler() {
    @Override
    public void error(SAXParseException e) {
        System.out.println("-- ErrorHandler error --");
        System.out.println(e);
    }
};

// Case 1: the document matches the schema.
{
    final var xml = """
            <root>abcd</root>
            """;

    final var builder = factory.newDocumentBuilder();
    System.out.println(builder.getSchema().equals(schema)); // true

    builder.setErrorHandler(errorHandler);

    final var document = builder.parse(new ByteArrayInputStream(xml.getBytes()));

    //<root>abcd</root>
    printDocument(document);
}

// Case 2: <root> contains a child element, which the schema does not allow.
{
    final var xml = """
            <root><child>abcd</child></root>
            """;

    final var builder = factory.newDocumentBuilder();
    System.out.println(builder.getSchema().equals(schema)); // true

    builder.setErrorHandler(errorHandler);

    // Parse only to trigger validation; the returned Document is not needed here.
    builder.parse(new ByteArrayInputStream(xml.getBytes()));

    // Result
    // ↓
    //-- ErrorHandler error --
    //org.xml.sax.SAXParseException; lineNumber: 1; columnNumber: 33; cvc-type.3.1.2:
    // Element 'root' is a simple type, so it must have no element information item [children].
}

abstract boolean isNamespaceAware ()

指示此解析器是否配置为理解命名空间。 (机器翻译)

// XML whose elements use the "ns" prefix bound to the namespace URI "sample".
final var xml = """
        <ns:root xmlns:ns="sample">
            <ns:child/>
        </ns:root>
        """;

// Builder created from newNSInstance(); the check below reports it as namespace-aware.
final var nsBuilder = DocumentBuilderFactory.newNSInstance().newDocumentBuilder();
System.out.println(nsBuilder.isNamespaceAware()); // true

final var xmlStream = new ByteArrayInputStream(xml.getBytes());
final var dom = nsBuilder.parse(xmlStream);

// Namespace-qualified lookup: namespace URI plus local name.
final var nsChild = dom.getElementsByTagNameNS("sample", "child").item(0);
System.out.println(nsChild); // [ns:child: null]

abstract boolean isValidating ()

指示此解析器是否配置为验证 XML 文档。 (机器翻译)

// The XML document intentionally does not match the DTD:
// the DTD declares only (child-a) as content of <root>, but the document uses <child-z>.
final var xml = """
        <!DOCTYPE root [
            <!ELEMENT root (child-a)>
        ]>
        <root><child-z/></root>
        """;

final var factory = DocumentBuilderFactory.newInstance();
factory.setValidating(true);

final var builder = factory.newDocumentBuilder();
System.out.println(builder.isValidating()); // true

// Prints every error reported through ErrorHandler.error().
builder.setErrorHandler(new DefaultHandler() {
    @Override
    public void error(SAXParseException e) {
        System.out.println("-- ErrorHandler error --");
        System.out.println(e);
    }
});

// Parse only to trigger validation; the returned Document is not needed here.
builder.parse(new ByteArrayInputStream(xml.getBytes()));

// Result
// ↓
//-- ErrorHandler error --
//org.xml.sax.SAXParseException; lineNumber: 4; columnNumber: 17;
// Element type "child-z" must be declared.
//-- ErrorHandler error --
//org.xml.sax.SAXParseException; lineNumber: 4; columnNumber: 24;
// The content of element type "root" must match "(child-a)".

boolean isXIncludeAware ()

获取此解析器的 XInclude 处理模式。 (机器翻译)

// Write the file that the main document references through xi:include.
final var includedFile = Path.of("R:", "java-work", "sample.xml");
System.out.println(includedFile); // R:\java-work\sample.xml

Files.writeString(includedFile, """
        <child>abcd</child>
        """);

// Main document: <root> includes the file written above.
final var xml = """
        <root xmlns:xi="http://www.w3.org/2001/XInclude">
            <xi:include href="file:///R:/java-work/sample.xml" parse="xml" />
        </root>
        """;

final var xiFactory = DocumentBuilderFactory.newNSInstance();
xiFactory.setXIncludeAware(true);

final var xiBuilder = xiFactory.newDocumentBuilder();
System.out.println(xiBuilder.isXIncludeAware()); // true

final var xmlStream = new ByteArrayInputStream(xml.getBytes());
final var merged = xiBuilder.parse(xmlStream);

// The parsed document contains the <child> element from the included file.
final var includedChild = merged.getElementsByTagName("child").item(0);
System.out.println(includedChild); // [child: null]
System.out.println(includedChild.getTextContent()); // abcd

abstract Document newDocument ()

获取 DOM Document 对象的新实例，用来构建 DOM 树。 (机器翻译)

// Create an empty document to build the DOM tree in.
final var dom = DocumentBuilderFactory.newInstance()
        .newDocumentBuilder()
        .newDocument();

// <root> becomes the document element.
final var rootElement = dom.createElement("root");
dom.appendChild(rootElement);

// <child aa="AA">abcd</child> goes under <root>.
final var childElement = dom.createElement("child");
rootElement.appendChild(childElement);
childElement.setAttribute("aa", "AA");
childElement.appendChild(dom.createTextNode("abcd"));

//<root>
//    <child aa="AA">abcd</child>
//</root>
printDocument(dom);

Document parse (File f)

将给定文件的内容解析为 XML 文档并返回一个新的 DOM 文档对象。 (机器翻译)

// Write the XML to a file so it can be parsed through the File overload.
final var xmlPath = Path.of("R:", "java-work", "sample.xml");
System.out.println(xmlPath); // R:\java-work\sample.xml

Files.writeString(xmlPath, """
        <root><child>abcd</child></root>
        """);

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

// parse(File) takes a java.io.File, so convert the Path first.
final var xmlFile = xmlPath.toFile();
final var dom = parser.parse(xmlFile);

//<root>
//    <child>abcd</child>
//</root>
printDocument(dom);

Document parse (InputStream is)

将给定 InputStream 的内容解析为 XML 文档并返回一个新的 DOM Document 对象。 (机器翻译)

final var xml = """
        <root><child>abcd</child></root>
        """;

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

// Expose the XML text as a byte stream and parse it.
final var xmlStream = new ByteArrayInputStream(xml.getBytes());
final var dom = parser.parse(xmlStream);

//<root>
//    <child>abcd</child>
//</root>
printDocument(dom);

Document parse (InputStream is, String systemId)

将给定 InputStream 的内容解析为 XML 文档并返回一个新的 DOM Document 对象。 (机器翻译)

final var xml = """
        <root><child>abcd</child></root>
        """;

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

// The directory URI is passed as the system identifier of the stream.
final var systemUri = Path.of("R:", "java-work").toUri();
System.out.println(systemUri); // file:///R:/java-work/

final var xmlStream = new ByteArrayInputStream(xml.getBytes());
final var dom = parser.parse(xmlStream, systemUri.toString());

// The parsed document reports the supplied system identifier as its base URI.
System.out.println(dom.getBaseURI()); // file:///R:/java-work/

//<root>
//    <child>abcd</child>
//</root>
printDocument(dom);

Document parse (String uri)

将给定 URI 的内容解析为 XML 文档并返回一个新的 DOM 文档对象。 (机器翻译)

// Write the XML to a file, then address that file by its URI.
final var xmlPath = Path.of("R:", "java-work", "sample.xml");
System.out.println(xmlPath); // R:\java-work\sample.xml

Files.writeString(xmlPath, """
        <root><child>abcd</child></root>
        """);

final var xmlUri = xmlPath.toUri();
System.out.println(xmlUri); // file:///R:/java-work/sample.xml

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

// parse(String) takes the URI in its string form.
final var dom = parser.parse(xmlUri.toString());

//<root>
//    <child>abcd</child>
//</root>
printDocument(dom);

abstract Document parse (InputSource is)

将给定输入源的内容解析为 XML 文档并返回一个新的 DOM 文档对象。 (机器翻译)

final var xml = """
        <root><child>abcd</child></root>
        """;

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

// Wrap a character reader in an InputSource; the reader is closed when the try block exits.
try (final var xmlReader = new StringReader(xml)) {
    final var dom = parser.parse(new InputSource(xmlReader));

    //<root>
    //    <child>abcd</child>
    //</root>
    printDocument(dom);
}

void reset ()

将此 DocumentBuilder 重置为其原始配置。 (机器翻译)

// Two DTD files that define the same entity "aaa" with different values.
final var dtdB = Path.of("R:", "java-work", "bbb.dtd");
System.out.println(dtdB); // R:\java-work\bbb.dtd

Files.writeString(dtdB, """
        <!ENTITY aaa "bbb">
        """);

final var dtdC = Path.of("R:", "java-work", "ccc.dtd");
System.out.println(dtdC); // R:\java-work\ccc.dtd

Files.writeString(dtdC, """
        <!ENTITY aaa "ccc">
        """);

// The document itself references bbb.dtd.
final var xml = """
        <!DOCTYPE root SYSTEM "file:///R:/java-work/bbb.dtd">
        <root>&aaa;</root>
        """;

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

// Redirect requests for bbb.dtd to ccc.dtd; returning null leaves other entities alone.
parser.setEntityResolver((_, systemId) ->
        "file:///R:/java-work/bbb.dtd".equals(systemId)
                ? new InputSource(dtdC.toUri().toString())
                : null);

// First parse: the resolver is active, so &aaa; expands to the value from ccc.dtd.
final var beforeReset = parser.parse(new ByteArrayInputStream(xml.getBytes()));

//<!DOCTYPE root SYSTEM "file:///R:/java-work/bbb.dtd">
//<root>ccc</root>
printDocument(beforeReset);

parser.reset();

// Second parse, after reset(): &aaa; expands to the value from bbb.dtd.
final var afterReset = parser.parse(new ByteArrayInputStream(xml.getBytes()));

//<!DOCTYPE root SYSTEM "file:///R:/java-work/bbb.dtd">
//<root>bbb</root>
printDocument(afterReset);

abstract void setEntityResolver (EntityResolver er)

指定用于解析 XML 文档中存在的实体的 EntityResolver。 (机器翻译)

// The document references a DTD by system identifier; the resolver below supplies its content.
final var xml = """
        <!DOCTYPE root SYSTEM "file:///R:/java-work/xxx.dtd">
        <root>&aaa;</root>
        """;

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

parser.setEntityResolver((_, systemId) -> {
    // Any other system identifier is not handled here.
    if (!"file:///R:/java-work/xxx.dtd".equals(systemId)) {
        return null;
    }

    // Serve the DTD from memory.
    final var dtd = """
            <!ENTITY aaa "bbb">
            """;
    return new InputSource(new ByteArrayInputStream(dtd.getBytes()));
});

final var dom = parser.parse(new ByteArrayInputStream(xml.getBytes()));

//<!DOCTYPE root SYSTEM "file:///R:/java-work/xxx.dtd">
//<root>bbb</root>
printDocument(dom);

abstract void setErrorHandler (ErrorHandler eh)

指定解析器要使用的 ErrorHandler。 (机器翻译)

// The XML document intentionally does not match the DTD:
// the DTD declares only (child-a) as content of <root>, but the document uses <child-z>.
final var xml = """
        <!DOCTYPE root [
            <!ELEMENT root (child-a)>
        ]>
        <root><child-z/></root>
        """;

final var factory = DocumentBuilderFactory.newInstance();
factory.setValidating(true);

final var builder = factory.newDocumentBuilder();
System.out.println(builder.isValidating()); // true

// Install a handler that prints every error reported through ErrorHandler.error().
builder.setErrorHandler(new DefaultHandler() {
    @Override
    public void error(SAXParseException e) {
        System.out.println("-- ErrorHandler error --");
        System.out.println(e);
    }
});

// Parse only to trigger validation; the returned Document is not needed here.
builder.parse(new ByteArrayInputStream(xml.getBytes()));

// Result
// ↓
//-- ErrorHandler error --
//org.xml.sax.SAXParseException; lineNumber: 4; columnNumber: 17;
// Element type "child-z" must be declared.
//-- ErrorHandler error --
//org.xml.sax.SAXParseException; lineNumber: 4; columnNumber: 24;
// The content of element type "root" must match "(child-a)".

相关文章

To top of page