Java : DocumentBuilder (XML) - API使用例
DocumentBuilder (Java SE 22 & JDK 22) の使い方まとめです。
ほとんどのメソッドにサンプルコードがあります。
API仕様書のおともにどうぞ。
概要
DocumentBuilderクラスは、XML形式の文字列やファイルを解析して Document オブジェクト (DOMツリー) を生成します。
DocumentBuilderそのものを生成するには DocumentBuilderFactory を使います。
関連記事 : XML (DOM) の基本操作
// In-memory XML used as parser input.
final var source = """
<root>
<child-a>AAA</child-a>
<child-b>BBB</child-b>
</root>
""";

// Create the parser and feed it the bytes of the XML text.
final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
final var input = new ByteArrayInputStream(source.getBytes());
final var doc = parser.parse(input);

// For each child tag, print the first matching node and then its text content.
for (final var tagName : new String[] {"child-a", "child-b"}) {
    final var element = doc.getElementsByTagName(tagName).item(0);
    System.out.println(element); // [child-a: null], then [child-b: null]
    System.out.println(element.getTextContent()); // AAA, then BBB
}
本記事のコード例では、利便性のために以下の printDocument メソッドを使います。
/**
 * Writes the given DOM document to standard output as indented XML,
 * omitting the XML declaration.
 *
 * <p>When the document has a document type, its public and system identifiers
 * (if present) are forwarded to the transformer as the
 * {@code DOCTYPE_PUBLIC} / {@code DOCTYPE_SYSTEM} output properties.
 *
 * @param document the DOM document to print
 * @throws TransformerException if the transformer cannot be created or the
 *         transformation fails
 */
public void printDocument(Document document) throws TransformerException {
    final var serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");

    // Resolve both identifiers up front; each stays null when there is no doctype.
    final var doctype = document.getDoctype();
    final var publicId = (doctype == null) ? null : doctype.getPublicId();
    final var systemId = (doctype == null) ? null : doctype.getSystemId();

    if (publicId != null) {
        serializer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, publicId);
    }
    if (systemId != null) {
        serializer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, systemId);
    }

    // Serialize into memory first, then print the accumulated text in one call.
    final var buffer = new StringWriter();
    serializer.transform(new DOMSource(document), new StreamResult(buffer));
    System.out.print(buffer);
}
// Start from an empty document and create the three nodes that make up the tree.
final var doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
final var rootElement = doc.createElement("root");
final var childElement = doc.createElement("child");
final var textNode = doc.createTextNode("abcd");

System.out.println(rootElement); // [root: null]
System.out.println(childElement); // [child: null]
System.out.println(textNode); // [#text: abcd]

// Assemble: document -> root -> child -> text.
doc.appendChild(rootElement);
rootElement.appendChild(childElement);
childElement.appendChild(textNode);

// Expected output:
//<root>
// <child>abcd</child>
//</root>
printDocument(doc);
コンストラクタ
DocumentBuilder ()
protectedです。
独自にサブクラスを作ることは少ないと思いますので、コード例は割愛します。
メソッド
abstract DOMImplementation getDOMImplementation ()
// Obtain the DOMImplementation behind a freshly created builder.
final var impl = DocumentBuilderFactory.newInstance()
        .newDocumentBuilder()
        .getDOMImplementation();

// Create a document type with a system identifier only (public identifier is null).
final var dtdLocation = "file:///R:/java-work/sample.dtd";
final var doctype = impl.createDocumentType("root", null, dtdLocation);
System.out.println(doctype); // [root: null]

// Create a document whose root element is "root" and which carries that doctype.
final var doc = impl.createDocument(null, "root", doctype);

// Expected output:
//<!DOCTYPE root SYSTEM "file:///R:/java-work/sample.dtd">
//<root/>
printDocument(doc);
Schema getSchema ()
// Schema declaring <root> as a plain xsd:string element.
final var schemaText = """
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<xsd:element name="root" type="xsd:string"/>
</xsd:schema>
""";
final var schemaSource = new StreamSource(new ByteArrayInputStream(schemaText.getBytes()));
final var schema = SchemaFactory.newDefaultInstance().newSchema(schemaSource);

// Every builder created by this factory is associated with the schema.
final var factory = DocumentBuilderFactory.newInstance();
factory.setSchema(schema);

// Reports validation errors by printing them.
final var reporter = new DefaultHandler() {
    @Override
    public void error(SAXParseException e) {
        System.out.println("-- ErrorHandler error --");
        System.out.println(e);
    }
};

{
    // Case 1: the XML conforms to the schema.
    final var validXml = """
        <root>abcd</root>
        """;

    final var parser = factory.newDocumentBuilder();
    System.out.println(parser.getSchema().equals(schema)); // true

    parser.setErrorHandler(reporter);
    final var doc = parser.parse(new ByteArrayInputStream(validXml.getBytes()));

    // Expected output:
    //<root>abcd</root>
    printDocument(doc);
}
{
    // Case 2: the XML does not conform to the schema (<root> has a child element).
    final var invalidXml = """
        <root><child>abcd</child></root>
        """;

    final var parser = factory.newDocumentBuilder();
    System.out.println(parser.getSchema().equals(schema)); // true

    parser.setErrorHandler(reporter);
    parser.parse(new ByteArrayInputStream(invalidXml.getBytes()));

    // Result (message text quoted as recorded in the original example, Japanese locale):
    //-- ErrorHandler error --
    //org.xml.sax.SAXParseException; lineNumber: 1; columnNumber: 33; cvc-type.3.1.2:
    // 要素'root'は単純型であるため、要素情報アイテム[children]を含めることはできません。
    // (i.e. element 'root' is a simple type, so it must not contain child elements)
}
abstract boolean isNamespaceAware ()
// XML whose elements live in the "sample" namespace, bound to the prefix "ns".
final var source = """
<ns:root xmlns:ns="sample">
<ns:child/>
</ns:root>
""";

// newNSInstance() yields a factory whose builders are namespace-aware.
final var parser = DocumentBuilderFactory.newNSInstance().newDocumentBuilder();
System.out.println(parser.isNamespaceAware()); // true

final var doc = parser.parse(new ByteArrayInputStream(source.getBytes()));

// Look the element up by namespace URI and local name rather than by qualified name.
final var matches = doc.getElementsByTagNameNS("sample", "child");
System.out.println(matches.item(0)); // [ns:child: null]
abstract boolean isValidating ()
// The XML deliberately violates its own DTD: <root> must contain <child-a>,
// but contains <child-z> instead.
final var source = """
<!DOCTYPE root [
<!ELEMENT root (child-a)>
]>
<root><child-z/></root>
""";

// Turn on DTD validation before creating the builder.
final var validatingFactory = DocumentBuilderFactory.newInstance();
validatingFactory.setValidating(true);

final var parser = validatingFactory.newDocumentBuilder();
System.out.println(parser.isValidating()); // true

// Reports validation errors by printing them.
final var reporter = new DefaultHandler() {
    @Override
    public void error(SAXParseException e) {
        System.out.println("-- ErrorHandler error --");
        System.out.println(e);
    }
};
parser.setErrorHandler(reporter);

parser.parse(new ByteArrayInputStream(source.getBytes()));

// Result (message text quoted as recorded in the original example, Japanese locale):
//-- ErrorHandler error --
//org.xml.sax.SAXParseException; lineNumber: 4; columnNumber: 17;
// 要素タイプ"child-z"を宣言する必要があります。
// (i.e. element type "child-z" must be declared)
//-- ErrorHandler error --
//org.xml.sax.SAXParseException; lineNumber: 4; columnNumber: 24;
// 要素タイプ"root"のコンテンツは"(child-a)"と一致する必要があります。
// (i.e. the content of element type "root" must match "(child-a)")
boolean isXIncludeAware ()
// Write the fragment that will be pulled in through XInclude.
final var includedFile = Path.of("R:", "java-work", "sample.xml");
System.out.println(includedFile); // R:\java-work\sample.xml

final var includedContent = """
<child>abcd</child>
""";
Files.writeString(includedFile, includedContent);

// Host document referencing the fragment above via xi:include.
final var hostXml = """
<root xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:include href="file:///R:/java-work/sample.xml" parse="xml" />
</root>
""";

// Use a namespace-aware factory and enable XInclude processing on it.
final var xincludeFactory = DocumentBuilderFactory.newNSInstance();
xincludeFactory.setXIncludeAware(true);

final var parser = xincludeFactory.newDocumentBuilder();
System.out.println(parser.isXIncludeAware()); // true

final var doc = parser.parse(new ByteArrayInputStream(hostXml.getBytes()));

// The included <child> element is now part of the parsed document.
final var included = doc.getElementsByTagName("child").item(0);
System.out.println(included); // [child: null]
System.out.println(included.getTextContent()); // abcd
abstract Document newDocument ()
// Create an empty document to build on.
final var doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();

// Build the <child> element first: attribute plus text content.
final var childElement = doc.createElement("child");
childElement.setAttribute("aa", "AA");
childElement.appendChild(doc.createTextNode("abcd"));

// Then wrap it in <root> and attach <root> to the document.
final var rootElement = doc.createElement("root");
rootElement.appendChild(childElement);
doc.appendChild(rootElement);

// Expected output:
//<root>
// <child aa="AA">abcd</child>
//</root>
printDocument(doc);
Document parse (File f)
// Prepare an XML file on disk to parse.
final var xmlFile = Path.of("R:", "java-work", "sample.xml");
System.out.println(xmlFile); // R:\java-work\sample.xml

final var content = """
<root><child>abcd</child></root>
""";
Files.writeString(xmlFile, content);

// parse(File) takes a java.io.File, so convert the Path.
final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
final var doc = parser.parse(xmlFile.toFile());

// Expected output:
//<root>
// <child>abcd</child>
//</root>
printDocument(doc);
Document parse (InputStream is)
// XML text to parse from an in-memory byte stream.
final var source = """
<root><child>abcd</child></root>
""";

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
final var input = new ByteArrayInputStream(source.getBytes());
final var doc = parser.parse(input);

// Expected output:
//<root>
// <child>abcd</child>
//</root>
printDocument(doc);
Document parse (InputStream is, String systemId)
// XML text to parse from an in-memory byte stream.
final var source = """
<root><child>abcd</child></root>
""";

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

// The system identifier passed to parse() is reported back as the document's base URI.
final var systemId = Path.of("R:", "java-work").toUri().toString();
System.out.println(systemId); // file:///R:/java-work/

final var input = new ByteArrayInputStream(source.getBytes());
final var doc = parser.parse(input, systemId);
System.out.println(doc.getBaseURI()); // file:///R:/java-work/

// Expected output:
//<root>
// <child>abcd</child>
//</root>
printDocument(doc);
Document parse (String uri)
// Prepare an XML file on disk to parse.
final var xmlFile = Path.of("R:", "java-work", "sample.xml");
System.out.println(xmlFile); // R:\java-work\sample.xml

final var content = """
<root><child>abcd</child></root>
""";
Files.writeString(xmlFile, content);

// parse(String) expects a URI string, so convert the path to its file: URI form.
final var location = xmlFile.toUri().toString();
System.out.println(location); // file:///R:/java-work/sample.xml

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
final var doc = parser.parse(location);

// Expected output:
//<root>
// <child>abcd</child>
//</root>
printDocument(doc);
abstract Document parse (InputSource is)
// XML text to parse through a character stream wrapped in an InputSource.
final var source = """
<root><child>abcd</child></root>
""";

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

try (final var textReader = new StringReader(source)) {
    // Expected output:
    //<root>
    // <child>abcd</child>
    //</root>
    printDocument(parser.parse(new InputSource(textReader)));
}
void reset ()
// Two DTD files that define the same entity "aaa" with different replacement text.
final var bbbDtd = Path.of("R:", "java-work", "bbb.dtd");
System.out.println(bbbDtd); // R:\java-work\bbb.dtd
final var bbbContent = """
<!ENTITY aaa "bbb">
""";
Files.writeString(bbbDtd, bbbContent);

final var cccDtd = Path.of("R:", "java-work", "ccc.dtd");
System.out.println(cccDtd); // R:\java-work\ccc.dtd
final var cccContent = """
<!ENTITY aaa "ccc">
""";
Files.writeString(cccDtd, cccContent);

// The XML itself refers to bbb.dtd.
final var source = """
<!DOCTYPE root SYSTEM "file:///R:/java-work/bbb.dtd">
<root>&aaa;</root>
""";

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

// Redirect requests for bbb.dtd to ccc.dtd; returning null leaves any other lookup untouched.
parser.setEntityResolver((_, systemId) ->
        "file:///R:/java-work/bbb.dtd".equals(systemId)
                ? new InputSource(cccDtd.toUri().toString())
                : null);

// With the resolver installed, &aaa; expands using ccc.dtd.
final var beforeReset = parser.parse(new ByteArrayInputStream(source.getBytes()));

// Expected output:
//<!DOCTYPE root SYSTEM "file:///R:/java-work/bbb.dtd">
//<root>ccc</root>
printDocument(beforeReset);

// After reset() the resolver no longer applies, so bbb.dtd is read as written in the XML.
parser.reset();

final var afterReset = parser.parse(new ByteArrayInputStream(source.getBytes()));

// Expected output:
//<!DOCTYPE root SYSTEM "file:///R:/java-work/bbb.dtd">
//<root>bbb</root>
printDocument(afterReset);
abstract void setEntityResolver (EntityResolver er)
// The XML refers to an external DTD; the resolver below supplies its content from memory.
final var source = """
<!DOCTYPE root SYSTEM "file:///R:/java-work/xxx.dtd">
<root>&aaa;</root>
""";

final var parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

parser.setEntityResolver((_, systemId) -> {
    // Only the xxx.dtd request is handled here; null is returned for anything else.
    if (!"file:///R:/java-work/xxx.dtd".equals(systemId)) {
        return null;
    }

    final var dtdText = """
        <!ENTITY aaa "bbb">
        """;
    return new InputSource(new ByteArrayInputStream(dtdText.getBytes()));
});

final var doc = parser.parse(new ByteArrayInputStream(source.getBytes()));

// Expected output:
//<!DOCTYPE root SYSTEM "file:///R:/java-work/xxx.dtd">
//<root>bbb</root>
printDocument(doc);
abstract void setErrorHandler (ErrorHandler eh)
// The XML deliberately violates its own DTD: <root> must contain <child-a>,
// but contains <child-z> instead.
final var source = """
<!DOCTYPE root [
<!ELEMENT root (child-a)>
]>
<root><child-z/></root>
""";

// Validation must be enabled on the factory for DTD violations to be reported.
final var validatingFactory = DocumentBuilderFactory.newInstance();
validatingFactory.setValidating(true);

final var parser = validatingFactory.newDocumentBuilder();
System.out.println(parser.isValidating()); // true

// Install a handler that prints each reported error.
final var reporter = new DefaultHandler() {
    @Override
    public void error(SAXParseException e) {
        System.out.println("-- ErrorHandler error --");
        System.out.println(e);
    }
};
parser.setErrorHandler(reporter);

parser.parse(new ByteArrayInputStream(source.getBytes()));

// Result (message text quoted as recorded in the original example, Japanese locale):
//-- ErrorHandler error --
//org.xml.sax.SAXParseException; lineNumber: 4; columnNumber: 17;
// 要素タイプ"child-z"を宣言する必要があります。
// (i.e. element type "child-z" must be declared)
//-- ErrorHandler error --
//org.xml.sax.SAXParseException; lineNumber: 4; columnNumber: 24;
// 要素タイプ"root"のコンテンツは"(child-a)"と一致する必要があります。
// (i.e. the content of element type "root" must match "(child-a)")