From 13bf5b5e7e2c8697a3b07d3449b35511d0661e19 Mon Sep 17 00:00:00 2001 From: Michael Williamson Date: Mon, 30 Dec 2024 10:27:22 +0000 Subject: [PATCH] Convert SDT checkboxes to checkbox inputs --- NEWS | 5 +++ .../mammoth/internal/docx/OfficeXml.java | 5 +++ .../internal/docx/StatefulBodyXmlReader.java | 23 ++++++++-- .../mammoth/tests/docx/BodyXmlTests.java | 43 +++++++++++++++++++ 4 files changed, 72 insertions(+), 4 deletions(-) diff --git a/NEWS b/NEWS index 93ff42d..bced145 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,8 @@ +# 1.9.0 + +* Detect checkboxes, both as complex fields and structured document tags, and + convert them to checkbox inputs. + # 1.8.0 * Add style mapping for highlights. diff --git a/src/main/java/org/zwobble/mammoth/internal/docx/OfficeXml.java b/src/main/java/org/zwobble/mammoth/internal/docx/OfficeXml.java index d4dc5f6..8582237 100644 --- a/src/main/java/org/zwobble/mammoth/internal/docx/OfficeXml.java +++ b/src/main/java/org/zwobble/mammoth/internal/docx/OfficeXml.java @@ -31,6 +31,11 @@ public class OfficeXml { .put("mc", "http://schemas.openxmlformats.org/markup-compatibility/2006") .put("v", "urn:schemas-microsoft-com:vml") .put("office-word", "urn:schemas-microsoft-com:office:word") + + // [MS-DOCX]: Word Extensions to the Office Open XML (.docx) File Format + // https://learn.microsoft.com/en-us/openspecs/office_standards/ms-docx/b839fe1f-e1ca-4fa6-8c26-5954d0abbccd + .put("wordml", "http://schemas.microsoft.com/office/word/2010/wordml") + .build(); public static XmlElement parseXml(InputStream inputStream) { diff --git a/src/main/java/org/zwobble/mammoth/internal/docx/StatefulBodyXmlReader.java b/src/main/java/org/zwobble/mammoth/internal/docx/StatefulBodyXmlReader.java index cdbc96a..4962400 100644 --- a/src/main/java/org/zwobble/mammoth/internal/docx/StatefulBodyXmlReader.java +++ b/src/main/java/org/zwobble/mammoth/internal/docx/StatefulBodyXmlReader.java @@ -257,12 +257,16 @@ private boolean isSmallCaps(XmlElementLike properties) { private boolean readBooleanElement(XmlElementLike properties, String tagName) { return properties.findChild(tagName) - .map(child -> child.getAttributeOrNone("w:val") - .map(value -> !value.equals("false") && !value.equals("0")) - .orElse(true)) + .map(child -> readBooleanAttributeValue(child.getAttributeOrNone("w:val"))) .orElse(false); } + private boolean readBooleanAttributeValue(Optional valAttributeValue) { + return valAttributeValue + .map(value -> !value.equals("false") && !value.equals("0")) + .orElse(true); + } + private VerticalAlignment readVerticalAlignment(XmlElementLike properties) { String verticalAlignment = readVal(properties, "w:vertAlign").orElse(""); switch (verticalAlignment) { @@ -697,7 +701,18 @@ private ReadResult readImage(String imagePath, Optional altText, InputSt } private ReadResult readSdt(XmlElement element) { - return readElements(element.findChildOrEmpty("w:sdtContent").getChildren()); + Optional checkbox = element + .findChildOrEmpty("w:sdtPr") + .findChild("wordml:checkbox"); + + if (checkbox.isPresent()) { + Optional checkedElement = checkbox.get().findChild("wordml:checked"); + boolean isChecked = checkedElement.isPresent() && + readBooleanAttributeValue(checkedElement.get().getAttributeOrNone("wordml:val")); + return success(new Checkbox(isChecked)); + } else { + return readElements(element.findChildOrEmpty("w:sdtContent").getChildren()); + } } private String relationshipIdToDocxPath(String relationshipId) { diff --git a/src/test/java/org/zwobble/mammoth/tests/docx/BodyXmlTests.java b/src/test/java/org/zwobble/mammoth/tests/docx/BodyXmlTests.java index 30796ef..7ab1ec0 100644 --- a/src/test/java/org/zwobble/mammoth/tests/docx/BodyXmlTests.java +++ b/src/test/java/org/zwobble/mammoth/tests/docx/BodyXmlTests.java @@ -722,6 +722,49 @@ public void complexFieldCheckboxWithDefault0AndChecked1IsUnchecked() { ))); } + @Test + public void structuredDocumentTagCheckboxWithoutCheckedIsNotChecked() { + XmlElement element = element("w:sdt", list( + element("w:sdtPr", list( + element("wordml:checkbox") + )) + )); + + DocumentElement result = readSuccess(bodyReader(), element); + + assertThat(result, isCheckbox(false)); + } + + @Test + public void structuredDocumentTagCheckboxWithChecked0IsNotChecked() { + XmlElement element = element("w:sdt", list( + element("w:sdtPr", list( + element("wordml:checkbox", list( + element("wordml:checked", map("wordml:val", "0")) + )) + )) + )); + + DocumentElement result = readSuccess(bodyReader(), element); + + assertThat(result, isCheckbox(false)); + } + + @Test + public void structuredDocumentTagCheckboxWithChecked1IsChecked() { + XmlElement element = element("w:sdt", list( + element("w:sdtPr", list( + element("wordml:checkbox", list( + element("wordml:checked", map("wordml:val", "1")) + )) + )) + )); + + DocumentElement result = readSuccess(bodyReader(), element); + + assertThat(result, isCheckbox(true)); + } + private XmlElement complexFieldCheckboxParagraph(List ffDataChildren) { return element("w:p", list( element("w:r", list(