Skip to content

Commit

Permalink
FIX: Document info date parsing
Browse files Browse the repository at this point in the history
- fixes for date handling in the document info dictionary:
  - factored out getting the String literal from `addDateProperty` to `getDateLiteral` method, to simplify the methods;
  - no longer throwing a `PdfInvalidException` from `addDateProperty` so that bad dates don't halt docinfo processing, i.e. `ModDate` is still validated if `CreationDate` is invalid;
- fixes to `jhove-bbt/scripts/create-1.33-target.sh` to adjust output affected by the above; and
- bumped PDF module version 1.12.7 -> 1.12.8 and updated its release date.
  • Loading branch information
carlwilson committed Jan 7, 2025
1 parent 8ee2c10 commit a0d2e0d
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 20 deletions.
12 changes: 12 additions & 0 deletions jhove-bbt/scripts/create-1.33-target.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,15 @@ echo "TEST BASELINE: Creating baseline"
echo " - copying ${baselineRoot} baseline to ${targetRoot}"
cp -R "${baselineRoot}" "${targetRoot}"


# Update release details for PDF module: rewrite the PDF-hul release number
# (1.12.7 -> 1.12.8) and release date (2024-08-22 -> 2025-01-24) in files
# under ${targetRoot}.
# *.pdf.jhove.xml files: the <reportingModule> element carries both release and date.
find "${targetRoot}" -type f -name "*.pdf.jhove.xml" -exec sed -i 's/<reportingModule release="1.12.7" date="2024-08-22">PDF-hul<\/reportingModule>/<reportingModule release="1.12.8" date="2025-01-24">PDF-hul<\/reportingModule>/' {} \;
# audit.jhove.xml files: the <module> element carries the release only.
find "${targetRoot}" -type f -name "audit.jhove.xml" -exec sed -i 's/<module release="1.12.7">PDF-hul<\/module>/<module release="1.12.8">PDF-hul<\/module>/' {} \;
# audit-PDF-hul.jhove.xml files: release and date are substituted in two separate passes.
find "${targetRoot}" -type f -name "audit-PDF-hul.jhove.xml" -exec sed -i 's/<release>1.12.7<\/release>/<release>1.12.8<\/release>/' {} \;
find "${targetRoot}" -type f -name "audit-PDF-hul.jhove.xml" -exec sed -i 's/2024-08-22/2025-01-24/' {} \;

# Fix the results affected by the improvements to date handling in the PDF module
# Insert the subMessage attribute into <message> elements of the AA_Banner result.
# NOTE(review): the sed expression has no address, so it rewrites the first
# "<message offset" on every line of the file that contains it - confirm that
# only the CreationDate message is expected to match.
sed -i 's/<message offset/<message subMessage="For date property CreationDate" offset/' "${targetRoot}/examples/modules/PDF-hul/AA_Banner.pdf.jhove.xml"
# When the candidate output for this file exists, copy it over the target copy;
# otherwise the target file is left untouched by this step.
if [[ -f "${candidateRoot}/errors/modules/PDF-hul/pdf-hul-9-govdocs-065694.pdf.jhove.xml" ]]; then
cp "${candidateRoot}/errors/modules/PDF-hul/pdf-hul-9-govdocs-065694.pdf.jhove.xml" "${targetRoot}/errors/modules/PDF-hul/pdf-hul-9-govdocs-065694.pdf.jhove.xml"
fi
2 changes: 1 addition & 1 deletion jhove-installer/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
<html.hul.version>1.4.4</html.hul.version>
<jpeg2000.hul.version>1.4.4</jpeg2000.hul.version>
<jpeg.hul.version>1.5.4</jpeg.hul.version>
<pdf.hul.version>1.12.7</pdf.hul.version>
<pdf.hul.version>1.12.8</pdf.hul.version>
<tiff.hul.version>1.9.5</tiff.hul.version>
<utf8.hul.version>1.7.4</utf8.hul.version>
<wave.hul.version>1.8.3</wave.hul.version>
Expand Down
2 changes: 1 addition & 1 deletion jhove-modules/pdf-hul/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<version>1.33.0-SNAPSHOT</version>
</parent>
<artifactId>pdf-hul</artifactId>
<version>1.12.7</version>
<version>1.12.8</version>
<name>JHOVE PDF Module HUL</name>
<description>PDF module developed by Harvard University Library</description>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -380,8 +380,8 @@ public class PdfModule extends ModuleBase {
******************************************************************/

private static final String NAME = "PDF-hul";
private static final String RELEASE = "1.12.7";
private static final int[] DATE = { 2024, 8, 22 };
private static final String RELEASE = "1.12.8";
private static final int[] DATE = { 2025, 1, 24 };
private static final String[] FORMAT = { "PDF",
"Portable Document Format" };
private static final String COVERAGE = "PDF 1.0-1.6; "
Expand Down Expand Up @@ -2031,9 +2031,9 @@ protected boolean readDocInfoDict(RepInfo info) {
// CreationDate requires string-to-date conversion
// ModDate does too
addDateProperty(_docInfoDict, _docInfoList, DICT_KEY_CREATION_DATE,
PROP_NAME_CREATION_DATE);
PROP_NAME_CREATION_DATE, info);
addDateProperty(_docInfoDict, _docInfoList, DICT_KEY_MODIFIED_DATE,
PROP_NAME_MODIFIED_DATE);
PROP_NAME_MODIFIED_DATE, info);
addStringProperty(_docInfoDict, _docInfoList, DICT_KEY_TRAPPED,
PROP_NAME_TRAPPED);
} catch (PdfException e) {
Expand Down Expand Up @@ -4333,27 +4333,42 @@ protected void addStringProperty(PdfDictionary dict,
* with a string value, to a specified List.
*/
// NOTE(review): this span appears to show the pre- and post-change bodies of
// addDateProperty interleaved without +/- markers (two signature tails, two
// declarations of propObject); read it as a diff, not as compilable Java.
//
// Adds the dictionary entry stored under key to propList as a DATE property
// named propName. When _encrypted is set, a STRING property holding ENCRYPTED
// is added instead. In the variant that takes info, a PdfInvalidException from
// parseDate() is caught: info is marked invalid and an ErrorMessage built from
// PDF_HUL_133, with the sub-message "For date property " + propName and the
// offset from _parser.getOffset(), is recorded on it.
protected void addDateProperty(PdfDictionary dict, List<Property> propList,
// presumably the earlier signature tail, which still declares the exception - confirm
String key, String propName) throws PdfInvalidException {
// presumably the replacement signature tail: takes the RepInfo and declares no exception - confirm
String key, String propName, final RepInfo info) {
if (_encrypted) {
String propText = ENCRYPTED;
propList.add(new Property(propName, PropertyType.STRING, propText));
} else {
PdfObject propObject = dict.get(key);
if (propObject instanceof PdfSimpleObject) {
Token tok = ((PdfSimpleObject) propObject).getToken();
if (tok instanceof Literal) {
Literal lit = (Literal) tok;
// empty literals are skipped without adding a property
if (!lit.getValue().isEmpty()) {
Date propDate = lit.parseDate();
if (propDate != null) {
propList.add(new Property(propName, PropertyType.DATE, propDate));
}
}
return;
}
PdfObject propObject = dict.get(key);
// no entry under this key: nothing to add
if (propObject == null) {
return;
}
// getDateLiteral yields null unless the object is a simple object wrapping a Literal
Literal lit = getDateLiteral(propObject);
if (lit != null && !lit.getValue().isEmpty()) {
try {
Date propDate = lit.parseDate();
if (propDate != null) {
propList.add(new Property(propName, PropertyType.DATE, propDate));
return;
}
} catch (PdfInvalidException e) {
// a bad date is reported on info rather than propagated to the caller
info.setValid(false);
info.setMessage(new ErrorMessage(JhoveMessages.getMessageInstance(
MessageConstants.PDF_HUL_133.getId(), MessageConstants.PDF_HUL_133.getMessage(), "For date property " + propName), _parser.getOffset()));
}
}
}

/**
 * Returns the {@link Literal} token wrapped by {@code obj}, or {@code null}
 * when {@code obj} is not a {@link PdfSimpleObject} or its token is not a
 * {@link Literal}.
 *
 * @param obj the PDF object to inspect; may be {@code null}
 * @return the literal token, or {@code null} if there is none
 */
private Literal getDateLiteral(final PdfObject obj) {
    if (!(obj instanceof PdfSimpleObject)) {
        return null;
    }
    final Token token = ((PdfSimpleObject) obj).getToken();
    return (token instanceof Literal) ? (Literal) token : null;
}

/*
* General function for adding a property with a 32-bit
* value, with an array of Strings to interpret
Expand Down
2 changes: 1 addition & 1 deletion jhove-modules/utf8-hul/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
<dependency>
<groupId>org.openpreservation.jhove.modules</groupId>
<artifactId>pdf-hul</artifactId>
<version>1.12.7</version>
<version>1.12.8</version>
<scope>test</scope>
</dependency>
</dependencies>
Expand Down

0 comments on commit a0d2e0d

Please sign in to comment.