Skip to content

Commit 586e361

Browse files
committed
TIKA-4444: get subject from xmp + add tests
1 parent 77b1d1e commit 586e361

File tree

2 files changed

+10
-0
lines changed

2 files changed

+10
-0
lines changed

tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDMetadataExtractor.java

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -124,6 +124,7 @@ private static void extractDublin(XMPMetadata xmp, Metadata metadata) {
124124
extractDublinCoreListItems(metadata, TikaCoreProperties.PUBLISHER, dcSchema);
125125
extractDublinCoreListItems(metadata, TikaCoreProperties.RELATION, dcSchema);
126126
extractDublinCoreSimpleItem(metadata, TikaCoreProperties.SOURCE, dcSchema);
127+
extractDublinCoreListItems(metadata, TikaCoreProperties.SUBJECT, dcSchema);
127128
}
128129
}
129130

tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/CustomTikaXMPTest.java

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,9 @@ public void testPDFX() throws Exception {
5959
public void testPDFUA() throws Exception {
6060
Metadata metadata = extract("testPDFUA.xmp");
6161
assertEquals(1, metadata.getInt(PDF.PDFUAID_PART));
62+
String[] subjects = metadata.getValues(TikaCoreProperties.SUBJECT);
63+
assertEquals("keywords", subjects[0]);
64+
assertEquals("subject", subjects[1]);
6265
}
6366

6467
@Test
@@ -83,6 +86,12 @@ public void testDublinCore() throws Exception {
8386
assertEquals("International Union of Thinkology", metadata.get(TikaCoreProperties.PUBLISHER));
8487
assertEquals("Relation", metadata.get(TikaCoreProperties.RELATION));
8588
assertEquals("Journal of Thinkology", metadata.get(TikaCoreProperties.SOURCE));
89+
String[] subjects = metadata.getValues(TikaCoreProperties.SUBJECT);
90+
assertEquals("THOUGHTS", subjects[0]);
91+
assertEquals("HAPPINESS", subjects[1]);
92+
assertEquals("FEAR", subjects[2]);
93+
assertEquals("ANGER", subjects[3]);
94+
assertEquals("DESPAIR", subjects[4]);
8695
}
8796

8897
private Metadata extract(String xmpFileName) throws IOException, TikaException, SAXException {

0 commit comments

Comments
 (0)