From 94dfab535d29f0bdff4c0e33259ee4c5ff3949c9 Mon Sep 17 00:00:00 2001 From: Matthias Valvekens Date: Wed, 14 Sep 2022 11:20:33 +0200 Subject: [PATCH] Correct XMP URI serialisation in PDF/UA ext schema URIs and text follow different conventions in the XMP data model. While many parsers tolerate URIs encoded as text, it's technically not allowed. --- .../java/com/itextpdf/pdfa/PdfAXMPUtil.java | 2 +- .../java/com/itextpdf/pdfa/PdfAXmpTest.java | 61 ++++++++++++++++++ .../cmp_testPdfUAExtensionMetadata.pdf | Bin 0 -> 8487 bytes 3 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 pdfa/src/test/resources/com/itextpdf/pdfa/cmp/PdfAXmpTest/cmp_testPdfUAExtensionMetadata.pdf diff --git a/pdfa/src/main/java/com/itextpdf/pdfa/PdfAXMPUtil.java b/pdfa/src/main/java/com/itextpdf/pdfa/PdfAXMPUtil.java index 5ddf056ca8..3a65059008 100644 --- a/pdfa/src/main/java/com/itextpdf/pdfa/PdfAXMPUtil.java +++ b/pdfa/src/main/java/com/itextpdf/pdfa/PdfAXMPUtil.java @@ -54,7 +54,7 @@ public class PdfAXMPUtil { " \n" + " \n" + " \n" + - " http://www.aiim.org/pdfua/ns/id/\n" + + " \n" + " pdfuaid\n" + " PDF/UA identification schema\n" + " \n" + diff --git a/pdfa/src/test/java/com/itextpdf/pdfa/PdfAXmpTest.java b/pdfa/src/test/java/com/itextpdf/pdfa/PdfAXmpTest.java index 333b1427d0..1a423957e3 100644 --- a/pdfa/src/test/java/com/itextpdf/pdfa/PdfAXmpTest.java +++ b/pdfa/src/test/java/com/itextpdf/pdfa/PdfAXmpTest.java @@ -42,11 +42,15 @@ This file is part of the iText (R) project. */ package com.itextpdf.pdfa; +import com.itextpdf.io.source.ByteArrayOutputStream; import com.itextpdf.kernel.pdf.PdfAConformanceLevel; import com.itextpdf.kernel.pdf.PdfDocument; import com.itextpdf.kernel.pdf.PdfOutputIntent; import com.itextpdf.kernel.pdf.PdfReader; +import com.itextpdf.kernel.pdf.PdfString; +import com.itextpdf.kernel.pdf.PdfViewerPreferences; import com.itextpdf.kernel.pdf.PdfWriter; +import com.itextpdf.kernel.pdf.WriterProperties; import com.itextpdf.kernel.utils.CompareTool; import com.itextpdf.kernel.xmp.XMPConst; import com.itextpdf.kernel.xmp.XMPException; @@ -56,9 +60,15 @@ This file is part of the iText (R) project. import com.itextpdf.kernel.xmp.options.SerializeOptions; import com.itextpdf.test.ExtendedITextTest; import com.itextpdf.test.annotations.type.IntegrationTest; + +import java.io.ByteArrayInputStream; import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; + import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -147,6 +157,57 @@ public void saveAndReadDocumentWithCanonicalXmpMetadata() throws IOException, XM } } + @Test + public void testPdfUAExtensionMetadata() throws IOException { + + String outFile = destinationFolder + "testPdfUAExtensionMetadata.pdf"; + String cmpFile = cmpFolder + "cmp_testPdfUAExtensionMetadata.pdf"; + + try (FileOutputStream fos = new FileOutputStream(outFile)) { + generatePdfAWithUA(fos); + } + + CompareTool ct = new CompareTool(); + Assert.assertNull(ct.compareXmp(outFile, cmpFile, true)); + + } + + @Test + public void testPdfUAIdSchemaNameSpaceUriIsNotText() throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + generatePdfAWithUA(baos); + + // check whether the pdfuaid NS URI was properly encoded as a URI with rdf:resource + PdfDocument readDoc = new PdfDocument(new PdfReader(new ByteArrayInputStream(baos.toByteArray()))); + String xmpString = new String(readDoc.getXmpMetadata(), StandardCharsets.UTF_8); + Assert.assertTrue( + "Did not find expected namespaceURI definition", + xmpString.contains("") + ); + + } + + private void generatePdfAWithUA(OutputStream os) throws IOException { + WriterProperties wp = new WriterProperties().addUAXmpMetadata(); + try (PdfWriter w = new PdfWriter(os, wp)) { + PdfOutputIntent outputIntent; + try (InputStream is = new FileInputStream(sourceFolder + "sRGB Color Space Profile.icm")) { + outputIntent = new PdfOutputIntent( + "Custom", "", + "http://www.color.org", + "sRGB IEC61966-2.1", + is + ); + } + PdfDocument pdfDoc = new PdfADocument(w, PdfAConformanceLevel.PDF_A_2A, outputIntent).setTagged(); + pdfDoc.getDocumentInfo().setTitle("Test document"); + pdfDoc.getCatalog().setViewerPreferences(new PdfViewerPreferences().setDisplayDocTitle(true)); + pdfDoc.getCatalog().setLang(new PdfString("en")); + pdfDoc.addNewPage(); + pdfDoc.close(); + } + } + private int count(byte[] array, byte b) { int counter = 0; for (byte each : array) { diff --git a/pdfa/src/test/resources/com/itextpdf/pdfa/cmp/PdfAXmpTest/cmp_testPdfUAExtensionMetadata.pdf b/pdfa/src/test/resources/com/itextpdf/pdfa/cmp/PdfAXmpTest/cmp_testPdfUAExtensionMetadata.pdf new file mode 100644 index 0000000000000000000000000000000000000000..16d382cb6e96133b7c6cc51925c0419d1e7dd26d GIT binary patch literal 8487 zcmeHNcUTi?w?|oV!BqrAWi5g6BlHNOwt@nwnU^Ub0XOPe4BxXQkMu zBP(l9HJ$tSF|ge6FrYZ=MrXE zX;{h`a)DR?G7^#t_(CZ|&Lkk9R!|0b$H_1OQm|n2m;`J51LWu8u7u6Zm|{#O{lOuV zO$b&R5M(LghA})@cHNr5FD9W+YDJphc?DDGbUI9?!c-~=aFC=?LJX0Ugwlx$9oyRoh;i2vmT4UxQS^GqFFaAS?vbmKc>_QQub}?&j)I!R0W* zL=pi4))x_r`8<9A=umv!G0 zxiWs-j50qNVz7tsK%eS%=M$u#km|#!^oB_Qn;7hE4Vs~HA5>--r2N_gdNyjse9w=feV%VqZ%A3upl7E|B-b8!k3}=Ji#R3Gfz0s z-VZC>NkzKzdH5cnbHpV{?IxHNiCP{3rwBt+upBjD}x!%O_I zu={+*RVI)I1oa->;HPxmpTOT$`23Bb;4p6ikAs3kyYC#DQqAb*uq$NJbCx6Dyk%V^ z+pB40a!r{A#7cmt9tKBfKr9@rcTbcL{?|I{T@| z|CZx7;!aCN%JQ`Ekii3fd#0kwSGg@NDzX{bVK*>y;G!$({7jvWQDsMlL2tXJ)Xzvi zq4F_%P01VrMa6wXddrKBra-+>=V?se%?8D8=QkWOb*U!Lt|rV`LiJfKeK9Ry(d$f9 zlip5EwA8DVRyU*1?p-&$yPU3Fp1v|fZ&b^#9VOKA$(#htF=^%*(Y<|oi(JiBh^wv} z-Ptltk=#{j5wNBcdni`ju zw2F)5VHV3LHdC%->dhH@N1_r|^KQv#PJYAKbM+4kT-zQgvz$&Tx%Voo2=8r$JDA@CK^R)1( zd#+y(bQ+L7&j^0d)|ovl&UWsi8qe|@ZGR*3_SLji7{xZ&LKqgE$2xtkx>@}8xBJ=2 z9r}Ym$i|PP^L}N%#Q4h9h#>;Y;P)Ax9n<%R_!{! zfjcJrJ~MW_3UP`Sq`r_lwYDv0$QgUpw^eDEGB<4Ls#)?O#`m!EhjSw*#bt2pS=7VY z9@URGzVjbw3aQMRsoFYo%mY=O2caGhi)S46aAm9NJ`~8T@P}}0I9ueA| zy3vCtlGXE0`0v%?4=M_Aqy3tZIO@dtXH=TL@si@>YsIRIhCfYoJGK6{mY>Fq#razC z!?hUO(#TpPPukh6luuIEny_bu_V$6inO9eg->zSh>aa5ZT)+smS+gka&TEET(AQns zylq{MPmX!+ZjZt4TXtGCcsCIL(0HZgVmEJF)urkOD%doJ-j+0DJKwFFYR5e&dZ6`^ z)~TzOki$u*WQ>k;9y?jLXkd|E(U|%S`cl0!=}6u_zke|r$399OS|QmR%BLLCSU7y4 z_QFBaGhz}ZHe=#Lw7G$nt|?Pbd*AjK8;X6!I*0X+YTq2*mabXR{;cvj+F`dyGxpaR z8QBwvI_c_vCOX(mK}b2PYVBxFhn?isleAx@xSkeoi$9#?=V-~YbQ^0>Zs6e%Hp$#* z@uUfY2~P~S5N;64pw;K?xSA5nInFn2gp;!;+%i1ov2}{ylwXL?M)*u^AYxMjJ>+>Q zzNYM#?7)9*DD=ybv*tTI-=!UV-eFn6X%GJ$>|BifFsbeU)w{`#&ZgT&6-m-w4Eo!C z#I%Z(X4^X+sl+Z~IFZ9n-0g;Xl{Cm2>{{ov(uUfaQah1OS?6?blDC_8dF$lM=9j1LF1&nyXVlK;muELzy=>QT zbC*d&OhbJ`@vg{Sx9Vr|Ht-VX-TUM5CEA|n*f2px<=Cg2SJ9JOt~Vx{RhdpS^^JI1 zKel#rV_oBm`U`ct>lnGO*qi=*lCx!ZnQd3%>+Qq5&$`%eEwXLdUd>6;IJJ6Z^-k_f zN*-n=doMFM^6E&!5tHaM(H_xDqn|{-d8ytx?fsP7OooxGEre#}^K8uLTg(Zo3z1Zf;9Q`(nA{LVH~nT_ zWZ^{FLm5L&t{Qi7(EZY?WK!)&*O4}9YH4Gfwd+@Bp5mh!{&l1T(hySVwCp*0eiZ?N zqSoTh2!Uve@J2+;*`!dN2(O5GPWzsMOwwP6&b+=F_#*ID$)1)4H8H|);o0JAoXGmD zKlDS7ohbk7LqbAs!nvF(r0&vj%B6jc&(+e-XmA8!TOw_yN6`xwFz&x>Y&tRL@uDkl zM~%vJ+US&4nHzPcy`%m2mgOa_C6d5LK|6%M6|;)Vgr1Q>kr@jUx>Pk|G_&kipH0l) z6~4R0x+XlhF_=&|tne>>`0LrPjYOxf)s&uISex9e^S9R%=`G(Hi>Bhskr!%iik99o ziaQXu;-mS;F$4SuT#vsWziZ~%<7*ekSa07t$ojIi=8~!> zOXSiewehu2a^9RwKecXYjDCi?=8!`5xY3A?$M8$X;*W%!U$SmcA)$;=NoZqbvK+TX z=j!I_Y}=GO_pJQvnzIg^f>7Tx!Kw8rI;Ul4Id^mKZfedS)j92YiP-XlRSxY=C*kg_ zs5IkgjeeWUJ36#03^p6sYhPMMSw>pcM$g%-R{gRre#b$l{KNdG#!q#!28#xZR<&J# zTihmOqrsYI4!VqST`8m7CW;2{bp*&^AMDo6`1|R+u)`G{m1~?GL7Iyt~e8b^Bxs z#v6w$NfucYQquIorPA<4O#89m#r`LPE(L52*n21h+tG5N)4O)}jy=)7j|z?bndN6h zRf`WM9^}o7BOWlG1}`cx4hwQy>2qo0wtcT+9|ff8rp~IcFh9)Sc`fBC@e1uQ?`hs{ z7kwwW!6>H!r>V1ViS7%_JJweDuALimb=3iM@5LN`jn74&JB^R7T`wL-^fj)5^{U!_r{ zg^!#+ zifT+%MpYGQBMB`K@7XSU5pwcGL_2UO7Pf8vix2faFt5ep=3;%0x z#(|jdjv&F{gSp#vS_5y=FR!gLc#`pGyw&gW1I1@|jyjRaDbBb=;yo@O=fRBov@oSsfTNc%9dHCk>jHsKh-t8MwJt`xZ z8}#_dJ@GE-@z||*2Y2Z+)W)ezPad0m1fCxBR4>b7QFCh7+Iruz!mQGf;wN?I-mSdz z$m1{3O^3r1%FdU*ec;ns&{8ZF)?A+>9Tii2FRQLBwQJ*>F|nra_XV{=OsiHV<5|dw zgz22?`wVBGA792EeK6rIQju8AYM*$`{_Nx!LwH7Thy0ymPh2IO%QDBf-bg4Usw~C2jlK zrq0+9`{3>9m(QQ(r@Ljxs=i5V@A`5ym4kC}4KhR_Yk=#@?guhAGcz%R!k|wyDjkfe z_yY-hXfzXWU)zsnZVE0h`_ZTra4+AFMy7#@xIc{oE;#$qXtXbAW+rrS+xa=4nJKl8 zFPH=Y*WVKSzZ^KSpa4q~G95(_GcGrbMi1psLrqO+XegO%!7)L@Ops92>Yq7+VEh^# z!f%#6!Tr7n+{r3ly?D_mfDTOa28!z+5`HsG`ut{CLyCbDZUxLV3Wcg+U@*(gPUF7; DMlE~r literal 0 HcmV?d00001