From af63f604959bac50e90b32fbaa3f5ebca2ea8bf2 Mon Sep 17 00:00:00 2001 From: BobLd <38405645+BobLd@users.noreply.github.com> Date: Fri, 19 Jun 2026 18:09:12 +0100 Subject: [PATCH] Add % as token separator in PlainTokenizer and fix #1332 --- .../Integration/GithubIssuesTests.cs | 14 +++++++++++++- .../SpecificTestDocuments/color_icc_based.pdf | Bin 0 -> 2420 bytes .../PlainTokenizer.cs | 8 ++++---- 3 files changed, 17 insertions(+), 5 deletions(-) create mode 100644 src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/color_icc_based.pdf diff --git a/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs b/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs index 4ecf25168..edf563b60 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs @@ -11,6 +11,18 @@ public class GithubIssuesTests { + [Fact] + public void Issues1332() + { + var path = IntegrationHelpers.GetSpecificTestDocumentPath("color_icc_based.pdf"); + using (var document = PdfDocument.Open(path, new ParsingOptions() { UseLenientParsing = true })) + { + var page = document.GetPage(1); + Assert.NotNull(page); + Assert.NotEmpty(page.Paths); + } + } + [Fact] public void Issues1328() { @@ -29,7 +41,7 @@ public void Issues1328() Assert.Equal(187334, images2[0].RawBytes.Length); } } - + [Fact] public void Issues1237() { diff --git a/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/color_icc_based.pdf b/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/color_icc_based.pdf new file mode 100644 index 0000000000000000000000000000000000000000..fd9930d595d84c195d77df0e0939f7d197561c65 GIT binary patch literal 2420 zcmcgu+isgc5Pb%Tf0!3PHr!cTvMgaRP1IXt_bti;u{WtJ*F=~^X}`W_cFh8IT2+ZnovTdZ8N+j}t#2F$qkV zeFFf3M>;qImhw6ri3c5RB-tuI{%xY08~W$^A$n zs!n%}toLODk>&DvyOzthb3AV8!m`-13AyPiySloRFS%S-8^q_{wj6*Sz96~`x*_55 zm1OFPeudkdk_Cx~MBvzQK#`y*pa2AqDV|WA^4b%W&0AmKJW=Zf`ruIj_xP_OBq0f0 z9z{?Z_lP1yYeRXSV-M2MFCYr&y)){qk<$5#BB2}@0EVzUqyQwH1AhDD{{kkIM3ltL z61Xyn6}{>OR49Uu{-cr4NCWWUMHm}X1A06j8olOdoEZRm$(^S-BJRCO$)~iJl7~)n zxHF^qz=)F0k%{0_HZ&4KsSR2*_4t%S8iV4LijY{}Xkh@*n?OphQE9wj;X})v>;Z^b z%%M*qq@qRM7d&FZBZyw8F6ln>S?~MquiH3ioZ4T197orH6oWx``@V{h(Z+E!eb4ftd^OO_Y!@V+1A2lwWrnfCrBOB)L*6v!u5XxWcZ`+hal@ zF16kC=SzE{bf@Y|E${jB1V6`a>JsNzjkP{wSKA+w4|P6ImsQmee>N;}%cm2cPi5V( O0~Dzpj7E!3h4mBjcF|D) literal 0 HcmV?d00001 diff --git a/src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs index 5019eda8c..427dfd2a9 100644 --- a/src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs +++ b/src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs @@ -6,7 +6,7 @@ internal sealed class PlainTokenizer : ITokenizer { - public bool ReadsNextByte { get; } = true; + public bool ReadsNextByte => true; public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token) { @@ -20,7 +20,7 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok using var builder = new ValueStringBuilder(stackalloc char[16]); builder.Append((char)currentByte); - + while (inputBytes.MoveNext()) { if (ReadHelper.IsWhitespace(inputBytes.CurrentByte)) @@ -28,12 +28,12 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok break; } - if (inputBytes.CurrentByte is (byte)'<' or (byte)'[' or (byte)'/' or (byte)']' or (byte)'>' or (byte)'(' or (byte)')') + if (inputBytes.CurrentByte is (byte)'<' or (byte)'[' or (byte)'/' or (byte)']' or (byte)'>' or (byte)'(' or (byte)')' or (byte)'%') { break; } - builder.Append((char) inputBytes.CurrentByte); + builder.Append((char)inputBytes.CurrentByte); } var text = builder.AsSpan();