Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"Width": 1800,
"Height": 3113,
"BlackIs1": false
}
7 changes: 6 additions & 1 deletion src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFrameworks>net471;net8.0;net9.0</TargetFrameworks>
<IsTestProject>true</IsTestProject>
Expand Down Expand Up @@ -198,6 +197,12 @@
<None Update="Images\Files\Jpx\fd42e6a0-5c7a-4eb2-b0e3-474cfde067a6.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<Content Include="Images\Files\Tif\TiffCcittG4.fixture.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Include="Images\Files\Tif\TiffCcittG4.ccitt.base64">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
</ItemGroup>
<ItemGroup>
<Using Include="Xunit" />
Expand Down
110 changes: 110 additions & 0 deletions src/UglyToad.PdfPig.Tests/Writer/PdfPageBuilderCcittG4Tests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
namespace UglyToad.PdfPig.Tests.Writer
{
using System.IO;
using UglyToad.PdfPig.Content;
using System.Linq;
using UglyToad.PdfPig;
using UglyToad.PdfPig.Tests.Writer.TestImages;
using UglyToad.PdfPig.Tokens;
using UglyToad.PdfPig.Writer;
using Xunit;

/// <summary>
/// Exercises <c>PdfPageBuilder.AddCcittG4</c> with the shared CCITT Group 4 fixture and checks the
/// image dictionary that ends up in the generated document.
/// </summary>
public class PdfPageBuilderCcittG4Tests
{
    /// <summary>
    /// Sanity check on the fixture itself: positive dimensions, a non-empty payload and BlackIs1 = false.
    /// </summary>
    [Fact]
    public void FixtureProvidesRawCcittData()
    {
        var sample = CcittG4TestImage.Load();

        Assert.True(sample.Width > 0);
        Assert.True(sample.Height > 0);
        Assert.NotEmpty(sample.RawCcittData);
        Assert.False(sample.BlackIs1);
    }

    /// <summary>
    /// A page built with the fixture must contain exactly one 1-bit image with the fixture's dimensions.
    /// </summary>
    [Fact]
    public void AddCcittG4AddsFaxImage()
    {
        var sample = CcittG4TestImage.Load();
        var pdfBytes = CreatePdf(sample, sample.BlackIs1);

        // The generated document is also written to the working directory under this fixed name.
        File.WriteAllBytes(@"PdfPageBuilderTests_CanAddTifCcittG4.pdf", pdfBytes);

        using var document = PdfDocument.Open(pdfBytes);
        var image = Assert.Single(document.GetPage(1).GetImages());

        Assert.Equal(sample.Width, image.WidthInSamples);
        Assert.Equal(sample.Height, image.HeightInSamples);
        Assert.Equal(1, image.BitsPerComponent);
    }

    /// <summary>
    /// Using the polarity recorded in the fixture metadata must produce a /Decode array of [0 1].
    /// </summary>
    [Fact]
    public void FixtureWritesDecodeZeroOne()
    {
        var sample = CcittG4TestImage.Load();
        var faxImage = BuildFaxImage(sample.BlackIs1);

        var decodeValues = DecodeArrayOf(faxImage).Data
            .OfType<NumericToken>()
            .Select(token => token.Data)
            .ToArray();

        // The generated fixture comes from a TIFF with Photometric.MINISWHITE, which must map to
        // blackIs1 = false for AddCcittG4. Reversing this to match TIFF naming reintroduces negative PDFs.
        Assert.Equal([0, 1], decodeValues);
    }

    /// <summary>
    /// blackIs1 = true must be expressed as /Decode [1 0], with no BlackIs1 entry in the decode parameters.
    /// </summary>
    [Fact]
    public void AddCcittG4WithBlackIs1TrueWritesDecodeOneZero()
    {
        var faxImage = BuildFaxImage(blackIs1: true);

        var decodeValues = DecodeArrayOf(faxImage).Data
            .OfType<NumericToken>()
            .Select(token => token.Data)
            .ToArray();

        Assert.Equal([1, 0], decodeValues);
        AssertDecodeParmsOmitBlackIs1(faxImage);
    }

    /// <summary>
    /// blackIs1 = false must be expressed as /Decode [0 1], with no BlackIs1 entry in the decode parameters.
    /// </summary>
    [Fact]
    public void AddCcittG4WithBlackIs1FalseWritesDecodeZeroOne()
    {
        var faxImage = BuildFaxImage(blackIs1: false);

        var decodeValues = DecodeArrayOf(faxImage).Data
            .OfType<NumericToken>()
            .Select(token => token.Data)
            .ToArray();

        Assert.Equal([0, 1], decodeValues);
        AssertDecodeParmsOmitBlackIs1(faxImage);
    }

    /// <summary>
    /// Builds a single-page document from the fixture with the requested polarity and returns its only image.
    /// </summary>
    private static IPdfImage BuildFaxImage(bool blackIs1)
    {
        var pdfBytes = CreatePdf(CcittG4TestImage.Load(), blackIs1);

        using (var document = PdfDocument.Open(pdfBytes))
        {
            return Assert.Single(document.GetPage(1).GetImages());
        }
    }

    /// <summary>
    /// Creates a one-page PDF sized to the sample and containing the sample added through AddCcittG4.
    /// The builder is disposed before the bytes are handed back.
    /// </summary>
    private static byte[] CreatePdf(CcittG4TestImage sample, bool blackIs1)
    {
        using (var builder = new PdfDocumentBuilder())
        {
            var page = builder.AddPage(sample.Width, sample.Height);

            page.AddCcittG4(sample.RawCcittData, sample.Width, sample.Height, blackIs1: blackIs1);

            return builder.Build();
        }
    }

    /// <summary>
    /// Fetches the /Decode entry of the image dictionary, asserting that it is an array token.
    /// </summary>
    private static ArrayToken DecodeArrayOf(IPdfImage image)
    {
        return Assert.IsType<ArrayToken>(image.ImageDictionary.Data[NameToken.Decode]);
    }

    /// <summary>
    /// Asserts that /DecodeParms is present on the image and does not carry a BlackIs1 key.
    /// </summary>
    private static void AssertDecodeParmsOmitBlackIs1(IPdfImage image)
    {
        Assert.True(image.ImageDictionary.TryGet(NameToken.DecodeParms, out DictionaryToken? decodeParms));
        Assert.False(decodeParms!.Data.ContainsKey(NameToken.BlackIs1));
    }
}
}
91 changes: 91 additions & 0 deletions src/UglyToad.PdfPig.Tests/Writer/TestImages/CcittG4TestImage.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
namespace UglyToad.PdfPig.Tests.Writer.TestImages
{
using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;

/// <summary>
/// Test helper that exposes the CCITT Group 4 fixture (raw payload plus its metadata) to the
/// PDF page builder tests.
/// </summary>
internal sealed class CcittG4TestImage
{
    private CcittG4TestImage(int width, int height, byte[] rawCcittData, bool blackIs1)
    {
        Width = width;
        Height = height;
        RawCcittData = rawCcittData;
        BlackIs1 = blackIs1;
    }

    /// <summary>Value of the "Width" property in the fixture metadata.</summary>
    public int Width { get; }

    /// <summary>Value of the "Height" property in the fixture metadata.</summary>
    public int Height { get; }

    /// <summary>Bytes obtained by base64-decoding the fixture payload file.</summary>
    public byte[] RawCcittData { get; }

    /// <summary>Value of the "BlackIs1" property in the fixture metadata.</summary>
    public bool BlackIs1 { get; }

    /// <summary>
    /// Reads the fixture from the <c>Images/Files/Tif</c> folder under the application base directory.
    /// The metadata file supplies the dimensions and polarity; the payload file holds the raw CCITT
    /// data as base64 text, which is decoded here.
    /// </summary>
    /// <returns>A new instance populated from the two fixture files.</returns>
    /// <exception cref="InvalidOperationException">A required metadata property could not be found.</exception>
    public static CcittG4TestImage Load()
    {
        var fixtureDirectory = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Images", "Files", "Tif");
        var metadataPath = Path.GetFullPath(Path.Combine(fixtureDirectory, "TiffCcittG4.fixture.json"));
        var base64Path = Path.GetFullPath(Path.Combine(fixtureDirectory, "TiffCcittG4.ccitt.base64"));

        // Metadata is read and parsed first, so a malformed metadata file fails before the payload is touched.
        var metadata = CcittG4FixtureMetadata.Parse(File.ReadAllText(metadataPath, Encoding.UTF8));

        // Strip line breaks and surrounding whitespace from the payload text before decoding it.
        var encodedPayload = File.ReadAllText(base64Path, Encoding.ASCII)
            .Replace("\r", string.Empty)
            .Replace("\n", string.Empty)
            .Trim();

        return new CcittG4TestImage(
            metadata.Width,
            metadata.Height,
            Convert.FromBase64String(encodedPayload),
            metadata.BlackIs1);
    }

    /// <summary>
    /// Minimal reader for the fixture metadata: pulls the three expected properties out of the JSON
    /// text with regular expressions.
    /// </summary>
    private sealed class CcittG4FixtureMetadata
    {
        private CcittG4FixtureMetadata(int width, int height, bool blackIs1)
        {
            Width = width;
            Height = height;
            BlackIs1 = blackIs1;
        }

        public int Width { get; }

        public int Height { get; }

        public bool BlackIs1 { get; }

        /// <summary>
        /// Extracts Width, Height and BlackIs1 (looked up in that order) from the given JSON text.
        /// </summary>
        public static CcittG4FixtureMetadata Parse(string json)
        {
            return new CcittG4FixtureMetadata(
                ReadInt(json, "Width"),
                ReadInt(json, "Height"),
                ReadBool(json, "BlackIs1"));
        }

        /// <summary>
        /// Finds <c>"propertyName": digits</c> in the text and returns the digits as an integer.
        /// </summary>
        private static int ReadInt(string json, string propertyName)
        {
            var found = Regex.Match(json, $"\"{propertyName}\"\\s*:\\s*(\\d+)");

            return found.Success
                ? int.Parse(found.Groups[1].Value)
                : throw new InvalidOperationException($"Missing integer property '{propertyName}' in CCITT fixture metadata.");
        }

        /// <summary>
        /// Finds <c>"propertyName": true|false</c> (matched case-insensitively) and returns the boolean.
        /// </summary>
        private static bool ReadBool(string json, string propertyName)
        {
            var found = Regex.Match(json, $"\"{propertyName}\"\\s*:\\s*(true|false)", RegexOptions.IgnoreCase);

            return found.Success
                ? bool.Parse(found.Groups[1].Value)
                : throw new InvalidOperationException($"Missing boolean property '{propertyName}' in CCITT fixture metadata.");
        }
    }
}
}
86 changes: 84 additions & 2 deletions src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -735,7 +735,7 @@ public AddedImage AddPng(Stream pngStream, PdfRectangle placementRectangle = def

var data = new byte[png.Width * png.Height * 3];
int pixelIndex = 0;

for (var rowIndex = 0; rowIndex < png.Height; rowIndex++)
{
for (var colIndex = 0; colIndex < png.Width; colIndex++)
Expand Down Expand Up @@ -828,6 +828,88 @@ public AddedImage AddPng(Stream pngStream, PdfRectangle placementRectangle = def
return new AddedImage(reference.Data, png.Width, png.Height);
}


/// <summary>
/// Embeds an already-compressed CCITT Group 4 (fax) 1 bit-per-pixel image as a PDF image XObject that
/// uses the CCITTFaxDecode filter, and draws it on the page.
/// The supplied bytes are stored unchanged, which suits monochrome scanned TIFF pages that are
/// already CCITT G4 compressed.
/// </summary>
/// <param name="ccittG4Data">
/// Raw CCITT Group 4 (T.6) compressed bytes (NOT decoded bitmap pixels, NOT wrapped in TIFF).
/// </param>
/// <param name="width">Image width in pixels; also written as Columns in the decode parameters.</param>
/// <param name="height">Image height in pixels; also written as Rows in the decode parameters.</param>
/// <param name="placementRectangle">
/// Placement rectangle in user space. When left as the default value, (0,0,width,height) is used.
/// </param>
/// <param name="blackIs1">
/// Set to true if 1 bits represent black (common for bilevel scans); this writes a /Decode array of
/// <c>[1 0]</c>, while false writes <c>[0 1]</c>. If the result looks inverted, pass the opposite value.
/// </param>
/// <returns>An <see cref="AddedImage"/> created from the new image object's reference and the given dimensions.</returns>
/// <exception cref="ArgumentNullException"><paramref name="ccittG4Data"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="width"/> or <paramref name="height"/> is zero or negative.</exception>
public AddedImage AddCcittG4(byte[] ccittG4Data, int width, int height, PdfRectangle placementRectangle = default, bool blackIs1 = true)
{
    if (ccittG4Data is null)
    {
        throw new ArgumentNullException(nameof(ccittG4Data));
    }

    if (width <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(width));
    }

    if (height <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(height));
    }

    // An unspecified placement means "draw at the origin at the image's pixel size".
    var target = placementRectangle.Equals(default(PdfRectangle))
        ? new PdfRectangle(0, 0, width, height)
        : placementRectangle;

    // Parameters for CCITTFaxDecode: K = -1 selects Group 4 (T.6); Columns/Rows carry the image size.
    // No BlackIs1 entry is written here; polarity is expressed through /Decode on the image instead.
    var faxParameters = new Dictionary<NameToken, IToken>
    {
        [NameToken.Create("K")] = new NumericToken(-1),
        [NameToken.Create("Columns")] = new NumericToken(width),
        [NameToken.Create("Rows")] = new NumericToken(height)
    };

    var decodeFirst = new NumericToken(blackIs1 ? 1 : 0);
    var decodeSecond = new NumericToken(blackIs1 ? 0 : 1);

    var imageEntries = new Dictionary<NameToken, IToken>
    {
        [NameToken.Type] = NameToken.Xobject,
        [NameToken.Subtype] = NameToken.Image,
        [NameToken.Width] = new NumericToken(width),
        [NameToken.Height] = new NumericToken(height),
        [NameToken.ColorSpace] = NameToken.Devicegray,
        [NameToken.BitsPerComponent] = new NumericToken(1),
        [NameToken.Filter] = NameToken.Create("CCITTFaxDecode"),
        [NameToken.DecodeParms] = new DictionaryToken(faxParameters),
        [NameToken.Length] = new NumericToken(ccittG4Data.Length),
        [NameToken.Decode] = new ArrayToken([decodeFirst, decodeSecond])
    };

    // IMPORTANT: the payload is passed through untouched. It is already CCITT Group 4 compressed,
    // so it must not be compressed again.
    var imageReference = documentBuilder.AddImage(new DictionaryToken(imageEntries), ccittG4Data);

    // Register the image under a fresh name in the page's /Resources /XObject dictionary.
    var pageResources = pageDictionary.GetOrCreateDict(NameToken.Resources);
    var pageXObjects = pageResources.GetOrCreateDict(NameToken.Xobject);
    var imageName = NameToken.Create(xobjectsNames.NewName());
    pageXObjects[imageName] = imageReference;

    // Save graphics state, scale/translate to the placement rectangle, paint the image, restore.
    var origin = target.BottomLeft;
    currentStream.Add(Push.Value);
    currentStream.Add(new ModifyCurrentTransformationMatrix(
    [
        target.Width, 0,
        0, target.Height,
        origin.X, origin.Y
    ]));
    currentStream.Add(new InvokeNamedXObject(imageName));
    currentStream.Add(Pop.Value);

    return new AddedImage(imageReference.Data, width, height);
}


/// <summary>
/// Adds a URL link annotation to the page at the specified rectangle area.
/// </summary>
Expand Down Expand Up @@ -1034,7 +1116,7 @@ public PdfPageBuilder CopyFrom(Page srcPage)

gstateName = newName;
}

// According to PDF spec 32000-1:2008, section 8.4.5, ExtGState can contain both direct values and indirect references
if (gstate.Value is IndirectReferenceToken fontReferenceToken)
{
Expand Down
Loading