Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/UglyToad.PdfPig.Core/UglyToad.PdfPig.Core.csproj
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>netstandard2.0;net462;net471;net6.0;net8.0</TargetFrameworks>
<TargetFrameworks>netstandard2.0;net462;net471;net6.0;net8.0;net9.0</TargetFrameworks>
<LangVersion>12</LangVersion>
<Version>0.1.12-alpha001</Version>
<IsTestProject>False</IsTestProject>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,19 @@ public DocstrumBoundingBoxes(DocstrumBoundingBoxesOptions options)
/// <returns>The <see cref="TextBlock"/>s generated by the document spectrum method.</returns>
public IReadOnlyList<TextBlock> GetBlocks(IEnumerable<Word> words)
{
if (words?.Any() != true)
if (words is null)
{
return Array.Empty<TextBlock>();
}

return GetBlocks(words.ToList(),
// Avoid multiple enumeration and unnecessary ToArray() if already a list
var wordList = words as IReadOnlyList<Word> ?? words.ToArray();
if (wordList.Count == 0)
{
return Array.Empty<TextBlock>();
}

return GetBlocks(wordList,
options.WithinLineBounds, options.WithinLineMultiplier, options.WithinLineBinSize,
options.BetweenLineBounds, options.BetweenLineMultiplier, options.BetweenLineBinSize,
options.AngularDifferenceBounds,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>netstandard2.0;net462;net471;net6.0;net8.0</TargetFrameworks>
<TargetFrameworks>netstandard2.0;net462;net471;net6.0;net8.0;net9.0</TargetFrameworks>
<LangVersion>12</LangVersion>
<Version>0.1.12-alpha001</Version>
<IsTestProject>False</IsTestProject>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,34 +51,49 @@ public IEnumerable<Word> GetWords(IReadOnlyList<Letter> letters)

if (options.GroupByOrientation)
{
// axis aligned
List<Word> words = GetWords(
letters.Where(l => l.TextOrientation == TextOrientation.Horizontal).ToList(),
options.MaximumDistance, options.DistanceMeasureAA, options.FilterPivot,
options.Filter, options.MaxDegreeOfParallelism);

words.AddRange(GetWords(
letters.Where(l => l.TextOrientation == TextOrientation.Rotate270).ToList(),
options.MaximumDistance, options.DistanceMeasureAA, options.FilterPivot,
options.Filter, options.MaxDegreeOfParallelism));
var buckets = new List<Letter>[5];
for (int i = 0; i < buckets.Length; i++) buckets[i] = new List<Letter>();

words.AddRange(GetWords(
letters.Where(l => l.TextOrientation == TextOrientation.Rotate180).ToList(),
options.MaximumDistance, options.DistanceMeasureAA, options.FilterPivot,
options.Filter, options.MaxDegreeOfParallelism));

words.AddRange(GetWords(
letters.Where(l => l.TextOrientation == TextOrientation.Rotate90).ToList(),
options.MaximumDistance, options.DistanceMeasureAA, options.FilterPivot,
options.Filter, options.MaxDegreeOfParallelism));
foreach (var l in letters)
{
switch (l.TextOrientation)
{
case TextOrientation.Horizontal: buckets[0].Add(l); break;
case TextOrientation.Rotate270: buckets[1].Add(l); break;
case TextOrientation.Rotate180: buckets[2].Add(l); break;
case TextOrientation.Rotate90: buckets[3].Add(l); break;
default: buckets[4].Add(l); break;
}
}

// not axis aligned
words.AddRange(GetWords(
letters.Where(l => l.TextOrientation == TextOrientation.Other).ToList(),
options.MaximumDistance, options.DistanceMeasure, options.FilterPivot,
options.Filter, options.MaxDegreeOfParallelism));
// Collect results in a plain List<Word>; it is not thread-safe, so every AddRange below is guarded by lock (results).
var results = new List<Word>(letters.Count); // Pre-allocate for performance

return words;
// Limit parallelism to avoid oversubscription.
var parallelOptions = new System.Threading.Tasks.ParallelOptions
{
MaxDegreeOfParallelism = options.MaxDegreeOfParallelism > 0 ? options.MaxDegreeOfParallelism : Environment.ProcessorCount
};

// Use partitioner for better load balancing and avoid ConcurrentBag overhead
System.Threading.Tasks.Parallel.ForEach(
System.Collections.Concurrent.Partitioner.Create(0, buckets.Length),
parallelOptions,
range =>
{
for (int i = range.Item1; i < range.Item2; i++)
{
if (buckets[i].Count == 0) continue;
var measure = (i == 4) ? options.DistanceMeasure : options.DistanceMeasureAA;
var words = GetWords(buckets[i], options.MaximumDistance, measure, options.FilterPivot, options.Filter, options.MaxDegreeOfParallelism);
lock (results)
{
results.AddRange(words);
}
}
});
results.TrimExcess();
return results;
}
else
{
Expand Down
2 changes: 1 addition & 1 deletion src/UglyToad.PdfPig.Fonts/UglyToad.PdfPig.Fonts.csproj
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>netstandard2.0;net462;net471;net6.0;net8.0</TargetFrameworks>
<TargetFrameworks>netstandard2.0;net462;net471;net6.0;net8.0;net9.0</TargetFrameworks>
<LangVersion>12</LangVersion>
<Version>0.1.12-alpha001</Version>
<IsTestProject>False</IsTestProject>
Expand Down
2 changes: 1 addition & 1 deletion src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFrameworks>net471;net8.0</TargetFrameworks>
<TargetFrameworks>net471;net8.0;net9.0</TargetFrameworks>
<IsTestProject>true</IsTestProject>
<IsPackable>false</IsPackable>
<DebugType>full</DebugType>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>netstandard2.0;net462;net471;net6.0;net8.0</TargetFrameworks>
<TargetFrameworks>netstandard2.0;net462;net471;net6.0;net8.0;net9.0</TargetFrameworks>
<LangVersion>12</LangVersion>
<Version>0.1.12-alpha001</Version>
<IsTestProject>False</IsTestProject>
Expand Down
2 changes: 1 addition & 1 deletion src/UglyToad.PdfPig.Tokens/UglyToad.PdfPig.Tokens.csproj
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>netstandard2.0;net462;net471;net6.0;net8.0</TargetFrameworks>
<TargetFrameworks>netstandard2.0;net462;net471;net6.0;net8.0;net9.0</TargetFrameworks>
<LangVersion>12</LangVersion>
<Version>0.1.12-alpha001</Version>
<IsTestProject>False</IsTestProject>
Expand Down
2 changes: 1 addition & 1 deletion src/UglyToad.PdfPig/UglyToad.PdfPig.csproj
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>netstandard2.0;net462;net471;net6.0;net8.0</TargetFrameworks>
<TargetFrameworks>netstandard2.0;net462;net471;net6.0;net8.0;net9.0</TargetFrameworks>
<LangVersion>12</LangVersion>
<Version>0.1.12-alpha001</Version>
<IsTestProject>False</IsTestProject>
Expand Down
Loading