diff --git a/src/UglyToad.PdfPig.Fonts/SystemFonts/SystemFontFinder.cs b/src/UglyToad.PdfPig.Fonts/SystemFonts/SystemFontFinder.cs index a740419b1..17c891bac 100644 --- a/src/UglyToad.PdfPig.Fonts/SystemFonts/SystemFontFinder.cs +++ b/src/UglyToad.PdfPig.Fonts/SystemFonts/SystemFontFinder.cs @@ -1,34 +1,39 @@ -using System.Collections.Concurrent; - -namespace UglyToad.PdfPig.Fonts.SystemFonts +namespace UglyToad.PdfPig.Fonts.SystemFonts; + +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using Standard14Fonts; +using System.Runtime.InteropServices; +using TrueType; +using TrueType.Parser; + +/// +public sealed class SystemFontFinder : ISystemFontFinder { - using System; - using System.Collections.Generic; - using System.IO; - using System.Linq; - using System.Runtime.InteropServices; - using Core; - using Standard14Fonts; - using TrueType; - using TrueType.Parser; + private static readonly IReadOnlyDictionary NameSubstitutes; + private static readonly Lazy> AvailableFonts; - /// - public sealed class SystemFontFinder : ISystemFontFinder - { - private static readonly IReadOnlyDictionary NameSubstitutes; - private static readonly Lazy> AvailableFonts; + private static readonly ConcurrentDictionary Cache = new(StringComparer.OrdinalIgnoreCase); - private static readonly object CacheLock = new object(); - private static readonly Dictionary Cache = new Dictionary(StringComparer.OrdinalIgnoreCase); + /// + /// Fonts grouped by the upper-case first character of their filename (e.g. 'A' → all fonts + /// whose file starts with 'a' or 'A'). Built lazily from so the + /// O(n) grouping is paid only once, turning the per-lookup first-letter scan into an O(1) dict + /// lookup. + /// + private static readonly Lazy> FontsByFirstChar; - /// - /// The instance of . - /// - public static readonly ISystemFontFinder Instance = new SystemFontFinder(); + /// + /// The instance of . + /// + public static readonly ISystemFontFinder Instance = new SystemFontFinder(); - static SystemFontFinder() - { - var dict = new Dictionary + static SystemFontFinder() + { + var dict = new Dictionary { {"Courier", new[] {"CourierNew", "CourierNewPSMT", "LiberationMono", "NimbusMonL-Regu"}}, {"Courier-Bold", new[] {"CourierNewPS-BoldMT", "CourierNew-Bold", "LiberationMono-Bold", "NimbusMonL-Bold"}}, @@ -46,49 +51,49 @@ static SystemFontFinder() {"ZapfDingbats", new[] {"ZapfDingbatsITC", "Dingbats", "MS-Gothic"}} }; - HashSet names; - try - { - names = Standard14.GetNames(); - } - catch (Exception ex) - { - throw new InvalidOperationException("Failed to load the Standard 14 fonts from the assembly's resources.", ex); - } + HashSet names; + try + { + names = Standard14.GetNames(); + } + catch (Exception ex) + { + throw new InvalidOperationException("Failed to load the Standard 14 fonts from the assembly's resources.", ex); + } - foreach (var name in names) + foreach (var name in names) + { + if (!dict.ContainsKey(name)) { - if (!dict.ContainsKey(name)) + var value = Standard14.GetMappedFontName(name); + + if (dict.TryGetValue(value, out var subs)) { - var value = Standard14.GetMappedFontName(name); - - if (dict.TryGetValue(value, out var subs)) - { - dict[name] = subs; - } - else - { - dict[name] = new[] { value }; - } + dict[name] = subs; + } + else + { + dict[name] = new[] { value }; } } + } - NameSubstitutes = dict; + NameSubstitutes = dict; - ISystemFontLister lister; + ISystemFontLister lister; #if NETSTANDARD2_0_OR_GREATER || NET - if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) - { - lister = new WindowsSystemFontLister(); - } - else if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) - { - lister = new MacSystemFontLister(); - } - else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) - { - lister = new LinuxSystemFontLister(); - } + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + lister = new WindowsSystemFontLister(); + } + else if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) + { + lister = new MacSystemFontLister(); + } + else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) + { + lister = new LinuxSystemFontLister(); + } #if NET else if (OperatingSystem.IsAndroid()) { @@ -107,203 +112,250 @@ static SystemFontFinder() lister = new IOSSystemFontLister(); } #endif - else - { - throw new NotSupportedException($"Unsupported operating system: {RuntimeInformation.OSDescription}."); - } + else + { + throw new NotSupportedException($"Unsupported operating system: {RuntimeInformation.OSDescription}."); + } #elif NETFRAMEWORK lister = new WindowsSystemFontLister(); #else #error Missing ISystemFontLister for target framework #endif - AvailableFonts = new Lazy>(() => lister.GetAllFonts().ToArray()); - } - - private readonly ConcurrentDictionary nameToFileNameMap = new ConcurrentDictionary(StringComparer.OrdinalIgnoreCase); - private readonly object readFilesLock = new object(); - private readonly HashSet readFiles = new HashSet(); + AvailableFonts = new Lazy>(() => lister.GetAllFonts().ToArray()); - /// - /// Create a new . - /// - private SystemFontFinder() + FontsByFirstChar = new Lazy>(() => { - } - - /// - public TrueTypeFont GetTrueTypeFont(string name) - { - var result = GetTrueTypeFontNamed(name); - - if (result != null) - { - return result; - } + var fonts = AvailableFonts.Value; + var byChar = new Dictionary>(); - if (name.Contains("-")) + foreach (var record in fonts) { - result = GetTrueTypeFontNamed(name.Replace("-", string.Empty)); - - if (result != null) + var fn = Path.GetFileName(record.Path); + if (string.IsNullOrEmpty(fn)) { - return result; + continue; } - } - if (name.Contains(",")) - { - result = GetTrueTypeFontNamed(name.Replace(',', '-')); - - if (result != null) + var key = char.ToUpperInvariant(fn[0]); + if (!byChar.TryGetValue(key, out var list)) { - return result; + byChar[key] = list = new List(); } + + list.Add(record); } - foreach (var substituteName in GetSubstituteNames(name)) + var result = new Dictionary(byChar.Count); + foreach (var kvp in byChar) { - result = GetTrueTypeFontNamed(substituteName); - - if (result != null) - { - return result; - } + result[kvp.Key] = kvp.Value.ToArray(); } - result = GetTrueTypeFontNamed(name + "-Regular"); + return result; + }); + } + + private readonly ConcurrentDictionary nameToFileNameMap = new ConcurrentDictionary(StringComparer.OrdinalIgnoreCase); + // Tracks font file paths that have already been scanned for name matching, so we never open + // the same file twice during a search. Value is always 0 – the dictionary is used as a + // lock-free concurrent set. + private readonly ConcurrentDictionary readFiles = new ConcurrentDictionary(StringComparer.Ordinal); + + /// + /// Create a new . + /// + private SystemFontFinder() + { } + + /// + public TrueTypeFont? GetTrueTypeFont(string name) + { + var result = GetTrueTypeFontNamed(name); + + if (result is not null) + { return result; } - private IEnumerable GetSubstituteNames(string name) + if (name.Contains('-')) { - name = name.Replace(" ", string.Empty); - if (NameSubstitutes.TryGetValue(name, out var values)) + result = GetTrueTypeFontNamed(name.Replace("-", string.Empty)); + + if (result != null) { - return values; + return result; } - - return Array.Empty(); } - private TrueTypeFont GetTrueTypeFontNamed(string name) + if (name.Contains(',')) { - lock (CacheLock) + result = GetTrueTypeFontNamed(name.Replace(',', '-')); + + if (result != null) { - if (Cache.TryGetValue(name, out var cachedResult)) - { - return cachedResult; - } + return result; } + } - if (nameToFileNameMap.TryGetValue(name, out var fileName)) - { - if (TryReadFile(fileName, false, name, out var result)) - { - return result; - } + foreach (var substituteName in GetSubstituteNames(name)) + { + result = GetTrueTypeFontNamed(substituteName); - return null; + if (result != null) + { + return result; } + } + + result = GetTrueTypeFontNamed(name + "-Regular"); + + return result; + } - var nameCandidates = AvailableFonts.Value.Where(x => Path.GetFileName(x.Path)?.StartsWith(name[0].ToString(), StringComparison.OrdinalIgnoreCase) == true); + private static string[] GetSubstituteNames(string name) + { + if (name.Contains(' ')) + { + name = name.Replace(" ", string.Empty); + } + + if (NameSubstitutes.TryGetValue(name, out var values)) + { + return values; + } + + return Array.Empty(); + } - foreach (var systemFontRecord in nameCandidates) + private TrueTypeFont? GetTrueTypeFontNamed(string name) + { + if (Cache.TryGetValue(name, out var cachedResult)) + { + return cachedResult; + } + + if (nameToFileNameMap.TryGetValue(name, out var fileName)) + { + if (TryReadFile(fileName, false, name, out var result)) { - if (TryGetTrueTypeFont(name, systemFontRecord, out var font)) - { - return font; - } + return result; } - foreach (var record in AvailableFonts.Value) + return null; + } + + // First pass: fonts whose filename starts with the same letter as the requested name – + // the most likely match. FontsByFirstChar is built once, making this O(1) vs the + // previous O(n) LINQ scan that also allocated a string for name[0].ToString(). + char firstChar = char.ToUpperInvariant(name[0]); + + if (FontsByFirstChar.Value.TryGetValue(firstChar, out var candidates)) + { + foreach (var record in candidates) { if (TryGetTrueTypeFont(name, record, out var font)) { return font; } - - // TODO: OTF } - - return null; } - private bool TryGetTrueTypeFont(string name, SystemFontRecord record, out TrueTypeFont font) + // Second pass: all remaining fonts (those whose filename does NOT start with the same + // letter). We skip first-char matches to avoid re-processing what was already tried + // above – the original code iterated all fonts a second time without this guard. + // GetFirstFileNameChar avoids allocating a substring on every iteration. + foreach (var record in AvailableFonts.Value) { - font = null; - if (record.Type == SystemFontType.TrueType) +#if NET + char localFirstChar = Path.GetFileName(record.Path.AsSpan())[0]; +#else + char localFirstChar = Path.GetFileName(record.Path)[0]; +#endif + + if (char.ToUpperInvariant(localFirstChar) == firstChar) { - lock (readFilesLock) - { - if (readFiles.Contains(record.Path)) - { - return false; - } - } + continue; // Already tried in first pass + } - return TryReadFile(record.Path, true, name, out font); + if (TryGetTrueTypeFont(name, record, out var font)) + { + return font; } - return false; + // TODO: OTF } - private bool TryReadFile(string fileName, bool readNameFirst, string fontName, out TrueTypeFont font) - { - font = null; - - var bytes = File.ReadAllBytes(fileName); + return null; + } - var data = new TrueTypeDataBytes(new MemoryInputBytes(bytes)); + private bool TryGetTrueTypeFont(string name, SystemFontRecord record, out TrueTypeFont? font) + { + font = null; - if (readNameFirst) + if (record.Type == SystemFontType.TrueType) + { + if (readFiles.ContainsKey(record.Path)) { - var name = TrueTypeFontParser.GetNameTable(data); + return false; + } - if (name == null) - { - lock (readFilesLock) - { - readFiles.Add(fileName); - } + return TryReadFile(record.Path, true, name, out font); + } - return false; - } + return false; + } - var fontNameFromFile = name.GetPostscriptName() ?? name.FontName; + private bool TryReadFile(string fileName, bool readNameFirst, string fontName, out TrueTypeFont? font) + { + font = null; - nameToFileNameMap.TryAdd(fontNameFromFile, fileName); + byte[] bytes; + try + { + bytes = File.ReadAllBytes(fileName); + } + catch (Exception e) + { + System.Diagnostics.Debug.WriteLine(e); + return false; + } - if (!string.Equals(fontNameFromFile, fontName, StringComparison.OrdinalIgnoreCase)) - { - lock (readFilesLock) - { - readFiles.Add(fileName); - } + var data = new TrueTypeDataBytes(bytes); - return false; - } - } + string? psName = null; - data.Seek(0); - font = TrueTypeFontParser.Parse(data); - var psName = font.TableRegister.NameTable?.GetPostscriptName() ?? font.Name; + if (readNameFirst) + { + var nameTable = TrueTypeFontParser.GetNameTable(data); - lock (CacheLock) + if (nameTable is null) { - if (!Cache.ContainsKey(psName)) - { - Cache[psName] = font; - } + readFiles.TryAdd(fileName, 0); + return false; } - lock (readFilesLock) + psName = nameTable.GetPostscriptName(); + string? fontNameFromFile = psName ?? nameTable.FontName; + + nameToFileNameMap.TryAdd(fontNameFromFile, fileName); + + if (!string.Equals(fontNameFromFile, fontName, StringComparison.OrdinalIgnoreCase)) { - readFiles.Add(fileName); + readFiles.TryAdd(fileName, 0); + return false; } - return true; + data.Seek(0); } + + font = TrueTypeFontParser.Parse(data); + psName ??= font.TableRegister.NameTable?.GetPostscriptName() ?? font.Name; + + Cache.TryAdd(psName, font); + readFiles.TryAdd(fileName, 0); + + return true; } -} \ No newline at end of file +} diff --git a/src/UglyToad.PdfPig.Fonts/TrueType/Parser/TrueTypeFontParser.cs b/src/UglyToad.PdfPig.Fonts/TrueType/Parser/TrueTypeFontParser.cs index 8f57a032e..b29140725 100644 --- a/src/UglyToad.PdfPig.Fonts/TrueType/Parser/TrueTypeFontParser.cs +++ b/src/UglyToad.PdfPig.Fonts/TrueType/Parser/TrueTypeFontParser.cs @@ -158,7 +158,7 @@ private static TrueTypeFont ParseTables(float version, IReadOnlyDictionary /// A name table allows multilingual strings to be associated with the TrueType font. /// - public class NameTable : ITrueTypeTable + public sealed class NameTable : ITrueTypeTable { /// public string Tag => TrueTypeHeaderTable.Name; @@ -34,10 +34,10 @@ public class NameTable : ITrueTypeTable /// /// The name records contained in this name table. /// - public IReadOnlyList NameRecords { get; } + public IReadOnlyList NameRecords { get; } /// - /// Creaye a new . + /// Create a new . /// public NameTable(TrueTypeHeaderTable directoryTable, string fontName, @@ -56,7 +56,7 @@ public NameTable(TrueTypeHeaderTable directoryTable, /// Gets the PostScript name for the font if specified, preferring the Windows platform name if present. /// /// The PostScript name for the font if found or . - public string GetPostscriptName() + public string? GetPostscriptName() { string any = null; foreach (var nameRecord in NameRecords) diff --git a/tools/UglyToad.PdfPig.Benchmarks/Program.cs b/tools/UglyToad.PdfPig.Benchmarks/Program.cs index 332a2c1e0..2dece9b25 100644 --- a/tools/UglyToad.PdfPig.Benchmarks/Program.cs +++ b/tools/UglyToad.PdfPig.Benchmarks/Program.cs @@ -1,13 +1,12 @@ using BenchmarkDotNet.Running; -namespace UglyToad.PdfPig.Benchmarks +namespace UglyToad.PdfPig.Benchmarks; + +internal class Program { - internal class Program + static void Main(string[] args) { - static void Main(string[] args) - { - var summary = BenchmarkRunner.Run(); - Console.ReadKey(); - } + var summary = BenchmarkRunner.Run(); + Console.ReadKey(); } -} +} \ No newline at end of file diff --git a/tools/UglyToad.PdfPig.Benchmarks/SystemFontFinderBenchmarks.cs b/tools/UglyToad.PdfPig.Benchmarks/SystemFontFinderBenchmarks.cs new file mode 100644 index 000000000..5c19ff114 --- /dev/null +++ b/tools/UglyToad.PdfPig.Benchmarks/SystemFontFinderBenchmarks.cs @@ -0,0 +1,44 @@ +using BenchmarkDotNet.Attributes; +using UglyToad.PdfPig.Content; +using UglyToad.PdfPig.Fonts.SystemFonts; +using UglyToad.PdfPig.Fonts.TrueType; + +namespace UglyToad.PdfPig.Benchmarks; + +[Config(typeof(NuGetPackageConfig))] +[MemoryDiagnoser(displayGenColumns: false)] +public class SystemFontFinderBenchmarks +{ + [Benchmark] + public IReadOnlyList ARVE_2745540212_Open() + { + List letters = new List(); + using (var doc = PdfDocument.Open("iizieileamidagi.ARVE_2745540212.pdf")) + { + foreach (var page in doc.GetPages()) + { + letters.AddRange(page.Letters); + } + } + + return letters; + } + + [Benchmark] + public IReadOnlyList ARVE_2745540212_GetTrueTypeFont() + { + List letters = new List(); + using (var doc = PdfDocument.Open("iizieileamidagi.ARVE_2745540212.pdf")) + { + foreach (var page in doc.GetPages()) + { + foreach (var letter in page.Letters) + { + letters.Add(SystemFontFinder.Instance.GetTrueTypeFont(letter.FontName)); + } + } + } + + return letters; + } +} \ No newline at end of file diff --git a/tools/UglyToad.PdfPig.Benchmarks/UglyToad.PdfPig.Benchmarks.csproj b/tools/UglyToad.PdfPig.Benchmarks/UglyToad.PdfPig.Benchmarks.csproj index c0fecfcb7..8187fb846 100644 --- a/tools/UglyToad.PdfPig.Benchmarks/UglyToad.PdfPig.Benchmarks.csproj +++ b/tools/UglyToad.PdfPig.Benchmarks/UglyToad.PdfPig.Benchmarks.csproj @@ -23,7 +23,7 @@ - + @@ -42,6 +42,9 @@ Always + + Always + \ No newline at end of file diff --git a/tools/UglyToad.PdfPig.Benchmarks/iizieileamidagi.ARVE_2745540212.pdf b/tools/UglyToad.PdfPig.Benchmarks/iizieileamidagi.ARVE_2745540212.pdf new file mode 100644 index 000000000..ed95cdf4a Binary files /dev/null and b/tools/UglyToad.PdfPig.Benchmarks/iizieileamidagi.ARVE_2745540212.pdf differ