Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Random Big Title Lorem Ipsum text with lists Lorem ipsum dolor sit amet, consectetur adipiscing elit. In sodales gravida felis, in rhoncus velit rutrum at. Curabitur hendrerit dapibus nulla, ut hendrerit diam imperdiet quis. Pellentesque id neque ali-quam, pulvinar neque in, vulputate elit. Pel-lentesque ut erat sit amet massa suscipit ullamcor-per. Sed porttitor viverra convallis. Duis vitae sem-per metus. Pellentesque eros purus, egestas eget velit eget, elementum aliquet velit. Suspendisse potenti. Nulla vitae massa rutrum, blandit erat vi-tae, aliquet arcu. Aenean feugiat leo sed enim sodales vehicula. Sus-pendisse tempus hendrerit magna sagittis dictum. Duis ultrices dapibus egestas. Cras eu felis eu lectus suscipit pharetra at at lacus. Nulla facilisi. Proin in-terdum faucibus elit nec rhoncus. Proin sodaless metus sed tincidunt hendrerit. • Duis leo enim, convallis sit amet orci eget, condimentum mattis mi ; • Etiam dolor erat, maximus nec mi sed, con-vallis convallis orci ; • Morbi viverra diam in diam cursus, vitae aliquet velit tempus ; • Donec at nisi fermentum, ultricies odio eget, egestas massa at nisi fermentum, ul-tricies odio eget, egestas massa. Donec ultricies cursus odio sed rutrum. Nam ven-enatis metus vitae elementum scelerisque. Ali-quam tempor sapien at turpis posuere eleifend. Sed placerat posuere nunc vel efficitur. Quisque auctor felis vel lectus dictum fringilla. Quisque vo-lutpat pulvinar© elit. Aliquam ultrices feugiat ali-quam. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Sus-pendisse imperdiet ex lorem, porta bibendum pu-rus ultricies id. Integer vel lacus sapien. Nam sodales ante eu risus facilisis placerat. Aliquam suscipit pulvinar ultricies. Aenean pulvinar, ex ac fermentum egestas, erat nisi feugiat velit, vitae suscipit tellus odio vitae quam. Morbi elementum sem in elit posuere, non rhoncus magna fringilla. Phasellus cursus in dolor laoreet rutrum. 
Curabitur tincidunt risus ullamcor-per, vehicula velit at, pulvinar metus. Donec quis ante leo. Vivamus pharetra, nisl ac vehi-cula tempor, tellus lacus aliquam sapien, eu congue nibh quam sit amet odio. Quisque metus arcu, sem-per nec consequat eu, pellentesque vel sem. Sed purus risus, tincidunt¹ sit amet dictum vitae, euis-mod id nibh. Praesent ultrices libero quis enim porta, sit amet pellentesque augue pretium. Viva-mus nec molestie nunc. Donec finibus enim nec tel-lus laoreet elementum. Curabitur efficitur placerat dolor et semper. Morbi laoreet dui eu tortor luctus, nec ultrices do-lor ullamcorper. Ut gravida sed nisl a efficitur. In tincidunt orci a condimentum semper. Suspendisse scelerisque fermentum lacinia. Vestibulum sit amet ornare tellus, aliquet euismod mauris. Cras suscipit venenatis ultrices. Sed diam erat, aliquet a tellus ut, viverra 12º ongue magna. Cras id justo tortor. Mauris in tortor vulputate, pellentesque nisl ac, facilisis ligula. Class aptent taciti² sociosqu ad li-tora torquent per conubia nostra³, per inceptos himenaeos. Aliquam eget dolor turpis. Mauris id molestie tellus. Sed elementum molestie nisi, at ali-quet sem vehicula nec. Morbi tempus nulla enim, a vulputate magna €51 luctus £66 eu. Fusce sodales, libero quis suscipit ultrices, metus erat auctor urna, sit amet dictum arcu tortor eu metus. 1. Ut volutpat, velit at interdum consectetur, nisl lorem consequat mauris, feugiat dignissim tellus massa ut nisl. 2. Praesent at est nisi. Pellentesque rutrum lorem sed dui accumsan gravida. 3. Pellentesque dictum nisl vitae urna luctus, congue pulvinar mi congue. Morbi vestibulum varius ipsum nec molestie. Proin auctor efficitur diam ut luctus. Phasellus cursus maximus ultricies. Mauris eu neque ut sem semper tempus. Curabitur non lorem eu nunc lobortis vi-verra at in diam. Pellentesque euismod purus a leo lobortis tempor. Maecenas mollis ligula at sem sus-cipit fringilla. Mauris sollicitudin tincidunt lectus id tempor. Etiam ut nisi est.
55 changes: 55 additions & 0 deletions src/UglyToad.PdfPig.Tests/Integration/CMapLocalCachingTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
namespace UglyToad.PdfPig.Tests.Integration
{
using System;
using System.Text;

/// <summary>
/// Integration tests that extract the text of every page of a set of sample documents and
/// compare it with an expected text file stored next to each PDF (same name, <c>txt</c> extension).
/// </summary>
public class CMapLocalCachingTests
{
    // Both folders are resolved lazily, relative to the base directory of the running test assembly.
    private static readonly Lazy<string> DocumentFolder = new Lazy<string>(() => Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents")));
    private static readonly Lazy<string> DlaFolder = new Lazy<string>(() => Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Dla", "Documents")));

    /// <summary>
    /// File names of the documents under test; each row supplies the single argument of <see cref="CheckText"/>.
    /// </summary>
    public static readonly object[][] DocumentsData = new object[][]
    {
        ["68-1990-01_A.pdf"],
        ["Type0 Font.pdf"],
        ["11194059_2017-11_de_s.pdf"],
        ["2108.11480.pdf"],
        ["reference-2-numeric-error.pdf"],
        ["MOZILLA-3136-0.pdf"],
        ["FICTIF_TABLE_INDEX.pdf"],
        ["Approved_Document_B__fire_safety__volume_2_-_Buildings_other_than_dwellings__2019_edition_incorporating_2020_and_2022_amendments.pdf"],
        ["dotnet-ai.pdf"],
        ["Old Gutnish Internet Explorer.pdf"],
        ["Random 2 Columns Lists Hyph - Justified.pdf"]
    };

    /// <summary>
    /// Opens the named document with lenient parsing, concatenates the text of all its pages and
    /// asserts that the result equals the content of the sibling <c>txt</c> file.
    /// </summary>
    /// <param name="documentName">File name of the PDF, looked up first in the integration documents folder and then in the Dla documents folder.</param>
    [Theory]
    [MemberData(nameof(DocumentsData))]
    public void CheckText(string documentName)
    {
        string fullPath = Path.Combine(DocumentFolder.Value, documentName);
        if (!File.Exists(fullPath))
        {
            // Not in the integration folder: fall back to the Dla documents folder.
            fullPath = Path.Combine(DlaFolder.Value, documentName);
        }

        // Name the missing file in the failure output instead of a bare "expected True" message.
        Assert.True(File.Exists(fullPath), $"Document '{documentName}' was not found; last path tried: '{fullPath}'.");

        string actual = ExtractText(fullPath);
        string expectedPath = Path.ChangeExtension(fullPath, "txt");

        // To regenerate the expected text after an intentional change to text extraction,
        // temporarily write 'actual' to 'expectedPath' with File.WriteAllText and run the test once.
        string expected = File.ReadAllText(expectedPath);
        Assert.Equal(expected, actual);
    }

    /// <summary>
    /// Returns the text of all pages of the document at <paramref name="path"/>, appended in page order
    /// with no separator. The document is disposed before this method returns.
    /// </summary>
    private static string ExtractText(string path)
    {
        var text = new StringBuilder();

        using (var document = PdfDocument.Open(path, new ParsingOptions { UseLenientParsing = true }))
        {
            // Pages are requested by 1-based number.
            for (var pageNumber = 1; pageNumber <= document.NumberOfPages; pageNumber++)
            {
                text.Append(document.GetPage(pageNumber).Text);
            }
        }

        return text.ToString();
    }
}
}

Large diffs are not rendered by default.

Binary file not shown.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Old GutnishOld Gutnish was the dialect of Old Norse that was spoken on the Baltic island of Gotland. It shows sufficient differences from the Old West Norse and Old East Norsedialects that it is considered to be a separate branch. Gutnish is still spoken in some parts of Gotland and on the adjoining island of Fårö.The root Gut is identical to Goth, and it is often remarked that the language has similarities with the Gothic language. These similarities have led scholars such as Elias Wessén and Dietrich Hofmann to suggest that it is most closely related to Gothic. The best known example of such a similarity is that Gothic and Gutnish called both adult and young sheep lamb.The Old Norse diphthong au (e.g. auga "eye") remained in Old Gutnish and Old West Norse, while in Old East Norse – except for peripheral dialects – it evolved into the monophthong ǿ, i.e. a long version of ø. Likewise the diphthong ai in bain (bone) remained in Old Gutnish while it in Old West Norse became ei as in bein and in Old East Norse it became é (bén). Whereas Old West Norse had the ey diphthong and Old East Norse evolved the monophthong ǿ) Old Gutnish had oy.Proto-GermanicOld GutnishOld West NorseOld East Norse*augô (eye)augaaugaauga > ǿga*bainą (bone)bainbeinbæin > bén*hauzijaną (to hear)hoyraheyrahøyra > hǿraMost of the corpus of Old Gutnish is found in the Gutasaga from the 13th century.Citation:Þissi þieluar hafþi ann sun sum hit hafþi. En hafþa cuna hit huita stierna þaun tu bygþu fyrsti agutlandi fyrstu nat sum þaun saman suafu þa droymdi hennj draumbr. So sum þrir ormar warin slungnir saman j barmj hennar Oc þytti hennj sum þair scriþin yr barmi hennar. þinna draum segþi han firi hasþa bonda sinum hann riaþ dravm þinna so. Alt ir baugum bundit bo land al þitta warþa oc faum þria syni aiga. þaim gaf hann namn allum o fydum. 
guti al The approximate extent of Old Norse and related languages in the early 10th century: Old West Norse dialectOld East Norse dialectOld GutnishOld EnglishCrimean Gothic Other Germanic languages with which Old Norse still retained some mutual intelligibilityLanguage samplePage 1 of 3Old Gutnish -Wikipedia10/01/2018https://en.wikipedia.org/wiki/Old_Gutnishgutland aigha graipr al annar haita Oc gunfiaun þriþi. þair sciptu siþan gutlandi i þria þriþiunga. So at graipr þann elzti laut norþasta þriþiung oc guti miþal þriþiung En gunfiaun þann yngsti laut sunnarsta. siþan af þissum þrim aucaþis fulc j gutlandi so mikit um langan tima at land elptj þaim ai alla fyþa þa lutaþu þair bort af landi huert þriþia þiauþ so at alt sculdu þair aiga oc miþ sir bort hafa sum þair vfan iorþar attu.Standardised Old Gutnish:Þissi Þieluar hafþi ann sun sum hit Hafþi. En Hafþa kuna hit Huita Stierna. Þaun tu bygþu fyrsti a Gutlandi. Fyrstu nat sum þaun saman suafu þa droymdi henni draumbr. So sum þrir ormar varin slungnir saman i barmi hennar ok þytti henni sum þair skriþin yr barmi hennar. Þinna draum segþi han firi Hafþa, bonda sinum. Hann riaþ draum þinna so: Alt ir baugum bundit, bo land al þitta varþa ok faum þria syni aiga. Þaim gaf hann namn allum ofydum. Guti al Gutland aiga, Graipr al annar haita ok Gunfiaun þriþi. Þair skiptu siþan Gutlandi i þria þriþiunga so at Graipr þann eldsti laut norþasta þriþiung ok Guti miþal þriþiung en Gunfiaun þann yngsti laut sunnarasta. Siþan af þissum þrim aukaþis fulk i Gutlandi so mikit um langan tima at land elpti þaim ai alla fyþa. Þa lutaþu þair bort af landi huert þriþia þiauþ so at alt skuldu þair aiga ok miþ sir bort hafa sum þair ufan iorþar attu.Employing normalised Old Norse orthography:Þissi Þjelvar hafði ann sun sum hít Hafði. En Hafða kuna hít Hvítastjerna. Þaun tú byggðu fyrsti á Gutlandi. 
Fyrstu nátt sum þaun saman sváfu þá droymdi henni draumr; só sum þrír ormar varin slungnir saman í barmi hennar, ok þýtti henni sum þair skriðin ýr barmi hennar. Þinna draum segði han firi Hafða bónda sínum. Hann raið draum þinna só: "Alt ir baugum bundit, bóland al þitta varða uk fáum þría syni aiga." Þaim gaf hann namn, allum ófýddum; Guti, al Gutland aiga; Graipr, al annar haita; ok Gunnfjaun þriði. Þair skiptu síðan Gutlandi í þría þriðjunga, só at Graipr þann eldsti laut norðasta þriðjung, ok Guti miðal þriðjung, en Gunnfjaun þann yngsti laut sunnarsta. Síðan, af þissum þrim aukaðis fulk í Gutlandi sum mikit um langan tíma at land elpti þaim ai alla fýða. Þá lutaðu þair bort af landi hvert þriðja þjauð só at alt skuldu þair aiga ok mið sír bort hafa sum þair ufan jorðar áttu.Translation: in Icelandic:Son hann Þjálfi átti sem hét Hafði. Og kona Hafða hét Hvítastjarna. þau tvö byggðu fyrst manna á Gotlandi. Fyrstu nótt sem þau þar saman sváfu þá dreymdi hana draum; sá hún þrjá orma vafðir saman í barmi hennar, og þótti henni sem þeir skriða niður barm hennar. Þennan draum sagði hún Hafða bónda sínum. Hann réð draum þann svo: "Allt er baugum bundið og verður allt land þitt búið og munum við þrjá syni eiga." Þeim gaf hann nöfn ófæddum, Goti sem Gotland á að eiga; Greipur sem annar hét; og Gunnfjón sá þriðji. Þeir skiptu síðan Gotlandi í þrjá þriðjunga, þá fékk Greipur sá elsti norður þriðjunginn, og Goti miðju þriðjunginn, en Gunnfjón sá yngsti fékk suður þriðjunginn. Seinna, af þessum þremur Page 2 of 3Old Gutnish -Wikipedia10/01/2018https://en.wikipedia.org/wiki/Old_Gutnishjókst eftir langan tíma svo fólk í Gotlandi það mikið að landið gat ekki öllum veitt fæði. Þá létu þeir fara burt af landi þriðja hvern þegn, og allt máttu þau eiga og með sér burt hafa sem ofanjarðar áttu.Translation:This Thielvar had a son called Hafthi. And Hafthi's wife was called Hvitstjarna. Those two were the first to settle on Gotland. 
When they slept on the island for the first night, she dreamed that three snakes lay in her lap, and she thought they were sliding off her lap. She told this to Hafthi. He interpreted her dream and said: "Everything is bound with bangles, this island will be inhabited, and you will bear three sons." Although, they were not yet born, he named them Guti, who would own the island, Graip and Gunfiaun. The sons divided the island into three regions, and Graip, who was the eldest, took the north, Guti the middle, and Gunfjaun, who was the youngest, took the southern third. After a long time, their descendants became so numerous that the island could not support all of them. They drew lots and every third islander had to leave. They could keep everything they owned but the land.◾Nationalencyklopedin◾Christine Peel. "Guta Saga - The History of the Gotlanders," Viking Society for Northern Research Text Series. Vol. XII. Viking Society for Northern Research, University College of London (1999).Retrieved from "https://en.wikipedia.org/w/index.php?title=Old_Gutnish&oldid=797888377" This page was last edited on 29 August 2017, at 19:15.Text is available under the Creative Commons Attribution-ShareAlike License; additional terms may apply. By using this site, you agree to the Terms of Use and Privacy Policy. Wikipedia® is a registered trademark of the Wikimedia Foundation, Inc., a non-profit organization.ReferencesPage 3 of 3Old Gutnish -Wikipedia10/01/2018https://en.wikipedia.org/wiki/Old_Gutnish
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
TypeWeekly newspaperEditorJoe StuverFoundedOctober, 1918 (asthe BroadusIndependent), 1935(as the PowderRiver CountyExaminer), and1965 (as thePowder RiverExaminer)Headquarters119 1/2 N. ParkAve.Broadus, MT 59317United StatesPowder River ExaminerPowder River ExaminerThe Powder River Examiner, originally established in October, 1918 asthe Broadus Independent, is the only newspaper printed in Powder RiverCounty, Montana, and is located in the county seat of Broadus.The Broadus Independent was first published in Broadus, Montana inOctober, 1918, and continued until February, 1919.From March 6, 1919 until April 17, 1919, the paper was published inOlive, Montana as the Olive Branch.The Broadus Independent was published weekly from April 24, 1919until 1935.The Powder River County Examiner replaced the BroadusIndependent in 1935, beginning publication and continuing weekly until1965.In 1965 the newspaper's name was shortened to Powder RiverExaminer, and remains that today.Broadus Independent, Broadus, Montana, October, 1918-February, 1919.Olive Branch, Olive, Montana, March 6, 1919 – April 17, 1919.Broadus Independent, Broadus, Montana, April 24, 1919 – 1935.Powder River County Examiner, Broadus, Montana, 1935-1965.Powder River Examiner, Broadus, Montana, 1965-current.Joe Stuver, (current editor)Retrieved from "https://en.wikipedia.org/w/index.php?title=Powder_River_Examiner&oldid=747264669"This page was last edited on 1 November 2016, at 11:53.Text is available under the Creative Commons Attribution-ShareAlike License; additional terms may apply. By usingthis site, you agree to the Terms of Use and Privacy Policy. Wikipedia® is a registered trademark of the WikimediaFoundation, Inc., a non-profit organization.HistoryPreceding TitlesNotable contributorsPowder River Examiner - Wikipediahttps://en.wikipedia.org/wiki/Powder_River_Examiner1 of 130/03/2018, 03:50

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
(Jl σ3 法海阔』咱国司被NA咿清峭阉且啊辛茹海耀表3-19车辆齿轮用钢系列牌号化学成分(质量分数)(%)国内牌号国外牌号c Si Mn p s Ni Cr Mo Al Cu Ti B 16CrMnTiH 0.13~0.18 0. \Kl -1. 20 20CrMnTiHI 20CrMnTiH2 。.80 -1. 10 1.00-1.30 20CrMnTiH3 0. 17 -0. 37军三0.0350. 04 -0. 10 0.18句0.2320CrMnTiH4 20CrMnTiH5 0. \Kl -1. 25 I.I。”1.4520CrMnTiH6 16MnCrH 16MnCr5 0. 14 -0. 20 I. 00~1.40 O.\KJ-1.20 20MnCrH 20MnCr5 0.17~0.23 1.10-1.50 1.00-1.30 0.02~ 罢王0.20运0.120.02~0.仍525MnCrH 25MnCr5 0.23~0.28 0.055 o. ro -o. so0. 80-1.10 28MnCrH 28MnCr5 0. 25 -0. 30 运0.15运0.1016CrMnBH ZF6 0. 13 -0. 18。.80-1.100.001 -18CrMnBH ZF7 0.15 -0. 40 1.00-1.30 0. 15 -0. 20军军aα丑。0. 015 -0. 035 1.00-1.30 0.α)317CrMnBH ZF7B 17Cr2Ni2H ZFI 0.15 -0.19 0.15 -0. 40 0.40~o.ro1.40~I. 70 1.40-1.70 16CrNiH 16CrNi4 0.13“0.18 0.02-0.04 0.15~0.35 0.70句1.100. 80-1. 200. 80~1.20 :;;;0.10 0.02-0. 05 19CrNiH 19CrNi5 0.16~0. 21 0. 02 -0. 035 17Cr2Ni2MoH ZFlA 0.15”0.19 0.15~0.40 o. 40 -o. ro0.015~0.035 I. 4。”I.70 1.50-1.80 0. 25 -0. 35 20CrNiMoHI 8620Hl 0.02-0.17”0.23 0.15町0.35o. ro -o. 95 0.017~0.032 0.35~0. 75 0.35”0.65 0. 15 -0. 25 20CrNiMoH2 8620田0.045 15CrMoH 0.13”0.18 0.25~0.45 0.17~0. 37 0.4。”0.70 髦。但50.8。”1.1020CrMo 0. 18 -0. 230.15 -0. 25 20CrMoH SCM420 0.17 -0. 23 0.17町0.350.55~0. \Kl 0.85句I.25 0.15”0.35 0.02-0.0击:;;;0.15 35CrMo 0. 32 -0. 40 0.40~0. 70 0.80斗100. 15 -0. 25 运0.03520CrH 0.17~0.37 0.70-1.00 运0.200.50~0.80 40Cr 0. 18 -0. 23 0.37~0.440. 80 -1.10
31 changes: 31 additions & 0 deletions src/UglyToad.PdfPig.Tests/Util/MurmurHash3Tests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
namespace UglyToad.PdfPig.Tests.Util
{
using PdfPig.Util;
using System.Text;

/// <summary>
/// Checks the 128-bit outputs of <see cref="MurmurHash3"/> (x86 and x64 variants) against known
/// lower-case hexadecimal digests.
/// </summary>
public class MurmurHash3Tests
{
    /// <summary>
    /// Rows of: input sentence, expected x86 128-bit digest, expected x64 128-bit digest.
    /// </summary>
    public static readonly object[][] MurmurHashData = new object[][]
    {
        // https://murmurhash.shorelabs.com/
        ["The quick brown fox jumps over the lazy dog", "2f1583c3ecee2c675d7bf66ce5e91d2c", "e34bbc7bbc071b6c7a433ca9c49a9347"],
        ["MurmurHash3 was written by Austin Appleby, and is placed in the public", "6d3583489d9d1e5a898493af67e2ad10", "a91793d43f82cbabda2fb0c28c24799a"],
        ["0", "0ab2409ea5eb34f8a5eb34f8a5eb34f8", "2ac9debed546a3803a8de9e53c875e09"],
    };

    /// <summary>
    /// Hashes the UTF-8 bytes of <paramref name="sentence"/> with both variants and compares each
    /// result, rendered as hex, with the expected digest.
    /// </summary>
    /// <param name="sentence">Text to hash.</param>
    /// <param name="expectedX86">Expected hex digest of <c>Compute_x86_128</c>.</param>
    /// <param name="expectedX64">Expected hex digest of <c>Compute_x64_128</c>.</param>
    [Theory]
    [MemberData(nameof(MurmurHashData))]
    public void x86x64Check(string sentence, string expectedX86, string expectedX64)
    {
        byte[] data = Encoding.UTF8.GetBytes(sentence);

        // Third argument is 0 in both calls (presumably the seed - confirm against MurmurHash3).
        Assert.Equal(expectedX86, ToHex(MurmurHash3.Compute_x86_128(data, data.Length, 0)));
        Assert.Equal(expectedX64, ToHex(MurmurHash3.Compute_x64_128(data, data.Length, 0)));
    }

    /// <summary>
    /// Formats every element with the "x2" format and concatenates the results in order.
    /// Generic so that it does not depend on the exact element type returned by the hash functions.
    /// </summary>
    private static string ToHex<T>(T[] values) where T : IFormattable
    {
        var hex = new StringBuilder(values.Length * 2);
        foreach (T value in values)
        {
            // A null provider formats with the current culture, as ToString("x2") does.
            hex.Append(value.ToString("x2", null));
        }

        return hex.ToString();
    }
}
}
12 changes: 9 additions & 3 deletions src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
using PdfFonts.Parser.Parts;
using Tokenization.Scanner;
using Tokens;
using UglyToad.PdfPig.PdfFonts.Cmap;

internal static class PdfDocumentFactory
{
Expand Down Expand Up @@ -166,22 +167,27 @@ private static PdfDocument OpenDocument(

var encodingReader = new EncodingReader(pdfScanner);

var cmapCache = new CMapLocalCache(filterProvider, pdfScanner);

var type0Handler = new Type0FontHandler(
cidFontFactory,
filterProvider,
pdfScanner,
cmapCache,
parsingOptions);

var type1Handler = new Type1FontHandler(
pdfScanner,
filterProvider,
encodingReader,
cmapCache,
parsingOptions.UseLenientParsing);

var trueTypeHandler = new TrueTypeFontHandler(parsingOptions.Logger,
var trueTypeHandler = new TrueTypeFontHandler(
parsingOptions.Logger,
pdfScanner,
filterProvider,
encodingReader,
cmapCache,
SystemFontFinder.Instance,
type1Handler);

Expand All @@ -190,7 +196,7 @@ private static PdfDocument OpenDocument(
type0Handler,
trueTypeHandler,
type1Handler,
new Type3FontHandler(pdfScanner, filterProvider, encodingReader));
new Type3FontHandler(pdfScanner, encodingReader, cmapCache));

var resourceContainer = new ResourceStore(pdfScanner, fontFactory, filterProvider, parsingOptions);

Expand Down
Loading
Loading