From 7bb00c831854fb50ba7a3a38be979f0d81b76d57 Mon Sep 17 00:00:00 2001 From: vers-one <12114169+vers-one@users.noreply.github.com> Date: Sat, 27 May 2023 00:05:29 -0400 Subject: [PATCH] Unescape URLs from EPUB 2 NCX and EPUB 3 nav docs --- .../Unit/Readers/Epub2NcxReaderTests.cs | 52 +++++++++++++++++++ .../Readers/Epub3NavDocumentReaderTests.cs | 45 ++++++++++++++++ .../Unit/Readers/PackageReaderTests.cs | 38 ++++++++++++++ Source/VersOne.Epub/Readers/Epub2NcxReader.cs | 2 +- .../Readers/Epub3NavDocumentReader.cs | 2 +- .../VersOne.Epub/Readers/NavigationReader.cs | 4 +- 6 files changed, 139 insertions(+), 4 deletions(-) diff --git a/Source/VersOne.Epub.Test/Unit/Readers/Epub2NcxReaderTests.cs b/Source/VersOne.Epub.Test/Unit/Readers/Epub2NcxReaderTests.cs index 7c693be..ea451d2 100644 --- a/Source/VersOne.Epub.Test/Unit/Readers/Epub2NcxReaderTests.cs +++ b/Source/VersOne.Epub.Test/Unit/Readers/Epub2NcxReaderTests.cs @@ -256,6 +256,22 @@ public class Epub2NcxReaderTests """; + private const string NCX_FILE_WITH_ESCAPED_CONTENT_SRC_ATTRIBUTE = """ + + + + + + + + Chapter 1 + + + + + + """; + private const string NCX_FILE_WITHOUT_CONTENT_SRC_ATTRIBUTE = """ @@ -877,6 +893,42 @@ public async void ReadEpub2NcxAsyncWithoutNavLabelTextTest() await TestFailingReadOperation(NCX_FILE_WITHOUT_NAVLABEL_TEXT_ELEMENT); } + [Fact(DisplayName = "Reading an NCX file with a URI-escaped 'src' attribute in a 'content' XML element should succeed")] + public async void ReadEpub2NcxAsyncWithEscapedContentSrcTest() + { + Epub2Ncx expectedEpub2Ncx = new + ( + filePath: NCX_FILE_PATH, + head: new Epub2NcxHead(), + docTitle: null, + docAuthors: null, + navMap: new Epub2NcxNavigationMap + ( + items: new List() + { + new Epub2NcxNavigationPoint + ( + id: "navpoint-1", + navigationLabels: new List() + { + new Epub2NcxNavigationLabel + ( + text: "Chapter 1" + ) + }, + content: new Epub2NcxContent + ( + source: "chapter1.html" + ) + ) + } + ), + pageList: null, + navLists: null + ); + await TestSuccessfulReadOperation(NCX_FILE_WITH_ESCAPED_CONTENT_SRC_ATTRIBUTE, expectedEpub2Ncx); + } + [Fact(DisplayName = "ReadEpub2NcxAsync should throw Epub2NcxException if a 'content' XML element has no 'src' attribute")] public async void ReadEpub2NcxAsyncWithoutContentSrcTest() { diff --git a/Source/VersOne.Epub.Test/Unit/Readers/Epub3NavDocumentReaderTests.cs b/Source/VersOne.Epub.Test/Unit/Readers/Epub3NavDocumentReaderTests.cs index 97d4a3c..f5c104b 100644 --- a/Source/VersOne.Epub.Test/Unit/Readers/Epub3NavDocumentReaderTests.cs +++ b/Source/VersOne.Epub.Test/Unit/Readers/Epub3NavDocumentReaderTests.cs @@ -145,6 +145,20 @@ public class Epub3NavDocumentReaderTests """; + private const string NAV_FILE_WITH_ESCAPED_HREF_IN_A_ELEMENT = """ + + + + + + """; + private static EpubPackage MinimalEpubPackageWithNav => new ( @@ -402,6 +416,37 @@ public async void ReadEpub3NavDocumentAsyncWithEmptyLiElement() await TestFailingReadOperation(NAV_FILE_WITH_EMPTY_LI_ELEMENT); } + [Fact(DisplayName = "Reading a NAV file with a URI-escaped 'href' attribute in an 'a' XML element should succeed")] + public async void ReadEpub3NavDocumentAsyncWithEscapedAHrefTest() + { + Epub3NavDocument expectedEpub3NavDocument = new + ( + filePath: NAV_FILE_PATH, + navs: new List() + { + new Epub3Nav + ( + type: Epub3StructuralSemanticsProperty.TOC, + ol: new Epub3NavOl + ( + lis: new List() + { + new Epub3NavLi + ( + anchor: new Epub3NavAnchor + ( + href: "chapter1.html", + text: "Chapter 1" + ) + ) + } + ) + ) + } + ); + await TestSuccessfulReadOperation(NAV_FILE_WITH_ESCAPED_HREF_IN_A_ELEMENT, expectedEpub3NavDocument); + } + private static async Task TestSuccessfulReadOperation(string navFileContent, Epub3NavDocument expectedEpub3NavDocument, EpubReaderOptions? epubReaderOptions = null) { TestZipFile testZipFile = CreateTestZipFileWithNavFile(navFileContent); diff --git a/Source/VersOne.Epub.Test/Unit/Readers/PackageReaderTests.cs b/Source/VersOne.Epub.Test/Unit/Readers/PackageReaderTests.cs index 1fa5dd3..5e13bfd 100644 --- a/Source/VersOne.Epub.Test/Unit/Readers/PackageReaderTests.cs +++ b/Source/VersOne.Epub.Test/Unit/Readers/PackageReaderTests.cs @@ -165,6 +165,17 @@ public class PackageReaderTests """; + private const string OPF_FILE_WITH_ESCAPED_HREF_IN_MANIFEST_ITEM = $""" + + + + + + + + + """; + private const string OPF_FILE_WITHOUT_HREF_IN_MANIFEST_ITEM = $""" @@ -743,6 +754,33 @@ public async void ReadPackageWithoutManifestItemIdWithSkippingInvalidManifestIte await TestSuccessfulReadOperationWithSkippingInvalidManifestItems(OPF_FILE_WITHOUT_ID_IN_MANIFEST_ITEM, MinimalEpub3Package); } + [Fact(DisplayName = "Read an OPF package with a URI-escaped 'href' attribute in a manifest item XML node should succeed")] + public async void ReadPackageWithEscapedManifestItemHrefTest() + { + EpubPackage expectedPackage = new + ( + uniqueIdentifier: null, + epubVersion: EpubVersion.EPUB_3, + metadata: new EpubMetadata(), + manifest: new EpubManifest + ( + id: null, + items: new List() + { + new EpubManifestItem + ( + id: "item-1", + href: "chapter1.html", + mediaType: "application/xhtml+xml" + ) + } + ), + spine: new EpubSpine(), + guide: null + ); + await TestSuccessfulReadOperationWithSkippingInvalidManifestItems(OPF_FILE_WITH_ESCAPED_HREF_IN_MANIFEST_ITEM, expectedPackage); + } + [Fact(DisplayName = "Trying to read OPF package without 'href' attribute in a manifest item XML node should fail with EpubPackageException")] public async void ReadPackageWithoutManifestItemHrefTest() { diff --git a/Source/VersOne.Epub/Readers/Epub2NcxReader.cs b/Source/VersOne.Epub/Readers/Epub2NcxReader.cs index 1d17415..88af648 100644 --- a/Source/VersOne.Epub/Readers/Epub2NcxReader.cs +++ b/Source/VersOne.Epub/Readers/Epub2NcxReader.cs @@ -241,7 +241,7 @@ private static Epub2NcxContent ReadNavigationContent(XElement navigationContentN id = attributeValue; break; case "src": - source = attributeValue; + source = Uri.UnescapeDataString(attributeValue); break; } } diff --git a/Source/VersOne.Epub/Readers/Epub3NavDocumentReader.cs b/Source/VersOne.Epub/Readers/Epub3NavDocumentReader.cs index 614c867..19f4711 100644 --- a/Source/VersOne.Epub/Readers/Epub3NavDocumentReader.cs +++ b/Source/VersOne.Epub/Readers/Epub3NavDocumentReader.cs @@ -170,7 +170,7 @@ private static Epub3NavAnchor ReadEpub3NavAnchor(XElement epub3NavAnchorNode) switch (navAnchorNodeAttribute.GetLowerCaseLocalName()) { case "href": - href = attributeValue; + href = Uri.UnescapeDataString(attributeValue); break; case "title": title = attributeValue; diff --git a/Source/VersOne.Epub/Readers/NavigationReader.cs b/Source/VersOne.Epub/Readers/NavigationReader.cs index f4dc11b..2d8e1e7 100644 --- a/Source/VersOne.Epub/Readers/NavigationReader.cs +++ b/Source/VersOne.Epub/Readers/NavigationReader.cs @@ -41,7 +41,7 @@ private static List GetNavigationItems(EpubSchema epubSch Epub2NcxNavigationLabel? firstNavigationLabel = navigationPoint.NavigationLabels.FirstOrDefault() ?? throw new Epub2NcxException($"Incorrect EPUB 2 NCX: navigation point \"{navigationPoint.Id}\" should contain at least one navigation label."); string title = firstNavigationLabel.Text; - string source = Uri.UnescapeDataString(navigationPoint.Content.Source); + string source = navigationPoint.Content.Source; if (!ContentPathUtils.IsLocalPath(source)) { throw new Epub2NcxException($"Incorrect EPUB 2 NCX: content source \"{source}\" cannot be a remote resource."); @@ -100,7 +100,7 @@ private static List GetNavigationItems(EpubSchema epubSch List nestedItems = GetNavigationItems(epubSchema, epubContentRef, epub3NavLi.ChildOl, epub3NavigationBaseDirectoryPath); if (navAnchor.Href != null) { - string href = Uri.UnescapeDataString(navAnchor.Href); + string href = navAnchor.Href; if (!ContentPathUtils.IsLocalPath(href)) { throw new Epub3NavException($"Incorrect EPUB 3 navigation document: anchor href \"{href}\" cannot be a remote resource.");