From 7bb00c831854fb50ba7a3a38be979f0d81b76d57 Mon Sep 17 00:00:00 2001
From: vers-one <12114169+vers-one@users.noreply.github.com>
Date: Sat, 27 May 2023 00:05:29 -0400
Subject: [PATCH] Unescape URLs from EPUB 2 NCX and EPUB 3 nav docs
---
.../Unit/Readers/Epub2NcxReaderTests.cs | 52 +++++++++++++++++++
.../Readers/Epub3NavDocumentReaderTests.cs | 45 ++++++++++++++++
.../Unit/Readers/PackageReaderTests.cs | 38 ++++++++++++++
Source/VersOne.Epub/Readers/Epub2NcxReader.cs | 2 +-
.../Readers/Epub3NavDocumentReader.cs | 2 +-
.../VersOne.Epub/Readers/NavigationReader.cs | 4 +-
6 files changed, 139 insertions(+), 4 deletions(-)
diff --git a/Source/VersOne.Epub.Test/Unit/Readers/Epub2NcxReaderTests.cs b/Source/VersOne.Epub.Test/Unit/Readers/Epub2NcxReaderTests.cs
index 7c693be..ea451d2 100644
--- a/Source/VersOne.Epub.Test/Unit/Readers/Epub2NcxReaderTests.cs
+++ b/Source/VersOne.Epub.Test/Unit/Readers/Epub2NcxReaderTests.cs
@@ -256,6 +256,22 @@ public class Epub2NcxReaderTests
""";
+ private const string NCX_FILE_WITH_ESCAPED_CONTENT_SRC_ATTRIBUTE = """
+
+
+
+
+
+
+
+ Chapter 1
+
+
+
+
+
+ """; // NCX fixture for the URI-escaped 'src' test. NOTE(review): the XML markup of this literal is not visible here (only the label text is) - confirm it was not stripped.
+
private const string NCX_FILE_WITHOUT_CONTENT_SRC_ATTRIBUTE = """
@@ -877,6 +893,42 @@ public async void ReadEpub2NcxAsyncWithoutNavLabelTextTest()
await TestFailingReadOperation(NCX_FILE_WITHOUT_NAVLABEL_TEXT_ELEMENT);
}
+ [Fact(DisplayName = "Reading an NCX file with a URI-escaped 'src' attribute in a 'content' XML element should succeed")]
+ public async void ReadEpub2NcxAsyncWithEscapedContentSrcTest()
+ {
+ Epub2Ncx expectedEpub2Ncx = new // expected parse result: a single navigation point with one label and one content entry
+ (
+ filePath: NCX_FILE_PATH,
+ head: new Epub2NcxHead(),
+ docTitle: null,
+ docAuthors: null,
+ navMap: new Epub2NcxNavigationMap
+ (
+ items: new List<Epub2NcxNavigationPoint>()
+ {
+ new Epub2NcxNavigationPoint
+ (
+ id: "navpoint-1",
+ navigationLabels: new List<Epub2NcxNavigationLabel>()
+ {
+ new Epub2NcxNavigationLabel
+ (
+ text: "Chapter 1"
+ )
+ },
+ content: new Epub2NcxContent
+ (
+ source: "chapter1.html" // expected in unescaped form: the NCX reader now applies Uri.UnescapeDataString to 'src'
+ )
+ )
+ }
+ ),
+ pageList: null,
+ navLists: null
+ );
+ await TestSuccessfulReadOperation(NCX_FILE_WITH_ESCAPED_CONTENT_SRC_ATTRIBUTE, expectedEpub2Ncx);
+ }
+
[Fact(DisplayName = "ReadEpub2NcxAsync should throw Epub2NcxException if a 'content' XML element has no 'src' attribute")]
public async void ReadEpub2NcxAsyncWithoutContentSrcTest()
{
diff --git a/Source/VersOne.Epub.Test/Unit/Readers/Epub3NavDocumentReaderTests.cs b/Source/VersOne.Epub.Test/Unit/Readers/Epub3NavDocumentReaderTests.cs
index 97d4a3c..f5c104b 100644
--- a/Source/VersOne.Epub.Test/Unit/Readers/Epub3NavDocumentReaderTests.cs
+++ b/Source/VersOne.Epub.Test/Unit/Readers/Epub3NavDocumentReaderTests.cs
@@ -145,6 +145,20 @@ public class Epub3NavDocumentReaderTests
""";
+ private const string NAV_FILE_WITH_ESCAPED_HREF_IN_A_ELEMENT = """
+
+
+
+
+
+ """; // NAV fixture for the URI-escaped 'href' test. NOTE(review): the XHTML markup of this literal is not visible here - confirm it was not stripped.
+
private static EpubPackage MinimalEpubPackageWithNav =>
new
(
@@ -402,6 +416,37 @@ public async void ReadEpub3NavDocumentAsyncWithEmptyLiElement()
await TestFailingReadOperation(NAV_FILE_WITH_EMPTY_LI_ELEMENT);
}
+ [Fact(DisplayName = "Reading a NAV file with a URI-escaped 'href' attribute in an 'a' XML element should succeed")]
+ public async void ReadEpub3NavDocumentAsyncWithEscapedAHrefTest()
+ {
+ Epub3NavDocument expectedEpub3NavDocument = new // expected parse result: one TOC nav holding a single list item with an anchor
+ (
+ filePath: NAV_FILE_PATH,
+ navs: new List<Epub3Nav>()
+ {
+ new Epub3Nav
+ (
+ type: Epub3StructuralSemanticsProperty.TOC,
+ ol: new Epub3NavOl
+ (
+ lis: new List<Epub3NavLi>()
+ {
+ new Epub3NavLi
+ (
+ anchor: new Epub3NavAnchor
+ (
+ href: "chapter1.html", // expected in unescaped form: the nav document reader now applies Uri.UnescapeDataString to 'href'
+ text: "Chapter 1"
+ )
+ )
+ }
+ )
+ )
+ }
+ );
+ await TestSuccessfulReadOperation(NAV_FILE_WITH_ESCAPED_HREF_IN_A_ELEMENT, expectedEpub3NavDocument);
+ }
+
private static async Task TestSuccessfulReadOperation(string navFileContent, Epub3NavDocument expectedEpub3NavDocument, EpubReaderOptions? epubReaderOptions = null)
{
TestZipFile testZipFile = CreateTestZipFileWithNavFile(navFileContent);
diff --git a/Source/VersOne.Epub.Test/Unit/Readers/PackageReaderTests.cs b/Source/VersOne.Epub.Test/Unit/Readers/PackageReaderTests.cs
index 1fa5dd3..5e13bfd 100644
--- a/Source/VersOne.Epub.Test/Unit/Readers/PackageReaderTests.cs
+++ b/Source/VersOne.Epub.Test/Unit/Readers/PackageReaderTests.cs
@@ -165,6 +165,17 @@ public class PackageReaderTests
""";
+ private const string OPF_FILE_WITH_ESCAPED_HREF_IN_MANIFEST_ITEM = $"""
+
+
+
+
+
+
+
+
+ """; // OPF fixture for the URI-escaped manifest 'href' test. NOTE(review): the XML markup of this literal is not visible here - confirm it was not stripped.
+
private const string OPF_FILE_WITHOUT_HREF_IN_MANIFEST_ITEM = $"""
@@ -743,6 +754,33 @@ public async void ReadPackageWithoutManifestItemIdWithSkippingInvalidManifestIte
await TestSuccessfulReadOperationWithSkippingInvalidManifestItems(OPF_FILE_WITHOUT_ID_IN_MANIFEST_ITEM, MinimalEpub3Package);
}
+ [Fact(DisplayName = "Read an OPF package with a URI-escaped 'href' attribute in a manifest item XML node should succeed")]
+ public async void ReadPackageWithEscapedManifestItemHrefTest()
+ {
+ EpubPackage expectedPackage = new // expected parse result: an EPUB 3 package whose manifest holds a single item
+ (
+ uniqueIdentifier: null,
+ epubVersion: EpubVersion.EPUB_3,
+ metadata: new EpubMetadata(),
+ manifest: new EpubManifest
+ (
+ id: null,
+ items: new List<EpubManifestItem>()
+ {
+ new EpubManifestItem
+ (
+ id: "item-1",
+ href: "chapter1.html", // expected unescaped value; the escaped input lives in the OPF fixture
+ mediaType: "application/xhtml+xml"
+ )
+ }
+ ),
+ spine: new EpubSpine(),
+ guide: null
+ );
+ await TestSuccessfulReadOperationWithSkippingInvalidManifestItems(OPF_FILE_WITH_ESCAPED_HREF_IN_MANIFEST_ITEM, expectedPackage);
+ }
+
[Fact(DisplayName = "Trying to read OPF package without 'href' attribute in a manifest item XML node should fail with EpubPackageException")]
public async void ReadPackageWithoutManifestItemHrefTest()
{
diff --git a/Source/VersOne.Epub/Readers/Epub2NcxReader.cs b/Source/VersOne.Epub/Readers/Epub2NcxReader.cs
index 1d17415..88af648 100644
--- a/Source/VersOne.Epub/Readers/Epub2NcxReader.cs
+++ b/Source/VersOne.Epub/Readers/Epub2NcxReader.cs
@@ -241,7 +241,7 @@ private static Epub2NcxContent ReadNavigationContent(XElement navigationContentN
id = attributeValue;
break;
case "src":
- source = attributeValue;
+ source = Uri.UnescapeDataString(attributeValue);
break;
}
}
diff --git a/Source/VersOne.Epub/Readers/Epub3NavDocumentReader.cs b/Source/VersOne.Epub/Readers/Epub3NavDocumentReader.cs
index 614c867..19f4711 100644
--- a/Source/VersOne.Epub/Readers/Epub3NavDocumentReader.cs
+++ b/Source/VersOne.Epub/Readers/Epub3NavDocumentReader.cs
@@ -170,7 +170,7 @@ private static Epub3NavAnchor ReadEpub3NavAnchor(XElement epub3NavAnchorNode)
switch (navAnchorNodeAttribute.GetLowerCaseLocalName())
{
case "href":
- href = attributeValue;
+ href = Uri.UnescapeDataString(attributeValue);
break;
case "title":
title = attributeValue;
diff --git a/Source/VersOne.Epub/Readers/NavigationReader.cs b/Source/VersOne.Epub/Readers/NavigationReader.cs
index f4dc11b..2d8e1e7 100644
--- a/Source/VersOne.Epub/Readers/NavigationReader.cs
+++ b/Source/VersOne.Epub/Readers/NavigationReader.cs
@@ -41,7 +41,7 @@ private static List GetNavigationItems(EpubSchema epubSch
Epub2NcxNavigationLabel? firstNavigationLabel = navigationPoint.NavigationLabels.FirstOrDefault() ??
throw new Epub2NcxException($"Incorrect EPUB 2 NCX: navigation point \"{navigationPoint.Id}\" should contain at least one navigation label.");
string title = firstNavigationLabel.Text;
- string source = Uri.UnescapeDataString(navigationPoint.Content.Source);
+ string source = navigationPoint.Content.Source;
if (!ContentPathUtils.IsLocalPath(source))
{
throw new Epub2NcxException($"Incorrect EPUB 2 NCX: content source \"{source}\" cannot be a remote resource.");
@@ -100,7 +100,7 @@ private static List GetNavigationItems(EpubSchema epubSch
List nestedItems = GetNavigationItems(epubSchema, epubContentRef, epub3NavLi.ChildOl, epub3NavigationBaseDirectoryPath);
if (navAnchor.Href != null)
{
- string href = Uri.UnescapeDataString(navAnchor.Href);
+ string href = navAnchor.Href;
if (!ContentPathUtils.IsLocalPath(href))
{
throw new Epub3NavException($"Incorrect EPUB 3 navigation document: anchor href \"{href}\" cannot be a remote resource.");