From a6a3bd6c713a11786f3623c96e3c4f9267d06be2 Mon Sep 17 00:00:00 2001 From: vers-one <12114169+vers-one@users.noreply.github.com> Date: Wed, 25 Dec 2024 16:22:44 -0500 Subject: [PATCH] Add `SpineReaderOptions.IgnoreMissingManifestItems` (#122) --- .../Unit/Readers/SpineReaderTests.cs | 155 +++++++++++------- Source/VersOne.Epub/Entities/EpubBookRef.cs | 14 +- .../VersOne.Epub/Options/EpubReaderOptions.cs | 18 +- .../Options/SpineReaderOptions.cs | 29 ++++ Source/VersOne.Epub/Readers/BookRefReader.cs | 2 +- Source/VersOne.Epub/Readers/SpineReader.cs | 13 +- 6 files changed, 162 insertions(+), 69 deletions(-) create mode 100644 Source/VersOne.Epub/Options/SpineReaderOptions.cs diff --git a/Source/VersOne.Epub.Test/Unit/Readers/SpineReaderTests.cs b/Source/VersOne.Epub.Test/Unit/Readers/SpineReaderTests.cs index 2244ec0..862e3d6 100644 --- a/Source/VersOne.Epub.Test/Unit/Readers/SpineReaderTests.cs +++ b/Source/VersOne.Epub.Test/Unit/Readers/SpineReaderTests.cs @@ -1,4 +1,5 @@ using VersOne.Epub.Internal; +using VersOne.Epub.Options; using VersOne.Epub.Schema; using VersOne.Epub.Test.Unit.Mocks; @@ -11,8 +12,8 @@ public void GetReadingOrderForMinimalSpineTest() { EpubSchema epubSchema = CreateEpubSchema(); EpubContentRef epubContentRef = new(); - List expectedReadingOrder = new(); - List actualReadingOrder = SpineReader.GetReadingOrder(epubSchema, epubContentRef); + List expectedReadingOrder = []; + List actualReadingOrder = SpineReader.GetReadingOrder(epubSchema, epubContentRef, new SpineReaderOptions()); Assert.Equal(expectedReadingOrder, actualReadingOrder); } @@ -23,87 +24,125 @@ public void GetReadingOrderForTypicalSpineTest() ( manifest: new EpubManifest ( - items: new List() - { - new EpubManifestItem + items: + [ + new ( id: "item-1", href: "chapter1.html", mediaType: "application/xhtml+xml" ), - new EpubManifestItem + new ( id: "item-2", href: "chapter2.html", mediaType: "application/xhtml+xml" ) - } + ] ), spine: new EpubSpine ( - items: new List() - { - 
new EpubSpineItemRef + items: + [ + new ( idRef: "item-1" ), - new EpubSpineItemRef + new ( idRef: "item-2" ) - } + ] ) ); EpubLocalTextContentFileRef expectedHtmlFileRef1 = CreateTestHtmlFileRef("chapter1.html"); EpubLocalTextContentFileRef expectedHtmlFileRef2 = CreateTestHtmlFileRef("chapter2.html"); - List expectedHtmlLocal = new() - { + List expectedHtmlLocal = + [ expectedHtmlFileRef1, expectedHtmlFileRef2 - }; + ]; EpubContentRef epubContentRef = new ( html: new EpubContentCollectionRef(expectedHtmlLocal.AsReadOnly()) ); - List expectedReadingOrder = new() - { + List expectedReadingOrder = + [ expectedHtmlFileRef1, expectedHtmlFileRef2 - }; - List actualReadingOrder = SpineReader.GetReadingOrder(epubSchema, epubContentRef); + ]; + List actualReadingOrder = SpineReader.GetReadingOrder(epubSchema, epubContentRef, new SpineReaderOptions()); Assert.Equal(expectedReadingOrder, actualReadingOrder); } - [Fact(DisplayName = "GetReadingOrder should throw EpubPackageException if there is no manifest item with ID matching to the ID ref of a spine item")] - public void GetReadingOrderWithMissingManifestItemTest() + [Fact(DisplayName = "GetReadingOrder should throw EpubPackageException if there is no manifest item with ID matching to the ID ref of a spine item and SpineReaderOptions.IgnoreMissingManifestItems is false")] + public void GetReadingOrderWithMissingManifestItemWithoutIgnoringErrorsTest() + { + EpubSchema epubSchema = CreateEpubSchema + ( + manifest: new EpubManifest + ( + items: + [ + new + ( + id: "item-2", + href: "chapter2.html", + mediaType: "application/xhtml+xml" + ) + ] + ), + spine: new EpubSpine + ( + items: + [ + new + ( + idRef: "item-1" + ) + ] + ) + ); + EpubContentRef epubContentRef = new(); + Assert.Throws(() => SpineReader.GetReadingOrder(epubSchema, epubContentRef, new SpineReaderOptions())); + } + + [Fact(DisplayName = "GetReadingOrder should skip non-existent manifest items if SpineReaderOptions.IgnoreMissingManifestItems is true")] + 
public void GetReadingOrderWithMissingManifestItemWithIgnoringErrorsTest() { EpubSchema epubSchema = CreateEpubSchema ( manifest: new EpubManifest ( - items: new List() - { - new EpubManifestItem + items: + [ + new ( id: "item-2", href: "chapter2.html", mediaType: "application/xhtml+xml" ) - } + ] ), spine: new EpubSpine ( - items: new List() - { - new EpubSpineItemRef + items: + [ + new ( idRef: "item-1" ) - } + ] ) ); EpubContentRef epubContentRef = new(); - Assert.Throws(() => SpineReader.GetReadingOrder(epubSchema, epubContentRef)); + SpineReaderOptions spineReaderOptions = new() + { + IgnoreMissingManifestItems = true + }; + List expectedReadingOrder = []; + List actualReadingOrder = SpineReader.GetReadingOrder(epubSchema, epubContentRef, spineReaderOptions); + Assert.Equal(expectedReadingOrder, actualReadingOrder); } [Fact(DisplayName = "GetReadingOrder should throw EpubPackageException if there is no HTML content file referenced by a manifest item")] @@ -111,31 +150,31 @@ public void GetReadingOrderWithMissingHtmlContentFileTest() { EpubSchema epubSchema = CreateEpubSchema ( - manifest: new EpubManifest + manifest: new ( - items: new List() - { - new EpubManifestItem + items: + [ + new ( id: "item-1", href: "chapter1.html", mediaType: "application/xhtml+xml" ) - } + ] ), - spine: new EpubSpine + spine: new ( - items: new List() - { - new EpubSpineItemRef + items: + [ + new ( idRef: "item-1" ) - } + ] ) ); EpubContentRef epubContentRef = new(); - Assert.Throws(() => SpineReader.GetReadingOrder(epubSchema, epubContentRef)); + Assert.Throws(() => SpineReader.GetReadingOrder(epubSchema, epubContentRef, new SpineReaderOptions())); } [Fact(DisplayName = "GetReadingOrder should throw EpubPackageException if the HTML content file referenced by a spine item is a remote resource")] @@ -144,34 +183,34 @@ public void GetReadingOrderWithRemoteHtmlContentFileTest() string remoteFileHref = "https://example.com/books/123/chapter1.html"; EpubSchema epubSchema = 
CreateEpubSchema ( - manifest: new EpubManifest + manifest: new ( - items: new List() - { - new EpubManifestItem + items: + [ + new ( id: "item-1", href: remoteFileHref, mediaType: "application/xhtml+xml" ) - } + ] ), - spine: new EpubSpine + spine: new ( - items: new List() - { - new EpubSpineItemRef + items: + [ + new ( idRef: "item-1" ) - } + ] ) ); - List htmlRemote = new() - { - new EpubRemoteTextContentFileRef + List htmlRemote = + [ + new ( - metadata: new EpubContentFileRefMetadata + metadata: new ( key: remoteFileHref, contentType: EpubContentType.XHTML_1_1, @@ -179,19 +218,19 @@ public void GetReadingOrderWithRemoteHtmlContentFileTest() ), epubContentLoader: new TestEpubContentLoader() ) - }; + ]; EpubContentRef epubContentRef = new ( html: new EpubContentCollectionRef(null, htmlRemote.AsReadOnly()) ); - Assert.Throws(() => SpineReader.GetReadingOrder(epubSchema, epubContentRef)); + Assert.Throws(() => SpineReader.GetReadingOrder(epubSchema, epubContentRef, new SpineReaderOptions())); } private static EpubSchema CreateEpubSchema(EpubManifest? manifest = null, EpubSpine? spine = null) { return new ( - package: new EpubPackage + package: new ( uniqueIdentifier: null, epubVersion: EpubVersion.EPUB_3, diff --git a/Source/VersOne.Epub/Entities/EpubBookRef.cs b/Source/VersOne.Epub/Entities/EpubBookRef.cs index c16b4ab..33583cc 100644 --- a/Source/VersOne.Epub/Entities/EpubBookRef.cs +++ b/Source/VersOne.Epub/Entities/EpubBookRef.cs @@ -3,6 +3,7 @@ using System.Threading.Tasks; using VersOne.Epub.Environment; using VersOne.Epub.Internal; +using VersOne.Epub.Options; namespace VersOne.Epub { @@ -24,12 +25,15 @@ public class EpubBookRef : IDisposable /// The book's description or null if the description is not present in the book. /// The parsed EPUB schema of the book. /// The collection of references to the book's content files within the EPUB archive. + /// Various options to configure the behavior of the EPUB reader. /// The parameter is null. 
/// The parameter is null. /// The parameter is null. /// The parameter is null. /// The parameter is null. - public EpubBookRef(IZipFile epubFile, string? filePath, string title, string author, List? authorList, string? description, EpubSchema schema, EpubContentRef content) + public EpubBookRef( + IZipFile epubFile, string? filePath, string title, string author, List? authorList, string? description, EpubSchema schema, + EpubContentRef content, EpubReaderOptions? epubReaderOptions = null) { EpubFile = epubFile ?? throw new ArgumentNullException(nameof(epubFile)); FilePath = filePath; @@ -39,6 +43,7 @@ public EpubBookRef(IZipFile epubFile, string? filePath, string title, string aut Description = description; Schema = schema ?? throw new ArgumentNullException(nameof(schema)); Content = content ?? throw new ArgumentNullException(nameof(content)); + EpubReaderOptions = epubReaderOptions ?? new EpubReaderOptions(); isDisposed = false; } @@ -90,6 +95,11 @@ public EpubBookRef(IZipFile epubFile, string? filePath, string title, string aut /// public IZipFile EpubFile { get; } + /// + /// Gets the options that configure the behavior of the EPUB reader. + /// + public EpubReaderOptions EpubReaderOptions { get; } + /// /// Loads the book's cover image from the EPUB file. 
/// @@ -132,7 +142,7 @@ public List GetReadingOrder() /// public async Task> GetReadingOrderAsync() { - return await Task.Run(() => SpineReader.GetReadingOrder(Schema, Content)).ConfigureAwait(false); + return await Task.Run(() => SpineReader.GetReadingOrder(Schema, Content, EpubReaderOptions.SpineReaderOptions)).ConfigureAwait(false); } /// diff --git a/Source/VersOne.Epub/Options/EpubReaderOptions.cs b/Source/VersOne.Epub/Options/EpubReaderOptions.cs index 1db3f85..bbd6b10 100644 --- a/Source/VersOne.Epub/Options/EpubReaderOptions.cs +++ b/Source/VersOne.Epub/Options/EpubReaderOptions.cs @@ -11,19 +11,15 @@ public class EpubReaderOptions /// An optional preset to initialize the class with a predefined set of options. public EpubReaderOptions(EpubReaderOptionsPreset? preset = null) { - BookCoverReaderOptions = new BookCoverReaderOptions(preset); PackageReaderOptions = new PackageReaderOptions(preset); ContentReaderOptions = new ContentReaderOptions(preset); ContentDownloaderOptions = new ContentDownloaderOptions(preset); + BookCoverReaderOptions = new BookCoverReaderOptions(preset); + SpineReaderOptions = new SpineReaderOptions(preset); Epub2NcxReaderOptions = new Epub2NcxReaderOptions(preset); XmlReaderOptions = new XmlReaderOptions(preset); } - /// - /// Gets or sets EPUB content reader options which is used for loading the EPUB book cover image. - /// - public BookCoverReaderOptions BookCoverReaderOptions { get; set; } - /// /// Gets or sets EPUB OPF package reader options. /// @@ -39,6 +35,16 @@ public EpubReaderOptions(EpubReaderOptionsPreset? preset = null) /// public ContentDownloaderOptions ContentDownloaderOptions { get; set; } + /// + /// Gets or sets EPUB content reader options which is used for loading the EPUB book cover image. + /// + public BookCoverReaderOptions BookCoverReaderOptions { get; set; } + + /// + /// Gets or sets EPUB spine reader options which is used for parsing the default reading order of the EPUB book. 
+ /// + public SpineReaderOptions SpineReaderOptions { get; set; } + /// /// Gets or sets EPUB 2 NCX navigation document reader options. /// diff --git a/Source/VersOne.Epub/Options/SpineReaderOptions.cs b/Source/VersOne.Epub/Options/SpineReaderOptions.cs new file mode 100644 index 0000000..62bba55 --- /dev/null +++ b/Source/VersOne.Epub/Options/SpineReaderOptions.cs @@ -0,0 +1,29 @@ +using System.Diagnostics.CodeAnalysis; + +namespace VersOne.Epub.Options +{ + /// + /// Various options to configure the behavior of the EPUB spine reader which is used for parsing the <spine> section + /// of the EPUB OPF package file. This section represents the default reading order of the EPUB book. + /// + public class SpineReaderOptions + { + /// + /// Initializes a new instance of the class. + /// + /// An optional preset to initialize the class with a predefined set of options. + [SuppressMessage("Style", "IDE0060:Remove unused parameter", Justification = "Temporarily ignore unused 'preset' parameter.")] + public SpineReaderOptions(EpubReaderOptionsPreset? preset = null) + { + } + + /// + /// Gets or sets a value indicating whether EPUB spine reader should ignore the error when the manifest item referenced by + /// an EPUB spine item is missing. + /// If it's set to false and the manifest item with the given ID is not present, then + /// the "Incorrect EPUB spine: item with IdRef = "..." is missing in the manifest" exception will be thrown. + /// Default value is false. + /// + public bool IgnoreMissingManifestItems { get; set; } + } +} diff --git a/Source/VersOne.Epub/Readers/BookRefReader.cs b/Source/VersOne.Epub/Readers/BookRefReader.cs index 8858895..c569901 100644 --- a/Source/VersOne.Epub/Readers/BookRefReader.cs +++ b/Source/VersOne.Epub/Readers/BookRefReader.cs @@ -54,7 +54,7 @@ private async Task OpenBookAsync(IZipFile epubFile, string? filePat string? 
description = schema.Package.Metadata.Descriptions.FirstOrDefault()?.Description; ContentReader contentReader = new(environmentDependencies, epubReaderOptions); EpubContentRef content = await Task.Run(() => contentReader.ParseContentMap(schema, epubFile)).ConfigureAwait(false); - return new(epubFile, filePath, title, author, authorList, description, schema, content); + return new(epubFile, filePath, title, author, authorList, description, schema, content, epubReaderOptions); } private IZipFile GetZipFile(string filePath) diff --git a/Source/VersOne.Epub/Readers/SpineReader.cs b/Source/VersOne.Epub/Readers/SpineReader.cs index bd5b671..912c174 100644 --- a/Source/VersOne.Epub/Readers/SpineReader.cs +++ b/Source/VersOne.Epub/Readers/SpineReader.cs @@ -1,17 +1,26 @@ using System.Collections.Generic; +using VersOne.Epub.Options; using VersOne.Epub.Schema; namespace VersOne.Epub.Internal { internal static class SpineReader { - public static List GetReadingOrder(EpubSchema epubSchema, EpubContentRef epubContentRef) + public static List GetReadingOrder( + EpubSchema epubSchema, EpubContentRef epubContentRef, SpineReaderOptions spineReaderOptions) { List result = new(); foreach (EpubSpineItemRef spineItemRef in epubSchema.Package.Spine.Items) { - EpubManifestItem manifestItem = epubSchema.Package.Manifest.Items.Find(item => item.Id == spineItemRef.IdRef) ?? + EpubManifestItem? manifestItem = epubSchema.Package.Manifest.Items.Find(item => item.Id == spineItemRef.IdRef); + if (manifestItem == null) + { + if (spineReaderOptions.IgnoreMissingManifestItems) + { + continue; + } throw new EpubPackageException($"Incorrect EPUB spine: item with IdRef = \"{spineItemRef.IdRef}\" is missing in the manifest."); + } if (epubContentRef.Html.ContainsRemoteFileRefWithUrl(manifestItem.Href)) { throw new EpubPackageException($"Incorrect EPUB manifest: EPUB spine item \"{manifestItem.Href}\" cannot be a remote resource.");