Skip to content

Commit

Permalink
Also bundle with EPUBs images that don’t have a characteristic file e…
Browse files Browse the repository at this point in the history
…xtension as `image` MIME type and `.image` file extension.
  • Loading branch information
danburzo committed Aug 11, 2024
1 parent cb0a57e commit d7b655b
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 47 deletions.
2 changes: 1 addition & 1 deletion index.js
Original file line number Diff line number Diff line change
Expand Up @@ -978,7 +978,7 @@ async function epubgen(data, output_path, options) {
remoteResources: remoteResources.map(entry => ({
id: entry.mapped.replace(/[^a-z0-9]/gi, ''),
href: entry.mapped,
mimetype: lookupMimetype(entry.mapped)
mimetype: entry.mimetype
}))
});

Expand Down
10 changes: 0 additions & 10 deletions src/constants/regex.js

This file was deleted.

5 changes: 2 additions & 3 deletions src/enhancements.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { parseSrcset, stringifySrcset } from 'srcset';
import replaceElementType from './replace-element-type.js';
import { REGEX_IMAGE_URL } from './constants/regex.js';
import { isImageURL } from './util/file-mimetype.js';

/*
Convert AMP markup to HTML markup
Expand Down Expand Up @@ -52,7 +52,6 @@ function fixLazyLoadedImages(doc) {
<img src='original-size.png'/>
*/
function imagesAtFullSize(doc) {
let include_pattern = REGEX_IMAGE_URL;
let exclude_patterns = [
/*
Exclude Wikipedia links to image file pages
Expand Down Expand Up @@ -85,7 +84,7 @@ function imagesAtFullSize(doc) {

// Only replace if the `href` matches an image file
if (
include_pattern.test(href) &&
isImageURL(href, doc) &&
!exclude_patterns.some(pattern => pattern.test(href))
) {
img.setAttribute('src', anchor.href);
Expand Down
16 changes: 3 additions & 13 deletions src/inline-images.js
Original file line number Diff line number Diff line change
@@ -1,25 +1,15 @@
import { parseSrcset, stringifySrcset } from 'srcset';
import { lookupMimetype, imageMimetypes } from './util/file-mimetype.js';
import { getMimetypeFromURL, imageMimetypes } from './util/file-mimetype.js';
import fetchBase64 from './util/fetch-base64.js';

/*
	Look up the MIME type for an image `src`.

	The `src` is resolved against `doc.baseURI` and only the
	resulting pathname is handed to `lookupMimetype()`. When the
	URL can't be constructed (probably due to a bad `doc.baseURI`),
	the raw `src` string is looked up instead.
*/
function get_mime(src, doc) {
	let resolved;
	try {
		resolved = new URL(src, doc.baseURI).pathname;
	} catch {
		// Fall back to the unparsed value.
		resolved = src;
	}
	return lookupMimetype(resolved);
}

export default async function inlineImages(doc, fetchOptions = {}, out) {
if (out) {
out.write('Inlining images...\n');
}
let src_promises = Array.from(
doc.querySelectorAll('picture source[src], img[src]')
).map(async el => {
let mime = get_mime(el.src, doc);
let mime = getMimetypeFromURL(el.src, doc);
/*
For web pages using atypical URLs for images
let’s just use a generic MIME type and hope it works.
Expand Down Expand Up @@ -61,7 +51,7 @@ export default async function inlineImages(doc, fetchOptions = {}, out) {
stringifySrcset(
await Promise.all(
items.map(async item => {
let mime = get_mime(item.url, doc);
let mime = getMimetypeFromURL(item.url, doc);

/*
For web pages using atypical URLs for images
Expand Down
27 changes: 15 additions & 12 deletions src/remote-resources.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import { randomUUID as uuid } from 'node:crypto';
import { parseSrcset, stringifySrcset } from 'srcset';
import { REGEX_IMAGE_URL } from './constants/regex.js';
import {
getMimetypeFromURL,
imageMimetypes,
extForMimetype
} from './util/file-mimetype.js';
import { getUrlOrigin } from './util/url-origin.js';

export default function remoteResources(doc) {
Expand All @@ -11,21 +15,20 @@ export default function remoteResources(doc) {
and return a uniquely generated file name instead.
*/
function collectAndReplace(src) {
let pathname = src;
try {
pathname = new URL(src, doc.baseURI).pathname;
} catch (err) {
// no-op, probably due to bad `doc.baseURI`.
}
let match = pathname.match(REGEX_IMAGE_URL);
if (!match) {
return src;
let ext;
let mime = getMimetypeFromURL(src);
if (mime && imageMimetypes.has(mime)) {
ext = extForMimetype(mime);
} else {
ext = '.image';
mime = 'image';
}
if (!srcs.has(src)) {
srcs.set(src, {
original: src,
mapped: `rr-${uuid()}.${match[1]}`,
origin: getUrlOrigin(doc.baseURI)
mapped: `rr-${uuid()}${ext}`,
origin: getUrlOrigin(doc.baseURI),
mimetype: mime
});
}
return `./${srcs.get(src).mapped}`;
Expand Down
23 changes: 15 additions & 8 deletions src/util/file-mimetype.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,24 @@ export function lookupMimetype(filepath) {
return mimetype.lookup(filepath);
}

export function extForMimetype(mimetype) {
return Object.entries(mimetype.catalog).find(it => it[1] === mimetype)?.[0];
export function extForMimetype(type) {
return Object.entries(mimetype.catalog).find(it => it[1] === type)?.[0];
}

/*
Note: it is unfortunate that we use two separate mechanisms
to discern when an URL points to an image, but here we are.
/**
 * Look up the MIME type for the resource a URL points to.
 *
 * The URL is resolved against the document's base URI and only its
 * pathname (no query string or hash) is passed to `lookupMimetype()`.
 *
 * @param {string} src - Absolute or relative URL of the resource.
 * @param {{ baseURI?: string }} [doc] - Document used to resolve
 *   relative URLs. Optional: absolute URLs are still parsed without it.
 * @returns The result of `lookupMimetype()` for the resolved pathname,
 *   or for `src` itself when the URL could not be parsed.
 */
export function getMimetypeFromURL(src, doc) {
	let pathname = src;
	try {
		/*
			`doc?.baseURI` rather than `doc.baseURI`: callers that omit
			`doc` must not lose URL parsing altogether, otherwise an
			absolute URL with a query string is looked up verbatim.
		*/
		pathname = new URL(src, doc?.baseURI).pathname;
	} catch (err) {
		// no-op, probably due to bad `doc.baseURI` or a relative `src` without a base
	}
	return lookupMimetype(pathname);
}

export function isImageURL(src, doc) {
return imageMimetypes.has(getMimetypeFromURL(src, doc));

Check failure on line 28 in src/util/file-mimetype.js

View workflow job for this annotation

GitHub Actions / build (18.x)

'imageMimetypes' was used before it was defined

Check failure on line 28 in src/util/file-mimetype.js

View workflow job for this annotation

GitHub Actions / build (14.x)

'imageMimetypes' was used before it was defined

Check failure on line 28 in src/util/file-mimetype.js

View workflow job for this annotation

GitHub Actions / build (16.x)

'imageMimetypes' was used before it was defined

Check failure on line 28 in src/util/file-mimetype.js

View workflow job for this annotation

GitHub Actions / build (18.x)

'imageMimetypes' was used before it was defined
}

`imageMimetypes` here needs to be kept in sync with the
`REGEX_IMAGE_URL` constant!
*/
export const imageMimetypes = new Set([
'image/avif',
'image/bmp',
Expand Down

0 comments on commit d7b655b

Please sign in to comment.