From 134ec60adb29d415f4156059d8f576ce4294a9ab Mon Sep 17 00:00:00 2001
From: Kiko Beats
Date: Mon, 18 Sep 2023 22:29:14 +0200
Subject: [PATCH] fix(audio): don't consider iframe referencing itself (#663)

Replace the manual `iframe.each` loop in the audio rule with a single
expression: every iframe `src` is passed through `normalizeUrl(url, src)`
and the resulting list is de-duplicated with a `Set`, so each distinct
candidate is handed to `getIframe` only once. The early return now
triggers when no candidate is left, and the first iframe that yields an
audio result still wins via `Promise.any`.

Also extend the `.normalizeUrl` test with absolute, root-relative,
relative, `data:` and `javascript:` inputs.

NOTE(review): the previous `typeof normalizedUrl === 'string'` and
`startsWith('http')` guards are no longer applied to the candidates;
confirm that `getIframe` tolerates `undefined` and non-http values.
---
 packages/metascraper-audio/index.js        | 41 +++++++---------------
 packages/metascraper-helpers/test/index.js |  6 ++++
 2 files changed, 18 insertions(+), 29 deletions(-)

diff --git a/packages/metascraper-audio/index.js b/packages/metascraper-audio/index.js
index 21f8cb436..77cace027 100644
--- a/packages/metascraper-audio/index.js
+++ b/packages/metascraper-audio/index.js
@@ -78,35 +78,18 @@ module.exports = ({ getIframe = _getIframe } = {}) => {
   return {
     audio: audioRules.concat(
       async ({ htmlDom: $, url }) => {
-        const iframe = $('iframe')
-        if (iframe.length === 0) return
-
-        const srcs = []
-
-        iframe.each(function () {
-          const src = $(this).attr('src')
-          const normalizedUrl = normalizeUrl(url, src)
-          if (
-            typeof normalizedUrl === 'string' &&
-            normalizedUrl.startsWith('http') &&
-            srcs.indexOf(normalizedUrl) === -1
-          ) {
-            srcs.push(normalizedUrl)
-          }
-        })
-
-        return srcs.length > 0
-          ? pReflect(
-            Promise.any(
-              srcs.map(async src => {
-                const htmlDom = await getIframe(url, $, { src })
-                const result = await findRule(audioRules, { htmlDom, url })
-                if (!has(result)) throw TypeError('no result')
-                return result
-              })
-            )
-          ).then(({ value }) => value)
-          : undefined
+        const srcs = [...new Set($('iframe').map((_, element) => $(element).attr('src')).get().map(src => normalizeUrl(url, src)))]
+        if (srcs.length === 0) return
+        return pReflect(
+          Promise.any(
+            srcs.map(async src => {
+              const htmlDom = await getIframe(url, $, { src })
+              const result = await findRule(audioRules, { htmlDom, url })
+              if (!has(result)) throw TypeError('no result')
+              return result
+            })
+          )
+        ).then(({ value }) => value)
       },
       async ({ htmlDom: $, url }) => {
         const src = $twitter($, 'twitter:player')
diff --git a/packages/metascraper-helpers/test/index.js b/packages/metascraper-helpers/test/index.js
index a7cfd0118..de52ab36c 100644
--- a/packages/metascraper-helpers/test/index.js
+++ b/packages/metascraper-helpers/test/index.js
@@ -58,6 +58,12 @@ test('.normalizeUrl', t => {
   )
   t.is(normalizeUrl('https://example.com/'), 'https://example.com/')
   t.is(normalizeUrl('https://example.com'), 'https://example.com/')
+  t.is(normalizeUrl('https://www.example.com', 'https://www.example.com/foo'), 'https://www.example.com/foo')
+  t.is(normalizeUrl('https://www.example.com', '/foo'), 'https://www.example.com/foo')
+  t.is(normalizeUrl('https://www.example.com', 'file.html'), 'https://www.example.com/file.html')
+  t.is(normalizeUrl('https://www.example.com', 'data:text/html;base64,PGh0bWw+SGVsbG8sIHdvcmxkITwvaHRtbD4='), 'data:text/html;base64,PGh0bWw+SGVsbG8sIHdvcmxkITwvaHRtbD4=')
+  t.is(normalizeUrl('https://www.example.com', 'javascript:alert(\'Hello, world!\');'), undefined)
+  t.is(normalizeUrl('https://www.example.com', 'javascript:void(0)'), undefined)
 })
 
 test('.author', t => {