From 1bda60a8920e1d9ec6d287843b3ea243feae962e Mon Sep 17 00:00:00 2001 From: Kiko Beats Date: Tue, 15 Aug 2023 21:32:58 +0200 Subject: [PATCH] fix(audio,video): load twitter:player as iframe (#655) --- packages/metascraper-audio/index.js | 120 ++- packages/metascraper-audio/package.json | 3 +- .../fixtures/providers/transistor.fm.html | 851 ++++++++++++++++++ packages/metascraper-audio/test/index.js | 91 +- packages/metascraper-audio/test/providers.js | 42 + packages/metascraper-helpers/index.js | 5 + .../metascraper-iframe/src/from-twitter.js | 45 +- packages/metascraper-spotify/index.js | 6 +- packages/metascraper-video/index.js | 83 +- packages/metascraper-video/package.json | 5 +- .../test/fixtures/video-type-relative.html | 15 - .../test/fixtures/video-type.html | 16 - packages/metascraper-video/test/index.js | 102 +-- packages/metascraper-video/test/providers.js | 6 +- .../test/snapshots/index.js.md | 21 +- .../test/snapshots/index.js.snap | Bin 519 -> 549 bytes .../test/integration/acast/index.js | 2 +- .../test/integration/bloomberg/index.js | 2 +- .../test/integration/engadget/index.js | 2 +- .../wikipedia/snapshots/index.js.md | 2 +- .../wikipedia/snapshots/index.js.snap | Bin 507 -> 542 bytes 21 files changed, 1153 insertions(+), 266 deletions(-) create mode 100644 packages/metascraper-audio/test/fixtures/providers/transistor.fm.html delete mode 100644 packages/metascraper-video/test/fixtures/video-type-relative.html delete mode 100644 packages/metascraper-video/test/fixtures/video-type.html diff --git a/packages/metascraper-audio/index.js b/packages/metascraper-audio/index.js index 43a2de0fd..21f8cb436 100644 --- a/packages/metascraper-audio/index.js +++ b/packages/metascraper-audio/index.js @@ -6,20 +6,41 @@ const { audio, findRule, has, - isMime, + $twitter, loadIframe, normalizeUrl, toRule } = require('@metascraper/helpers') -const memoize = require('@keyvhq/memoize') +const { find, chain, isEqual } = require('lodash') const pReflect = 
require('p-reflect') -const got = require('got') const toAudio = toRule(audio) -const withContentType = (url, contentType) => - isMime(contentType, 'audio') ? url : false +const toAudioFromDom = toRule((domNodes, opts) => { + const values = chain(domNodes) + .map(domNode => ({ + src: domNode?.attribs.src, + type: chain(domNode) + .get('attribs.type') + .split(';') + .get(0) + .split('/') + .get(1) + // https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types + .replace('mpeg', 'mp3') + .value() + })) + .uniqWith(isEqual) + .value() + + let result + find( + values, + ({ src, type }) => (result = audio(src, Object.assign({ type }, opts))) + ) + return result +}) const audioRules = [ ({ url, htmlDom: $ }) => { @@ -35,45 +56,27 @@ const audioRules = [ }) : undefined }, - toAudio($ => { - const contentType = - $('meta[name="twitter:player:stream:content_type"]').attr('content') || - $('meta[property="twitter:player:stream:content_type"]').attr('content') - - const streamUrl = - $('meta[name="twitter:player:stream"]').attr('content') || - $('meta[property="twitter:player:stream"]').attr('content') - - return contentType ? withContentType(streamUrl, contentType) : streamUrl - }), + ({ url, htmlDom: $ }) => { + const src = $twitter($, 'twitter:player:stream') + return src + ? 
audio(src, { + url, + type: $twitter($, 'twitter:player:stream:content_type') + }) + : undefined + }, toAudio($jsonld('contentUrl')), - toAudio($ => $('audio').attr('src')), - toAudio($ => $('audio > source').attr('src')), + toAudioFromDom($ => $('audio').get()), + toAudioFromDom($ => $('audio > source').get()), ({ htmlDom: $ }) => $filter($, $('a[href]'), el => audio(el.attr('href'))) ] const _getIframe = (url, $, { src }) => loadIframe(url, $.load(``)) -const createGetPlayer = ({ gotOpts, keyvOpts }) => { - const getPlayer = async playerUrl => { - const { value: response } = await pReflect(got(playerUrl, gotOpts)) - if (!response) return - const contentType = response.headers['content-type'] - if (!contentType || !contentType.startsWith('text')) return - return response.body - } - return memoize(getPlayer, keyvOpts, { - value: value => (value === undefined ? null : value) - }) -} - -module.exports = ({ getIframe = _getIframe, gotOpts, keyvOpts } = {}) => { - const getPlayer = createGetPlayer({ gotOpts, keyvOpts }) - +module.exports = ({ getIframe = _getIframe } = {}) => { return { - audio: [ - ...audioRules, + audio: audioRules.concat( async ({ htmlDom: $, url }) => { const iframe = $('iframe') if (iframe.length === 0) return @@ -92,33 +95,28 @@ module.exports = ({ getIframe = _getIframe, gotOpts, keyvOpts } = {}) => { } }) - if (srcs.length === 0) return - - const { value } = await pReflect( - Promise.any( - srcs.map(async src => { - const htmlDom = await getIframe(url, $, { src }) - const result = await findRule(audioRules, { htmlDom, url }) - if (!has(result)) throw TypeError('no result') - return result - }) - ) - ) - - return value + return srcs.length > 0 + ? 
pReflect( + Promise.any( + srcs.map(async src => { + const htmlDom = await getIframe(url, $, { src }) + const result = await findRule(audioRules, { htmlDom, url }) + if (!has(result)) throw TypeError('no result') + return result + }) + ) + ).then(({ value }) => value) + : undefined }, async ({ htmlDom: $, url }) => { - const playerUrl = - $('meta[name="twitter:player"]').attr('content') || - $('meta[property="twitter:player"]').attr('content') - if (!playerUrl) return - - const html = await getPlayer(normalizeUrl(url, playerUrl)) - if (!html) return - - const htmlDom = $.load(html) - return findRule(audioRules, { htmlDom, url }) + const src = $twitter($, 'twitter:player') + return src + ? findRule(audioRules, { + htmlDom: await getIframe(url, $, { src }), + url + }) + : undefined } - ] + ) } } diff --git a/packages/metascraper-audio/package.json b/packages/metascraper-audio/package.json index 09e0fd56e..5321895aa 100644 --- a/packages/metascraper-audio/package.json +++ b/packages/metascraper-audio/package.json @@ -22,9 +22,8 @@ "metascraper" ], "dependencies": { - "@keyvhq/memoize": "~2.0.3", "@metascraper/helpers": "^5.35.1", - "got": "~11.8.6", + "lodash": "~4.17.21", "p-reflect": "~2.1.0" }, "devDependencies": { diff --git a/packages/metascraper-audio/test/fixtures/providers/transistor.fm.html b/packages/metascraper-audio/test/fixtures/providers/transistor.fm.html new file mode 100644 index 000000000..a5c979fdb --- /dev/null +++ b/packages/metascraper-audio/test/fixtures/providers/transistor.fm.html @@ -0,0 +1,851 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Build Your SaaS | Paul Jarvis: gaining freedom by building an indie business + + + + + + + + + + + + +
+
+
+
+
+ + + +
+

Paul Jarvis: gaining freedom by building an indie business

+ + +
+
+ +
+
+ + + + + + +
+ +
Justin catches up with his old internet friend Paul Jarvis. Today, Paul co-founded Fathom Analytics with Jack Ellis: a simple alternative to Google Analytics. Paul is also the author of the book "Company of One," which has influenced a whole generation of indie entrepreneurs (and has been reviewed by Cal Newport, Chris Guillebeau, Ben Chestnut, Tiago Forte, and more). Previously, Justin and Paul did a weekly mastermind, where they supported and encouraged each other around our indie businesses. They decided to do a catch-up call and recorded it so you could listen in. 👍

+

Highlights:
+
    +
  • (00:10) - Intro
  • +
  • (02:26) - Being off the internet
  • +
  • (04:04) - What's a typical day for Paul?
  • +
  • (06:27) - Looking back at our Mastermind call
  • +
  • (08:14) - There's no beginning and no end
  • +
  • (10:42) - Things that are out of your control affect your business
  • +
  • (13:14) - Does Justin's surfing metaphor make sense to a surfer?
  • +
  • (16:17) - How would you start an indie business in 2023?
  • +
  • (22:11) - You've got to get in motion
  • +
  • (25:14) - Using products in your category for a long time
  • +
  • (27:59) - Is there still any room in Saas?
  • +
  • (32:02) - The act of making the bet
  • +
  • (38:51) - Is freelancing still viable in 2023?
  • +
  • (43:01) - Company design is lifestyle design
  • +
  • (45:06) - Worrying about being stagnant
  • +
  • (47:26) - How do you handle customer feature requests?
  • +
  • (52:14) - It's ok to be late to a shift in the market
  • +
  • (58:30) - Caring is an indie advantage
  • +
  • (01:05:11) - Collaboration is what gets us anywhere
  • +


+
๐ŸŽ™๏ธ Podcast hosting is provided by Transistor.fm.

Links: +
+ + Thanks to our monthly supporters +
    +
  • Pascal from sharpen.page
  • +
  • Rewardful.com
  • +
  • Greg Park
  • +
  • Mitchell Davis from RecruitKit.com.au
  • +
  • Marcel Fahle, wearebold.af
  • +
  • Ethan Gunderson
  • +
  • Anton Zorin from ProdCamp.com
  • +
  • Bill Condo (@mavrck)
  • +
  • Ward from MemberSpace.com
  • +
  • Russell Brown from Photivo.com
  • +
  • Evandro Sasse
  • +
  • Austin Loveless
  • +
  • Michael Sitver
  • +
  • Fathom Analytics
  • +
  • Dan Buda
  • +
  • Colin Gray
  • +
  • Dave Giunta
  • +
+ + ★ Support this podcast on Patreon ★ + + + + + + + + + +
+ + +
+

Creators and Guests

+ +
+ + + + + + + +
+ + + Paul Jarvis + +
Guest
+
+ +
+ Paul Jarvis + +
Co-founder of Fathom Analytics, author of Company of One
+ + + +
+
+ + +
+
+ +
+ +
+ Paul Jarvis: gaining freedom by building an indie business +
+ + +
+
+ +
+ +
+ + + + +
+ + + + + + + + \ No newline at end of file diff --git a/packages/metascraper-audio/test/index.js b/packages/metascraper-audio/test/index.js index 94bcce292..8a1e85585 100644 --- a/packages/metascraper-audio/test/index.js +++ b/packages/metascraper-audio/test/index.js @@ -5,31 +5,6 @@ const test = require('ava') const createMetascraper = (...args) => require('metascraper')([require('..')(...args)]) -test('provide `keyvOpts`', async t => { - const cache = new Map() - const url = 'https://twitter-card-player.vercel.app' - const metascraper = createMetascraper({ - gotOpts: { retry: 0 }, - keyvOpts: { store: cache } - }) - - const metadataOne = await metascraper({ - url, - html: '' - }) - - t.truthy(metadataOne.audio) - t.is(cache.size, 1) - - const metadataTwo = await metascraper({ - url, - html: '' - }) - - t.falsy(metadataTwo.audio) - t.is(cache.size, 2) -}) - test('og:audio', async t => { const html = '' @@ -111,7 +86,65 @@ test('jsonld:contentUrl', async t => { t.snapshot(metadata) }) -test.todo('multiple `audio > source:src`') -test.todo('multiple `audio > source:src` with invalid video values') -test.todo('`audio > source:src` with content type') -test.todo('`audio > source:src` with content type and relative src') +test('multiple `audio > source:src`', async t => { + const html = ` + + ` + const url = + 'https://www.theverge.com/2018/1/22/16921092/pentagon-secret-nuclear-bunker-reconstruction-minecraft-cns-miis-model' + const metascraper = createMetascraper() + const metadata = await metascraper({ html, url }) + t.is( + metadata.audio, + 'https://www.theverge.com/2018/1/22/16921092/audio-small.wav' + ) +}) + +test('`audio > source:src` with content type', async t => { + const html = ` + + ` + const url = + 'https://www.theverge.com/2018/1/22/16921092/pentagon-secret-nuclear-bunker-reconstruction-minecraft-cns-miis-model' + const metascraper = createMetascraper() + const metadata = await metascraper({ html, url }) + t.is(metadata.audio, 
'https://www.theverge.com/audio-small') +}) + +test('multiple `audio > source:src` with invalid audio values', async t => { + const html = ` + + ` + const url = + 'https://www.theverge.com/2018/1/22/16921092/pentagon-secret-nuclear-bunker-reconstruction-minecraft-cns-miis-model' + const metascraper = createMetascraper() + const metadata = await metascraper({ html, url }) + t.is( + metadata.audio, + 'https://www.theverge.com/2018/1/22/16921092/audio-small.mp3' + ) +}) + +test('`audio > source:src` with content type and relative src', async t => { + const html = ` + + ` + const url = + 'https://www.theverge.com/2018/1/22/16921092/pentagon-secret-nuclear-bunker-reconstruction-minecraft-cns-miis-model' + const metascraper = createMetascraper() + const metadata = await metascraper({ html, url }) + t.is( + metadata.audio, + 'https://www.theverge.com/2018/1/22/16921092/audio-small' + ) +}) diff --git a/packages/metascraper-audio/test/providers.js b/packages/metascraper-audio/test/providers.js index 9a2d76170..cc9fea21d 100644 --- a/packages/metascraper-audio/test/providers.js +++ b/packages/metascraper-audio/test/providers.js @@ -96,3 +96,45 @@ test('deezer.com', async t => { 'https://cdns-preview-f.dzcdn.net/stream/c-fd483edcc271cabd1a307132ebda8cef-5.mp3' ) }) + +test('transistor.fm (twitter:player:stream)', async t => { + const html = await readFile( + resolve(__dirname, 'fixtures/providers/transistor.fm.html') + ) + const url = + 'https://saas.transistor.fm/episodes/paul-jarvis-gaining-freedom-by-building-an-indie-business' + + const metascraper = createMetascraper() + const metadata = await metascraper({ html, url }) + + t.is( + metadata.audio, + 'https://chrt.fm/track/637E/2.gum.fm/op3.dev/e/dts.podtrac.com/redirect.mp3/media.transistor.fm/e83b42d0/9e93424b.mp3?src=site' + ) +}) + +test('transistor.fm (twitter:player)', async t => { + const html = ( + await readFile(resolve(__dirname, 'fixtures/providers/transistor.fm.html')) + ) + .toString() + .replace( + '', + '' 
+ ) + .replace( + '', + '' + ) + + const url = + 'https://saas.transistor.fm/episodes/paul-jarvis-gaining-freedom-by-building-an-indie-business' + + const metascraper = createMetascraper() + const metadata = await metascraper({ html, url }) + + t.is( + metadata.audio, + 'https://chrt.fm/track/637E/2.gum.fm/op3.dev/e/dts.podtrac.com/redirect.mp3/media.transistor.fm/e83b42d0/9e93424b.mp3?src=player' + ) +}) diff --git a/packages/metascraper-helpers/index.js b/packages/metascraper-helpers/index.js index dff60f6a8..653131d1f 100644 --- a/packages/metascraper-helpers/index.js +++ b/packages/metascraper-helpers/index.js @@ -163,6 +163,10 @@ const titleize = (src, opts = {}) => { return title } +const $twitter = ($, selector) => + $(`meta[name="${selector}"]`).attr('content') || + $(`meta[property="${selector}"]`).attr('content') + const $filter = ($, matchedEl, fn = $filter.fn) => { let matched @@ -466,6 +470,7 @@ const loadIframe = (url, $, { timeout = 5000 } = {}) => module.exports = { $filter, $jsonld, + $twitter, absoluteUrl, audio, audioExtensions, diff --git a/packages/metascraper-iframe/src/from-twitter.js b/packages/metascraper-iframe/src/from-twitter.js index e3fdf88db..711f07dc3 100644 --- a/packages/metascraper-iframe/src/from-twitter.js +++ b/packages/metascraper-iframe/src/from-twitter.js @@ -1,37 +1,32 @@ 'use strict' -const { normalizeUrl, memoizeOne } = require('@metascraper/helpers') +const { $twitter, memoizeOne } = require('@metascraper/helpers') const { map } = require('lodash') -const getPlayerUrl = memoizeOne((url, $) => { - const playerUrl = - $('meta[name="twitter:player"]').attr('content') || - $('meta[property="twitter:player"]').attr('content') +const getPlayerUrl = memoizeOne( + (_, $) => $twitter($, 'twitter:player'), + memoizeOne.EqualityUrlAndHtmlDom +) - return playerUrl === undefined ? 
undefined : normalizeUrl(url, playerUrl) -}, memoizeOne.EqualityUrlAndHtmlDom) +const playerWidth = $ => $twitter($, 'twitter:player:width') -const playerWidth = $ => - $('meta[name="twitter:player:width"]').attr('content') || - $('meta[property="twitter:player:width"]').attr('content') +const playerHeight = $ => $twitter($, 'twitter:player:height') -const playerHeight = $ => - $('meta[name="twitter:player:height"]').attr('content') || - $('meta[property="twitter:player:height"]').attr('content') +const fromTwitter = + () => + async ({ htmlDom, url, iframe }) => { + const playerUrl = getPlayerUrl(url, htmlDom) + if (!playerUrl) return -const fromTwitter = () => async ({ htmlDom, url, iframe }) => { - const playerUrl = getPlayerUrl(url, htmlDom) - if (!playerUrl) return + const props = map( + { width: playerWidth(htmlDom), height: playerHeight(htmlDom), ...iframe }, + (value, key) => (value === undefined ? value : `${key}="${value}"`) + ) + .filter(Boolean) + .join(' ') - const props = map( - { width: playerWidth(htmlDom), height: playerHeight(htmlDom), ...iframe }, - (value, key) => (value === undefined ? 
value : `${key}="${value}"`) - ) - .filter(Boolean) - .join(' ') - - return `` -} + return `` + } fromTwitter.test = (url, $) => getPlayerUrl(url, $) !== undefined diff --git a/packages/metascraper-spotify/index.js b/packages/metascraper-spotify/index.js index 78b7680bf..b2d5cdc8f 100644 --- a/packages/metascraper-spotify/index.js +++ b/packages/metascraper-spotify/index.js @@ -10,7 +10,6 @@ const { composeRule, description, memoizeOne, - normalizeUrl, parseUrl, sanetizeUrl, toRule @@ -46,7 +45,7 @@ const test = memoizeOne(url => parseUrl(url).domainWithoutSuffix === 'spotify') module.exports = ({ gotOpts, keyvOpts } = {}) => { const spotify = createSpotify({ gotOpts, keyvOpts }) - const getSpotify = composeRule(($, url) => spotify(normalizeUrl(url))) + const getSpotify = composeRule((_, url) => spotify(url)) const rules = { audio: getSpotify({ from: 'audio', ext: 'mp3' }), @@ -56,14 +55,13 @@ module.exports = ({ gotOpts, keyvOpts } = {}) => { ], date: getSpotify({ from: 'date' }), description: [ - toDescription(($, url) => { + toDescription($ => { const description = $('meta[property="og:description"]').attr('content') if (!description) return return description.includes('on Spotify. ') ? description.split('on Spotify. 
')[1] : description }), - getSpotify({ from: 'description' }) ], image: getSpotify({ from: 'image' }), diff --git a/packages/metascraper-video/index.js b/packages/metascraper-video/index.js index 4384c7d4e..7d910babc 100644 --- a/packages/metascraper-video/index.js +++ b/packages/metascraper-video/index.js @@ -2,18 +2,15 @@ const { $jsonld, - extension, + $twitter, + loadIframe, findRule, - normalizeUrl, toRule, url: urlFn, video } = require('@metascraper/helpers') -const { chain, isEqual } = require('lodash') -const memoize = require('@keyvhq/memoize') -const pReflect = require('p-reflect') -const got = require('got') +const { chain, find, isEqual } = require('lodash') const toUrl = toRule(urlFn) @@ -23,17 +20,20 @@ const toVideoFromDom = toRule((domNodes, opts) => { const values = chain(domNodes) .map(domNode => ({ src: domNode?.attribs.src, - type: domNode?.attribs.type + type: chain(domNode) + .get('attribs.type') + .split(';') + .get(0) + .split('/') + .get(1) + .value() })) .uniqWith(isEqual) - .orderBy( - ({ src, type }) => extension(src) === 'mp4' || type?.includes('mp4'), - ['desc'] - ) .value() let result - values.find( + find( + values, ({ src, type }) => (result = video(src, Object.assign({ type }, opts))) ) return result @@ -53,44 +53,37 @@ const videoRules = [ }) : undefined }, - toVideo($ => $('meta[name="twitter:player:stream"]').attr('content')), - toVideo($ => $('meta[property="twitter:player:stream"]').attr('content')), + ({ url, htmlDom: $ }) => { + const src = $twitter($, 'twitter:player:stream') + return src + ? 
video(src, { + url, + type: $twitter($, 'twitter:player:stream:content_type') + }) + : undefined + }, toVideo($jsonld('contentUrl')), toVideoFromDom($ => $('video').get()), toVideoFromDom($ => $('video > source').get()) ] -const createGetPlayer = ({ gotOpts, keyvOpts }) => { - const getPlayer = async playerUrl => { - const { value: response } = await pReflect(got(playerUrl, gotOpts)) - if (!response) return - const contentType = response.headers['content-type'] - if (!contentType || !contentType.startsWith('text')) return - return response.body - } - return memoize(getPlayer, keyvOpts, { - value: value => (value === undefined ? null : value) - }) -} +const imageRules = [toUrl($ => $('video').attr('poster'))] -module.exports = ({ gotOpts, keyvOpts } = {}) => { - const getPlayer = createGetPlayer({ gotOpts, keyvOpts }) +const _getIframe = (url, $, { src }) => + loadIframe(url, $.load(``)) - return { - image: [toUrl($ => $('video').attr('poster'))], - video: [ - ...videoRules, - async ({ htmlDom: $, url }) => { - const playerUrl = - $('meta[name="twitter:player"]').attr('content') || - $('meta[property="twitter:player"]').attr('content') +const withIframe = (rules, getIframe) => + rules.concat(async ({ htmlDom: $, url }) => { + const src = $twitter($, 'twitter:player') + return src + ? 
findRule(rules, { + htmlDom: await getIframe(url, $, { src }), + url + }) + : undefined + }) - if (!playerUrl) return - const html = await getPlayer(normalizeUrl(url, playerUrl)) - if (!html) return - const htmlDom = $.load(html) - return findRule(videoRules, { htmlDom, url }) - } - ] - } -} +module.exports = ({ getIframe = _getIframe } = {}) => ({ + image: withIframe(imageRules, getIframe), + video: withIframe(videoRules, getIframe) +}) diff --git a/packages/metascraper-video/package.json b/packages/metascraper-video/package.json index d3e337d5c..fbbb0385d 100644 --- a/packages/metascraper-video/package.json +++ b/packages/metascraper-video/package.json @@ -22,11 +22,8 @@ "video" ], "dependencies": { - "@keyvhq/memoize": "~2.0.3", "@metascraper/helpers": "^5.35.1", - "got": "~11.8.6", - "lodash": "~4.17.21", - "p-reflect": "~2.1.0" + "lodash": "~4.17.21" }, "devDependencies": { "ava": "latest" diff --git a/packages/metascraper-video/test/fixtures/video-type-relative.html b/packages/metascraper-video/test/fixtures/video-type-relative.html deleted file mode 100644 index c392c4917..000000000 --- a/packages/metascraper-video/test/fixtures/video-type-relative.html +++ /dev/null @@ -1,15 +0,0 @@ - - - - - a0442e2b-a384-4f2b-b443-9f34c2215e16 - - - - - - - - \ No newline at end of file diff --git a/packages/metascraper-video/test/fixtures/video-type.html b/packages/metascraper-video/test/fixtures/video-type.html deleted file mode 100644 index cb574d1f7..000000000 --- a/packages/metascraper-video/test/fixtures/video-type.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - - a0442e2b-a384-4f2b-b443-9f34c2215e16 - - - - - - - - \ No newline at end of file diff --git a/packages/metascraper-video/test/index.js b/packages/metascraper-video/test/index.js index 9be6cfc4e..f6078ed88 100644 --- a/packages/metascraper-video/test/index.js +++ b/packages/metascraper-video/test/index.js @@ -7,31 +7,6 @@ const test = require('ava') const createMetascraper = (...args) => 
require('metascraper')([require('..')(...args)]) -test('provide `keyvOpts`', async t => { - const cache = new Map() - const url = 'https://twitter-card-player.vercel.app' - const metascraper = createMetascraper({ - gotOpts: { retry: 0 }, - keyvOpts: { store: cache } - }) - - const metadataOne = await metascraper({ - url, - html: '' - }) - - t.truthy(metadataOne.video) - t.is(cache.size, 1) - - const metadataTwo = await metascraper({ - url, - html: '' - }) - - t.falsy(metadataTwo.audio) - t.is(cache.size, 2) -}) - test('og:video', async t => { const html = '' @@ -59,32 +34,22 @@ test('og:video:secure_url', async t => { t.is(metadata.video, 'https://cdn.microlink.io/file-examples/sample.mp4') }) -test('video > source:src', async t => { - const html = await readFile(resolve(__dirname, 'fixtures/source-src.html')) - const url = 'https://9gag.com/gag/aGjVLDK' - const metascraper = createMetascraper() - const metadata = await metascraper({ html, url }) - t.snapshot(metadata) -}) - -test('jsonld:contentUrl', async t => { - const html = `` - const url = 'https://browserless.js.org' +test('twitter:player', async t => { + const html = + '' + const url = 'https://twitter-card-player.vercel.app' const metascraper = createMetascraper() const metadata = await metascraper({ html, url }) t.snapshot(metadata) }) -test.todo('twitter:player:stream') - -test('twitter:player', async t => { +test('twitter:player:stream', async t => { const html = - '' + '' const url = 'https://twitter-card-player.vercel.app' const metascraper = createMetascraper() const metadata = await metascraper({ html, url }) + t.snapshot(metadata) }) @@ -107,20 +72,42 @@ test('video:src', async t => { t.is(metadata.video, 'https://cdn.microlink.io/file-examples/sample.mp4') }) +test('video > source:src', async t => { + const html = await readFile(resolve(__dirname, 'fixtures/source-src.html')) + const url = 'https://9gag.com/gag/aGjVLDK' + const metascraper = createMetascraper() + const metadata = await 
metascraper({ html, url }) + t.snapshot(metadata) +}) + +test('jsonld:contentUrl', async t => { + const html = `` + const url = 'https://browserless.js.org' + const metascraper = createMetascraper() + const metadata = await metascraper({ html, url }) + t.snapshot(metadata) +}) + test('multiple `video > source:src`', async t => { const html = ` - - ` + + ` const url = 'https://www.theverge.com/2018/1/22/16921092/pentagon-secret-nuclear-bunker-reconstruction-minecraft-cns-miis-model' const metascraper = createMetascraper() const metadata = await metascraper({ html, url }) - t.snapshot(metadata) + + t.is( + metadata.video, + 'https://www.theverge.com/2018/1/22/16921092/video-small.mp4' + ) }) test('multiple `video > source:src` with invalid video values', async t => { @@ -134,7 +121,12 @@ test('multiple `video > source:src` with invalid video values', async t => { }) test('`video > source:src` with content type', async t => { - const html = await readFile(resolve(__dirname, 'fixtures/video-type.html')) + const html = ` + + ` + const url = 'https://app.croct.dev/' const metascraper = createMetascraper() const metadata = await metascraper({ html, url }) @@ -146,9 +138,11 @@ test('`video > source:src` with content type', async t => { }) test('`video > source:src` with content type and relative src', async t => { - const html = await readFile( - resolve(__dirname, 'fixtures/video-type-relative.html') - ) + const html = ` + + ` const url = 'https://example.com' const metascraper = createMetascraper() const metadata = await metascraper({ html, url }) diff --git a/packages/metascraper-video/test/providers.js b/packages/metascraper-video/test/providers.js index 5fc911827..4d3485231 100644 --- a/packages/metascraper-video/test/providers.js +++ b/packages/metascraper-video/test/providers.js @@ -45,5 +45,9 @@ test('9gag.com', async t => { const url = 'https://9gag.com/gag/abY5Mm9' const metascraper = createMetascraper() const metadata = await metascraper({ html, url }) - 
t.true(metadata.video.endsWith('.mp4')) + + t.is( + metadata.video, + 'https://img-9gag-fun.9cache.com/photo/abY5Mm9_460svvp9.webm' + ) }) diff --git a/packages/metascraper-video/test/snapshots/index.js.md b/packages/metascraper-video/test/snapshots/index.js.md index 747072504..6d4a4f3d7 100644 --- a/packages/metascraper-video/test/snapshots/index.js.md +++ b/packages/metascraper-video/test/snapshots/index.js.md @@ -22,29 +22,38 @@ Generated by [AVA](https://avajs.dev). video: 'https://example.com/video.mp4', } -## twitter:player +## multiple `video > source:src` > Snapshot 1 { image: null, - video: 'https://cdn.microlink.io/file-examples/sample.mp4', + video: 'https://www.theverge.com/2018/1/22/16921092/video-small.mp4', } -## multiple `video > source:src` +## multiple `video > source:src` with invalid video values > Snapshot 1 { image: null, - video: 'https://www.theverge.com/2018/1/22/16921092/video-small.mp4', + video: 'https://player.vimeo.com/external/339440208.hd.mp4?s=58a130628077c53fb48a2cd8ee2c9af24c4e39ec&profile_id=175', } -## multiple `video > source:src` with invalid video values +## twitter:player + +> Snapshot 1 + + { + image: 'https://cdn.microlink.io/file-examples/sample.png', + video: 'https://cdn.microlink.io/file-examples/sample.mp4', + } + +## twitter:player:stream > Snapshot 1 { image: null, - video: 'https://player.vimeo.com/external/339440208.hd.mp4?s=58a130628077c53fb48a2cd8ee2c9af24c4e39ec&profile_id=175', + video: 'https://cdn.microlink.io/file-examples/sample.mp4', } diff --git a/packages/metascraper-video/test/snapshots/index.js.snap b/packages/metascraper-video/test/snapshots/index.js.snap index 3d145e30e17bba23614797d88453c95623c7ab40..745243baa0d83e4dbbf216119905fbbdbecadd65 100644 GIT binary patch literal 549 zcmV+=0^0pSRzVpX<*9$-nf3e=uf|R{IHKnkG zqgQu_iXV#z00000000BMlTUBcFciS!RU5lZbb{-ooOWyeIBAm<4K7F=K;i<3+vYki zi95#*v6Ge~7q}qt83t#*4o4)u3F*?nAXBAkQX^Th<)`QO`}z4QnQ=)k)yJ=(ndVT| 
zECy*lHkGVO3R6{5rUWTeCbjx-6~{!Ax4#C)J!AEuLo~q6KWxV5W;{R#$LQeYYz|Z- z^k+I84h?iqNNa8>yUdev>lTNNFyZ&P*n1!dYy#0nNMu)#v}YbAI0O7o5C@(yCeB zT;YlzmKsVyxb1n7?>pEDtt?()KT&7nkhq>R!J!ibbnGQFKO~sOApoWkNw80S@FJk6 zMJW@;;he>1ZZKX86S`5-B%iit_BH&tO~a?VzHPTdrLnMbMoY<=xU`tGS0#?tjjU|d nHgq92KkF^i{q2|PU;L@kC6K&t;oHueVSA+C5^B%0y9EFMb1n&z literal 519 zcmV+i0{HzwRzVr&OBJdT=x{) z8#rS;oga$`00000000BMlEH4&Fc600u3CDj*b1(da_Vj4#K~@wMTH9z2avb`;?{bd zN#d{Wsg)GjoGEEQn%9jO4&(FRhVkmZ zhA}$26Y8Xgf(@-|(Scl7+>$QMni~VnycYC5G(+8Acdk3=b+b%*6=#hSTwZ!y;hYPY z+|X#OJF4F-o0DoK47Z=|#n7J@L$+@_XM5DMt=DVMl+Z#`bc-Q=kl+9lf`eH~0zW0) zt|q#oLfoc|@1%6?K1-L&mI|J^U4e~Ppoje5cg`<*{?2Y*%PVWgEkA6m3ndje4AUs` ziJy37w#9y;&!!0tLVrdQKaSZn%okBY3Cj`y!cv-(h(!>lz)tH%Z9Q_%v$G(c{se_@ J{%^Ge0062~0~Y`Q diff --git a/packages/metascraper/test/integration/acast/index.js b/packages/metascraper/test/integration/acast/index.js index bab197716..ff2ebabdd 100644 --- a/packages/metascraper/test/integration/acast/index.js +++ b/packages/metascraper/test/integration/acast/index.js @@ -23,7 +23,7 @@ const metascraper = require('../../..')([ const url = 'https://play.acast.com/s/saywhytodrugs/caffeine' -test('acast', async t => { +;(process.env.CI ? test.skip : test)('acast', async t => { const html = await readFile(resolve(__dirname, 'input.html')) const metadata = await metascraper({ html, url }) t.snapshot(metadata) diff --git a/packages/metascraper/test/integration/bloomberg/index.js b/packages/metascraper/test/integration/bloomberg/index.js index 9ff68db03..00be2cfd7 100644 --- a/packages/metascraper/test/integration/bloomberg/index.js +++ b/packages/metascraper/test/integration/bloomberg/index.js @@ -23,7 +23,7 @@ const metascraper = require('../../..')([ const url = 'http://www.bloomberg.com/news/articles/2016-05-24/as-zenefits-stumbles-gusto-goes-head-on-by-selling-insurance' -test('bloomberg', async t => { +;(process.env.CI ? 
test.skip : test)('bloomberg', async t => { const html = await readFile(resolve(__dirname, 'input.html')) const metadata = await metascraper({ html, url }) t.snapshot(metadata) diff --git a/packages/metascraper/test/integration/engadget/index.js b/packages/metascraper/test/integration/engadget/index.js index 914bb3081..169b43eaa 100644 --- a/packages/metascraper/test/integration/engadget/index.js +++ b/packages/metascraper/test/integration/engadget/index.js @@ -23,7 +23,7 @@ const metascraper = require('../../..')([ const url = 'https://www.engadget.com/2019/01/07/all-github-users-keep-code-private' -test('engadget', async t => { +;(process.env.CI ? test.skip : test)('engadget', async t => { const html = await readFile(resolve(__dirname, 'input.html')) const metadata = await metascraper({ html, url }) t.snapshot(metadata) diff --git a/packages/metascraper/test/integration/wikipedia/snapshots/index.js.md b/packages/metascraper/test/integration/wikipedia/snapshots/index.js.md index 421e5304f..8f15d45c5 100644 --- a/packages/metascraper/test/integration/wikipedia/snapshots/index.js.md +++ b/packages/metascraper/test/integration/wikipedia/snapshots/index.js.md @@ -9,7 +9,7 @@ Generated by [AVA](https://avajs.dev). > Snapshot 1 { - audio: null, + audio: 'https://upload.wikimedia.org/wikipedia/en/d/d2/Bob_Dylan_-_Like_a_Rolling_Stone.ogg', author: null, date: '2016-10-13T12:00:00.000Z', description: 'This article is about the musician. 
For his debut album, see Bob Dylan (album).', diff --git a/packages/metascraper/test/integration/wikipedia/snapshots/index.js.snap b/packages/metascraper/test/integration/wikipedia/snapshots/index.js.snap index 61df42aa4c7b63f4d5733edb9f0559d6e50390b0..043f6a31b5b16f595c871ce62cff0a7f280859a0 100644 GIT binary patch literal 542 zcmV+(0^$8ZRzVuP`mPFIEOx`q1;756(GHlk>Nk)Ol^szJe1@f_Nut zVW1FPSjCO4w6kaDMZq0^6?_WNCh8x28DD+jzi&Lgd;IjcgAw}d@pOSuL&F3s`-~i!`9lXK2LguX|%G` zRb7)&M%kYVKKfXL>J`lE^Z=|Ar(C991ePYzi79EEFlOF||4}lR*B6x|S%CU0tjp;r z?2S8tilz;QeXQeNR?3Z$5IWuE)q2IqsCm;Xrpu(T6||G*<(NK~b+{WNySe@iA-Sj| g=vq12cP|{dXKShFT~9>;`uBnS1!>EuD5(Pg08VQRX#fBK literal 507 zcmVVYPsZ z2~{b3TOW%E00000000A(Qp<|fFc?lwo#SO3Jc9}@v+@GANh=5rxKL!a12QOzP|`NG znUf}=NygFLyn}+<@nr;`#|Q9lA?HAK7KVJ}|NaaAEf*gvW$Lv%{~^5eO2|)gEv;xI zmrir*I^*5>MbmK4-zE1GbQO&UU&A-w!21Dk2KWZ}1$d4T`VF|fKu7|<0)7H+B%s5~ zeY`xXPDJf7-iWK!3gJD3cM-k?K8rgzL3n2$@9@w_GqGfEW?S;$J2U*-cm-+|k=!d- z{xr>5nm;+tivz~sW{k1-v!LBZxO$M*OQWOi=(v-P@WIPk2{MY637+^)kUltBORh6= zXa+L6HKGC=S5@dA6DI_DX)5ySv*MaOoV1U!d4sa&Z7g*09d+JYcR*>d%J3!|_t1}f z$js2v%~oh_`rc?qeHZ$QGRg`Hsmdu;nU*iUtcB*~YuIUdD4dt4T$L~cRuAW>`CZs)4|k_^Bl%XxwT59-h{f_wwPHxPkP;kN=es= xVWW8)-REPSzHwxCw`)krWg$rzRFofFTJp%Yqn>9ym5qqM^$)fk764QO006BM@ag~n