Skip to content

Commit

Permalink
Video is a collection
Browse files Browse the repository at this point in the history
  • Loading branch information
Kikobeats committed Sep 1, 2018
1 parent b845f45 commit 97fcfae
Show file tree
Hide file tree
Showing 10 changed files with 94 additions and 69 deletions.
44 changes: 22 additions & 22 deletions packages/metascraper-helpers/index.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
'use strict'

const { toLower, replace, includes, isString, trim, flow, isEmpty } = require('lodash')
const {
toLower,
replace,
includes,
isString,
trim,
flow,
isEmpty
} = require('lodash')
const condenseWhitespace = require('condense-whitespace')
const videoExtensions = require('video-extensions')
const videoExtensions = require('video-extensions').concat(['gif'])
const audioExtensions = require('audio-extensions')
const isRelativeUrl = require('is-relative-url')
const fileExtension = require('file-extension')
Expand All @@ -21,34 +29,27 @@ const REGEX_LOCATION = /^[A-Z\s]+\s+[-—–]\s+/

const removeLocation = value => replace(value, REGEX_LOCATION, '')

const urlTest = (url, { relative = true }) => relative
? isRelativeUrl(url) || urlRegex().test(url)
: urlRegex().test(url)
const urlTest = (url, { relative = true }) =>
relative ? isRelativeUrl(url) || urlRegex().test(url) : urlRegex().test(url)

const isUrl = (url, opts = {}) => !isEmpty(url) && urlTest(url, opts)

const absoluteUrl = (baseUrl, relativePath = '') => (
const absoluteUrl = (baseUrl, relativePath = '') =>
resolveUrl(baseUrl, relativePath)
)

const sanetizeUrl = (url, opts) => (
const sanetizeUrl = (url, opts) =>
_normalizeUrl(url, {
normalizeHttp: false,
stripWWW: false,
sortQueryParameters: false,
removeTrailingSlash: false,
...opts
})
)

const normalizeUrl = (baseUrl, relativePath, opts) => (
const normalizeUrl = (baseUrl, relativePath, opts) =>
sanetizeUrl(absoluteUrl(baseUrl, relativePath), opts)
)

const removeByPrefix = flow([
value => value.replace(REGEX_BY, ''),
trim
])
const removeByPrefix = flow([value => value.replace(REGEX_BY, ''), trim])

const createTitle = flow([condenseWhitespace, smartquotes])

Expand All @@ -61,14 +62,13 @@ const titleize = (src, { capitalize = false, removeBy = false } = {}) => {

const defaultFn = el => el.text().trim()

const $filter = ($, collection, fn = defaultFn) => {
const el = collection.filter((i, el) => fn($(el))).first()
const $filter = ($, domNodes, fn = defaultFn) => {
const el = domNodes.filter((i, el) => fn($(el))).first()
return fn(el)
}

const isAuthor = (str, opts = { relative: false }) => (
const isAuthor = (str, opts = { relative: false }) =>
isString(str) && !isUrl(str, opts)
)

const getAuthor = (str, opts = { removeBy: true }) => titleize(str, opts)

Expand All @@ -77,9 +77,8 @@ const protocol = url => {
return protocol.replace(':', '')
}

const createUrlExtensionValidator = collection => url => (
const createUrlExtensionValidator = collection => url =>
isUrl(url) && includes(collection, extension(url))
)

const isVideoUrl = createUrlExtensionValidator(videoExtensions)

Expand All @@ -89,7 +88,8 @@ const extension = url => fileExtension(url).split('?')[0]

const description = value => isString(value) && getDescription(value)

const getDescription = value => titleize(removeLocation(value), { capitalize: false })
const getDescription = value =>
titleize(removeLocation(value), { capitalize: false })

const publisher = value => isString(value) && condenseWhitespace(value)

Expand Down
4 changes: 2 additions & 2 deletions packages/metascraper-logo/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ const getDomNodeSizes = (domNodes, attr) =>
const getSizes = ($, collection) =>
chain(collection)
.reduce((acc, { tag, attr }) => {
const domNode = $(tag).get()
const selectors = getDomNodeSizes(domNode, attr)
const domNodes = $(tag).get()
const selectors = getDomNodeSizes(domNodes, attr)
return concat(acc, selectors)
}, [])
.sortBy(({ size }) => -size)
Expand Down
6 changes: 3 additions & 3 deletions packages/metascraper-media-provider/test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ const { getVideo } = require('..')
describe('metascraper-media-provider', () => {
describe('.getVideo', () => {
it('twitter', () => {
snapshot(getVideo(require('./fixtures/twitter.json')))
snapshot(getVideo(require('./fixtures/video/twitter.json')))
})
it('vimeo', () => {
snapshot(getVideo(require('./fixtures/vimeo.json')))
snapshot(getVideo(require('./fixtures/video/vimeo.json')))
})
it('youtube', () => {
snapshot(getVideo(require('./fixtures/youtube.json')))
snapshot(getVideo(require('./fixtures/video/youtube.json')))
})
})
describe('provider', () => {
Expand Down
65 changes: 37 additions & 28 deletions packages/metascraper-video/__snapshots__/index.js.snap-shot
Original file line number Diff line number Diff line change
@@ -1,19 +1,8 @@
exports['video src 1'] = {
"image": "https://cdn.vox-cdn.com/thumbor/AtQQMyWrexi6-Xyk73jv6nqTO7s=/0x5:1247x658/fit-in/1200x630/cdn.vox-cdn.com/uploads/chorus_asset/file/10079811/Screen_Shot_2018_01_22_at_3.27.50_PM.png",
"video": null,
"author": "Rachel Becker",
"date": "2018-01-22T23:38:17.000Z",
"description": "The zombies are only released on the weekends, the developers promise",
"lang": null,
"logo": "https://cdn.vox-cdn.com/uploads/chorus_asset/file/7395351/android-chrome-192x192.0.png",
"publisher": "The Verge",
"title": "You can visit the Pentagon’s secret nuclear bunker inside Minecraft",
"url": "https://www.theverge.com/2018/1/22/16921092/pentagon-secret-nuclear-bunker-reconstruction-minecraft-cns-miis-model"
}

exports['source src 1'] = {
exports['<source src /> 1'] = {
"image": "https://img-9gag-fun.9cache.com/photo/aGjVLDK_460s.jpg",
"video": "https://img-9gag-fun.9cache.com/photo/aGjVLDK_460sv.mp4",
"video": [
"https://img-9gag-fun.9cache.com/photo/aGjVLDK_460sv.mp4"
],
"author": null,
"date": null,
"description": "Watch the video and the fun convo of the 9GAG community",
Expand All @@ -37,22 +26,26 @@ exports['og:video 1'] = {
"url": "https://twitter.com/_developit/status/955905369242513414"
}

exports['src:poster 1'] = {
"image": "https://thumbs.gfycat.com/TimelyHealthyArmadillo-mobile.jpg",
"video": "https://thumbs.gfycat.com/TimelyHealthyArmadillo-mobile.mp4",
"author": "Gfycat",
"date": null,
"description": "Watch Backflip GIF on Gfycat. Discover more PUBG GIFs on Gfycat",
"lang": "en",
"logo": "https://gfycat.com/static/apple-touch-icon/apple-touch-icon-180x180.png",
"publisher": "Gfycat",
"title": "Backflip - Create, Discover and Share Awesome GIFs on Gfycat",
"url": "https://thumbs.gfycat.com/TimelyHealthyArmadillo-size_restricted.gif"
exports['single src 1'] = {
"image": "https://cdn.vox-cdn.com/thumbor/AtQQMyWrexi6-Xyk73jv6nqTO7s=/0x5:1247x658/fit-in/1200x630/cdn.vox-cdn.com/uploads/chorus_asset/file/10079811/Screen_Shot_2018_01_22_at_3.27.50_PM.png",
"video": [
"https://cdn.vox-cdn.com/thumbor/4l0C-7uGFtTfc6lWibo1ITiE2YU=/0x0:1280x720/320x213/filters:focal(538x258:742x462):gifv():no_upscale()/cdn.vox-cdn.com/uploads/chorus_image/image/58416873/2018_01_22_14_19_55.0.gif"
],
"author": "Rachel Becker",
"date": "2018-01-22T23:38:17.000Z",
"description": "The zombies are only released on the weekends, the developers promise",
"lang": null,
"logo": "https://cdn.vox-cdn.com/uploads/chorus_asset/file/7395351/android-chrome-192x192.0.png",
"publisher": "The Verge",
"title": "You can visit the Pentagon’s secret nuclear bunker inside Minecraft",
"url": "https://www.theverge.com/2018/1/22/16921092/pentagon-secret-nuclear-bunker-reconstruction-minecraft-cns-miis-model"
}

exports['clips.twitch.tv 1'] = {
"image": "https://clips-media-assets.twitch.tv/27434665136-offset-2366-preview.jpg",
"video": "https://clips-media-assets.twitch.tv/AT-27434665136-offset-2366-1280x720.mp4",
"video": [
"https://clips-media-assets.twitch.tv/AT-27434665136-offset-2366-1280x720.mp4"
],
"author": null,
"date": null,
"description": "Shroud with the casual coffee sip to kill combo - Clipped by jpan11",
Expand All @@ -65,7 +58,9 @@ exports['clips.twitch.tv 1'] = {

exports['play.tv 1'] = {
"image": "https://d1playscdntv-a.akamaihd.net/video/Ha35bprkDYG/processed/720.jpg",
"video": "https://d1playscdntv-a.akamaihd.net/video/Ha35bprkDYG/processed/480.mp4",
"video": [
"https://d1playscdntv-a.akamaihd.net/video/Ha35bprkDYG/processed/480.mp4"
],
"author": "chineseouchie",
"description": "Publicado por chineseouchie",
"lang": "en",
Expand All @@ -75,3 +70,17 @@ exports['play.tv 1'] = {
"url": "https://plays.tv/video/5a6f64b1bef69a7fa9/holy-shit"
}

exports['src:poster 1'] = {
"image": "https://thumbs.gfycat.com/TimelyHealthyArmadillo-mobile.jpg",
"video": [
"https://thumbs.gfycat.com/TimelyHealthyArmadillo-mobile.mp4"
],
"author": "Gfycat",
"date": null,
"description": "Watch Backflip GIF on Gfycat. Discover more PUBG GIFs on Gfycat",
"lang": "en",
"logo": "https://gfycat.com/static/apple-touch-icon/apple-touch-icon-180x180.png",
"publisher": "Gfycat",
"title": "Backflip - Create, Discover and Share Awesome GIFs on Gfycat",
"url": "https://thumbs.gfycat.com/TimelyHealthyArmadillo-size_restricted.gif"
}
4 changes: 2 additions & 2 deletions packages/metascraper-video/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ const wrap = createWrapper((value, url) => urlFn(value, { url }))

const wrapVideo = createWrapper((value, url) => {
const urlValue = urlFn(value, { url })
return isVideoUrl(urlValue) && urlValue
return isVideoUrl(urlValue) && [urlValue]
})

/**
Expand All @@ -33,6 +33,6 @@ module.exports = () => ({
wrapVideo($ => $('meta[property="og:video"]').attr('content')),
wrapVideo($ => $('meta[property="twitter:player:stream"]').attr('content')),
wrapVideo($ => $('video').attr('src')),
wrapVideo($ => $('source').attr('src'))
wrapVideo($ => $('video > source').attr('src'))
]
})
37 changes: 27 additions & 10 deletions packages/metascraper-video/test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,34 @@ const readFile = promisify(fs.readFile)

describe('metascraper-video', () => {
describe('video', () => {
it('video src', async () => {
const html = await readFile(resolve(__dirname, 'fixtures/video-src.html'))
const url =
'https://www.theverge.com/2018/1/22/16921092/pentagon-secret-nuclear-bunker-reconstruction-minecraft-cns-miis-model'

const metadata = await metascraper({ html, url })
snapshot(metadata)
describe('<video />', () => {
it('single src', async () => {
const html = await readFile(
resolve(__dirname, 'fixtures/video-src.html')
)
const url =
'https://www.theverge.com/2018/1/22/16921092/pentagon-secret-nuclear-bunker-reconstruction-minecraft-cns-miis-model'
const metadata = await metascraper({ html, url })
snapshot(metadata)
})
xit('multiple src', async () => {
const html = `
<video controls>
<source src="video-small.mp4" type="video/mp4" media="all and (max-width: 480px)">
<source src="video-small.webm" type="video/webm" media="all and (max-width: 480px)">
<source src="video.mp4" type="video/mp4">
<source src="video.webm" type="video/webm">
</video>
`
const url =
'https://www.theverge.com/2018/1/22/16921092/pentagon-secret-nuclear-bunker-reconstruction-minecraft-cns-miis-model'
const metadata = await metascraper({ html, url })
console.log(metadata)
// snapshot(metadata)
})
})

it('source src', async () => {
it('<source src />', async () => {
const html = await readFile(
resolve(__dirname, 'fixtures/source-src.html')
)
Expand All @@ -45,12 +63,11 @@ describe('metascraper-video', () => {
it('og:video', async () => {
const html = await readFile(resolve(__dirname, 'fixtures/tweet.html'))
const url = 'https://twitter.com/_developit/status/955905369242513414'

const metadata = await metascraper({ html, url })
snapshot(metadata)
})

describe('specific providers', () => {
describe('by providers', () => {
it('clips.twitch.tv', async () => {
const html = await readFile(
resolve(__dirname, 'fixtures/providers/clip.twitch.tv.html')
Expand Down
3 changes: 1 addition & 2 deletions packages/metascraper/__snapshots__/index.js.snap-shot
Original file line number Diff line number Diff line change
Expand Up @@ -973,11 +973,10 @@ exports['wikipedia 1'] = {
"date": "2016-10-13T12:00:00.000Z",
"description": "Bob Dylan (/ˈdɪlən/; born Robert Allen Zimmerman, May 24, 1941) is an American singer-songwriter, author, and painter, who has been an influential figure in popular music and culture for more than five decades. Much of his most celebrated work dates from the 1960s, when he became a reluctant “voice of a generation”[2] with songs such as “Blowin’ in the Wind” and “The Times They Are a-Changin’” that became anthems for the Civil Rights Movement and anti-war movement. In 1965, he controversially abandoned his early fan-base in the American folk music revival, recording a six-minute single, “Like a Rolling Stone”, which enlarged the scope of popular music.",
"image": "https://upload.wikimedia.org/wikipedia/commons/thumb/0/02/Bob_Dylan_-_Azkena_Rock_Festival_2010_2.jpg/1200px-Bob_Dylan_-_Azkena_Rock_Festival_2010_2.jpg",
"video": "https://upload.wikimedia.org/wikipedia/en/e/e7/Bob_Dylan_-_Blowin'_in_the_Wind.ogg",
"video": null,
"lang": "en",
"logo": "https://en.wikipedia.org/static/apple-touch/wikipedia.png",
"publisher": "Wikipedia",
"title": "Bob Dylan - Wikipedia",
"url": "https://en.wikipedia.org/wiki/Bob_Dylan"
}

0 comments on commit 97fcfae

Please sign in to comment.