Skip to content

Commit

Permalink
Improve video detection
Browse files Browse the repository at this point in the history
  • Loading branch information
Kikobeats committed Aug 13, 2018
1 parent 63b10e1 commit 3a326ab
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 44 deletions.
20 changes: 14 additions & 6 deletions packages/metascraper-video-provider/index.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
'use strict'

const { round, size, get, chain, find, isString } = require('lodash')
const { overEvery, isEmpty, eq, has, round, size, get, chain, find, isString } = require('lodash')
const { isUrl, titleize } = require('@metascraper/helpers')
const youtubedl = require('youtube-dl')
const { promisify } = require('util')
Expand Down Expand Up @@ -28,19 +28,22 @@ const getVideoInfo = async url => {
return cachedVideoInfo
}

const isMp4 = format => format.ext === 'mp4' || path.extname(format.url).startsWith('.mp4')
const isHttp = format => format.protocol === 'https' || format.protocol === 'http'
/**
 * Format predicates used to select a playable video source.
 *
 * Every predicate takes a single format entry and returns a boolean.
 * They tolerate `null`/`undefined` entries and entries missing the
 * inspected field, so they can be combined freely as filters.
 */

// True when the entry declares the `mp4` extension, or when the extension
// taken from its URL begins with `.mp4` (`startsWith` keeps URLs that carry
// a trailing query string, e.g. `.mp4?token=1`).
const isMp4 = format => {
  if (format == null) return false
  if (format.ext === 'mp4') return true
  // Guard the type: `path.extname` throws on anything that is not a string.
  return typeof format.url === 'string' && path.extname(format.url).startsWith('.mp4')
}

// True when the entry is served over plain HTTP.
const isHttp = format => format != null && format.protocol === 'http'

// True when the entry is served over HTTPS.
const isHttps = format => format != null && format.protocol === 'https'

// True when the entry carries its own `abr` field.
const hasAudio = format => format != null && Object.prototype.hasOwnProperty.call(format, 'abr')

/**
* Get a video source of good enough quality
* that can be consumed by the browser.
*/
// NOTE(review): this span comes from a rendered diff, so two signatures and
// two `.filter(...)` lines appear back to back; presumably the first of each
// pair is the pre-change line and the second the post-change one — confirm
// against the repository before reusing this text as source.
const getVideoUrl = formats => {
const getVideoUrl = (formats, filters = []) => {
// Narrow the formats with the predicates, then keep only their `url` values.
const urls = chain(formats)
.filter(format => isHttp(format) && isMp4(format))
.filter(overEvery(filters))
.map('url')
.value()

// Nothing matched: return `false` rather than looking up an index.
if (isEmpty(urls)) return false
// Take the entry around the middle of the list (size 1 → 0, 2 → 0, 3 → 1, 4 → 1).
// NOTE(review): presumably the formats are ordered by quality, so this picks a
// mid-quality source — the ordering is not visible here; confirm.
const index = round(size(urls) / 2) - 1
return get(urls, index)
}
Expand All @@ -50,7 +53,12 @@ const getVideoUrl = formats => {
*/
// Resolves a video URL for the page at `url`: reads `formats` from
// `getVideoInfo(url)`, asks `getVideoUrl` for a candidate, and returns it only
// when `isUrl` accepts it (otherwise the falsy result of that check).
const getVideoProvider = async ({ url }) => {
const { formats } = await getVideoInfo(url)
// NOTE(review): rendered diff — the single-call assignment below is presumably
// the pre-change line, replaced by the multi-line cascade that follows; confirm.
const videoUrl = getVideoUrl(formats)
// Try the strictest filter set first and relax it step by step; `getVideoUrl`
// returns `false` when nothing matches, so `||` moves on to the next attempt.
const videoUrl = getVideoUrl(formats, [isMp4, isHttps, hasAudio]) ||
getVideoUrl(formats, [isMp4, isHttp, hasAudio]) ||
getVideoUrl(formats, [isMp4, isHttps]) ||
getVideoUrl(formats, [isMp4]) ||
getVideoUrl(formats)

return isUrl(videoUrl) && videoUrl
}

Expand Down
73 changes: 35 additions & 38 deletions packages/metascraper-video-provider/test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,49 +34,46 @@ const metascraper = require('metascraper').load([
const readFile = promisify(fs.readFile)

// Test suite for the video provider. Each case reads an HTML fixture, runs
// `metascraper` on it together with the page URL, checks that `metadata.video`
// is a URL and `metadata.title` is a string, then snapshots the remaining fields.
// NOTE(review): this span comes from a rendered diff, so most cases appear twice
// (presumably pre- and post-change copies, the older ones nested under
// `describe('supported')`); confirm against the repository before reuse.
describe('metascraper-video-provider', () => {
describe('supported', () => {
it('vimeo', async () => {
const html = await readFile(resolve(__dirname, 'fixtures/vimeo.html'))
const url = 'https://vimeo.com/188175573'

const metadata = await metascraper({ html, url })
should(isUrl(metadata.video)).be.true()
should(isString(metadata.title)).be.true()
const meta = omit(metadata, ['video', 'title'])
snapshot(meta)
})
it('vimeo', async () => {
const html = await readFile(resolve(__dirname, 'fixtures/vimeo.html'))
const url = 'https://vimeo.com/188175573'
const metadata = await metascraper({ html, url })
should(isUrl(metadata.video)).be.true()
should(isString(metadata.title)).be.true()
const meta = omit(metadata, ['video', 'title'])
snapshot(meta)
})

// NOTE(review): `xit` presumably keeps the twitter case disabled — confirm
// with the test runner in use.
xit('twitter', async () => {
const html = await readFile(resolve(__dirname, 'fixtures/twitter.html'))
const url = 'https://twitter.com/verge/status/957383241714970624'
xit('twitter', async () => {
const html = await readFile(resolve(__dirname, 'fixtures/twitter.html'))
const url = 'https://twitter.com/verge/status/957383241714970624'

const metadata = await metascraper({ html, url })
should(isUrl(metadata.video)).be.true()
should(isString(metadata.title)).be.true()
const meta = omit(metadata, ['video', 'title'])
snapshot(meta)
})
const metadata = await metascraper({ html, url })
should(isUrl(metadata.video)).be.true()
should(isString(metadata.title)).be.true()
const meta = omit(metadata, ['video', 'title'])
snapshot(meta)
})

it('facebook', async () => {
const html = await readFile(resolve(__dirname, 'fixtures/facebook.html'))
const url = 'https://www.facebook.com/afcajax/videos/1686831701364171'
it('facebook', async () => {
const html = await readFile(resolve(__dirname, 'fixtures/facebook.html'))
const url = 'https://www.facebook.com/afcajax/videos/1686831701364171'

const metadata = await metascraper({ html, url })
should(isUrl(metadata.video)).be.true()
should(isString(metadata.title)).be.true()
const meta = omit(metadata, ['video', 'title'])
snapshot(meta)
})
const metadata = await metascraper({ html, url })
should(isUrl(metadata.video)).be.true()
should(isString(metadata.title)).be.true()
const meta = omit(metadata, ['video', 'title'])
snapshot(meta)
})

it('youtube', async () => {
const html = await readFile(resolve(__dirname, 'fixtures/youtube.html'))
const url = 'https://www.youtube.com/watch?v=hwMkbaS_M_c'
it('youtube', async () => {
const html = await readFile(resolve(__dirname, 'fixtures/youtube.html'))
const url = 'https://www.youtube.com/watch?v=hwMkbaS_M_c'

const metadata = await metascraper({ html, url })
should(isUrl(metadata.video)).be.true()
should(isString(metadata.title)).be.true()
const meta = omit(metadata, ['video', 'title'])
snapshot(meta)
})
const metadata = await metascraper({ html, url })
should(isUrl(metadata.video)).be.true()
should(isString(metadata.title)).be.true()
const meta = omit(metadata, ['video', 'title'])
snapshot(meta)
})
})

0 comments on commit 3a326ab

Please sign in to comment.