Improve video detection

microlinkhq · Aug 13, 2018 · 3a326ab · 3a326ab
1 parent 63b10e1
commit 3a326ab
Show file tree

Hide file tree

Showing 2 changed files with 49 additions and 44 deletions.
diff --git a/packages/metascraper-video-provider/index.js b/packages/metascraper-video-provider/index.js
@@ -1,6 +1,6 @@
 'use strict'
 
-const { round, size, get, chain, find, isString } = require('lodash')
+const { overEvery, isEmpty, eq, has, round, size, get, chain, find, isString } = require('lodash')
 const { isUrl, titleize } = require('@metascraper/helpers')
 const youtubedl = require('youtube-dl')
 const { promisify } = require('util')
@@ -28,19 +28,22 @@ const getVideoInfo = async url => {
   return cachedVideoInfo
 }
 
-const isMp4 = format => format.ext === 'mp4' || path.extname(format.url).startsWith('.mp4')
-const isHttp = format => format.protocol === 'https' || format.protocol === 'http'
+const isMp4 = format => eq(get(format, 'ext'), 'mp4') || path.extname(get(format, 'url')).startsWith('.mp4')
+const isHttp = format => eq(get(format, 'protocol'), 'http')
+const isHttps = format => eq(get(format, 'protocol'), 'https')
+const hasAudio = format => has(format, 'abr')
 
 /**
  * Get a video source URL of good enough quality
  * that is compatible with playback in the browser.
  */
-const getVideoUrl = formats => {
+const getVideoUrl = (formats, filters = []) => {
   const urls = chain(formats)
-    .filter(format => isHttp(format) && isMp4(format))
+    .filter(overEvery(filters))
     .map('url')
     .value()
 
+  if (isEmpty(urls)) return false
   const index = round(size(urls) / 2) - 1
   return get(urls, index)
 }
@@ -50,7 +53,12 @@ const getVideoUrl = formats => {
  */
 const getVideoProvider = async ({ url }) => {
   const { formats } = await getVideoInfo(url)
-  const videoUrl = getVideoUrl(formats)
+  const videoUrl = getVideoUrl(formats, [isMp4, isHttps, hasAudio]) ||
+    getVideoUrl(formats, [isMp4, isHttp, hasAudio]) ||
+    getVideoUrl(formats, [isMp4, isHttps]) ||
+    getVideoUrl(formats, [isMp4]) ||
+    getVideoUrl(formats)
+
   return isUrl(videoUrl) && videoUrl
 }
 

diff --git a/packages/metascraper-video-provider/test/index.js b/packages/metascraper-video-provider/test/index.js
@@ -34,49 +34,46 @@ const metascraper = require('metascraper').load([
 const readFile = promisify(fs.readFile)
 
 describe('metascraper-video-provider', () => {
-  describe('supported', () => {
-    it('vimeo', async () => {
-      const html = await readFile(resolve(__dirname, 'fixtures/vimeo.html'))
-      const url = 'https://vimeo.com/188175573'
-
-      const metadata = await metascraper({ html, url })
-      should(isUrl(metadata.video)).be.true()
-      should(isString(metadata.title)).be.true()
-      const meta = omit(metadata, ['video', 'title'])
-      snapshot(meta)
-    })
+  it('vimeo', async () => {
+    const html = await readFile(resolve(__dirname, 'fixtures/vimeo.html'))
+    const url = 'https://vimeo.com/188175573'
+    const metadata = await metascraper({ html, url })
+    should(isUrl(metadata.video)).be.true()
+    should(isString(metadata.title)).be.true()
+    const meta = omit(metadata, ['video', 'title'])
+    snapshot(meta)
+  })
 
-    xit('twitter', async () => {
-      const html = await readFile(resolve(__dirname, 'fixtures/twitter.html'))
-      const url = 'https://twitter.com/verge/status/957383241714970624'
+  xit('twitter', async () => {
+    const html = await readFile(resolve(__dirname, 'fixtures/twitter.html'))
+    const url = 'https://twitter.com/verge/status/957383241714970624'
 
-      const metadata = await metascraper({ html, url })
-      should(isUrl(metadata.video)).be.true()
-      should(isString(metadata.title)).be.true()
-      const meta = omit(metadata, ['video', 'title'])
-      snapshot(meta)
-    })
+    const metadata = await metascraper({ html, url })
+    should(isUrl(metadata.video)).be.true()
+    should(isString(metadata.title)).be.true()
+    const meta = omit(metadata, ['video', 'title'])
+    snapshot(meta)
+  })
 
-    it('facebook', async () => {
-      const html = await readFile(resolve(__dirname, 'fixtures/facebook.html'))
-      const url = 'https://www.facebook.com/afcajax/videos/1686831701364171'
+  it('facebook', async () => {
+    const html = await readFile(resolve(__dirname, 'fixtures/facebook.html'))
+    const url = 'https://www.facebook.com/afcajax/videos/1686831701364171'
 
-      const metadata = await metascraper({ html, url })
-      should(isUrl(metadata.video)).be.true()
-      should(isString(metadata.title)).be.true()
-      const meta = omit(metadata, ['video', 'title'])
-      snapshot(meta)
-    })
+    const metadata = await metascraper({ html, url })
+    should(isUrl(metadata.video)).be.true()
+    should(isString(metadata.title)).be.true()
+    const meta = omit(metadata, ['video', 'title'])
+    snapshot(meta)
+  })
 
-    it('youtube', async () => {
-      const html = await readFile(resolve(__dirname, 'fixtures/youtube.html'))
-      const url = 'https://www.youtube.com/watch?v=hwMkbaS_M_c'
+  it('youtube', async () => {
+    const html = await readFile(resolve(__dirname, 'fixtures/youtube.html'))
+    const url = 'https://www.youtube.com/watch?v=hwMkbaS_M_c'
 
-      const metadata = await metascraper({ html, url })
-      should(isUrl(metadata.video)).be.true()
-      should(isString(metadata.title)).be.true()
-      const meta = omit(metadata, ['video', 'title'])
-      snapshot(meta)
-    })
+    const metadata = await metascraper({ html, url })
+    should(isUrl(metadata.video)).be.true()
+    should(isString(metadata.title)).be.true()
+    const meta = omit(metadata, ['video', 'title'])
+    snapshot(meta)
   })
 })