From 93674e0dc2de03d6a5e694928e10fad67d028bb5 Mon Sep 17 00:00:00 2001
From: Kiko Beats
Date: Mon, 23 Dec 2019 20:47:32 +0100
Subject: [PATCH] fix: omit 404 twitter urls

related: https://sentry.io/share/issue/a64dae53c7f54cff9fbe865b0bf18618/
---
 .../metascraper-media-provider/package.json   |  1 +
 .../src/get-media/provider/twitter.js         | 37 ++++++++++++-------
 .../metascraper-media-provider/test/index.js  |  6 +++
 3 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/packages/metascraper-media-provider/package.json b/packages/metascraper-media-provider/package.json
index c5f415bf6..b4cd01f5e 100644
--- a/packages/metascraper-media-provider/package.json
+++ b/packages/metascraper-media-provider/package.json
@@ -22,6 +22,7 @@
     "got": "~9.6.0",
     "lodash": "~4.17.15",
     "luminati-tunnel": "~1.3.0",
+    "p-reflect": "~2.1.0",
     "p-retry": "~4.2.0",
     "tldts": "~5.6.2",
     "youtube-dl": "~2.3.0"
diff --git a/packages/metascraper-media-provider/src/get-media/provider/twitter.js b/packages/metascraper-media-provider/src/get-media/provider/twitter.js
index 037ccc37f..17086d83f 100644
--- a/packages/metascraper-media-provider/src/get-media/provider/twitter.js
+++ b/packages/metascraper-media-provider/src/get-media/provider/twitter.js
@@ -3,6 +3,7 @@
 const debug = require('debug')('metascraper-media-provider:twitter')
 const { reduce, set, get, chain } = require('lodash')
 const { protocol } = require('@metascraper/helpers')
+const pReflect = require('p-reflect')
 const pRetry = require('p-retry')
 const got = require('got')

@@ -55,6 +56,26 @@ const createGuestToken = ({ userAgent, tunnel }) => {
 }

 const createGetTwitterVideo = ({ userAgent, getGuestToken }) => {
+  const getData = async (apiUrl, url, token) => {
+    const { isFulfilled, value, reason } = await pReflect(
+      got(apiUrl, {
+        retry: 0,
+        json: true,
+        headers: {
+          referer: url,
+          'x-guest-token': token,
+          origin: 'https://twitter.com',
+          authorization: TWITTER_BEARER_TOKEN,
+          'user-agent': userAgent
+        }
+      })
+    )
+
+    if (isFulfilled) return value.body
+    if (reason.statusCode === 404) return {}
+    throw reason
+  }
+
   return async url => {
     const tweetId = getTweetId(url)
     const apiUrl = `https://api.twitter.com/2/timeline/conversation/${tweetId}.json?tweet_mode=extended`
@@ -66,25 +87,15 @@ const createGetTwitterVideo = ({ userAgent, getGuestToken }) => {
         `getTwitterInfo apiUrl=${apiUrl} guestToken=${token} userAgent=${userAgent}`
       )

-      const { body } = await got(apiUrl, {
-        retry: 0,
-        json: true,
-        headers: {
-          referer: url,
-          'x-guest-token': token,
-          origin: 'https://twitter.com',
-          authorization: TWITTER_BEARER_TOKEN,
-          'user-agent': userAgent
-        }
-      })
+      const payload = await getData(apiUrl, url, token)

       const id = get(
-        body,
+        payload,
         `globalObjects.tweets.${tweetId}.retweeted_status_id_str`,
         tweetId
       )

-      const tweetObj = get(body, `globalObjects.tweets.${id}`)
+      const tweetObj = get(payload, `globalObjects.tweets.${id}`)

       data = {
         extractor_key: 'Twitter',
diff --git a/packages/metascraper-media-provider/test/index.js b/packages/metascraper-media-provider/test/index.js
index a9a7c3e31..36c2da84b 100644
--- a/packages/metascraper-media-provider/test/index.js
+++ b/packages/metascraper-media-provider/test/index.js
@@ -99,6 +99,12 @@ describe('metascraper-media-provider', () => {
         should(isUrl(metadata.video)).be.true()
       })
     })
+
+    it('omit 404 urls', async () => {
+      const url = 'https://twitter.com/chenzonaut/status/456218458162601984'
+      const metadata = await metascraper({ url })
+      should(isUrl(metadata.video)).be.false()
+    })
   })

   describe('facebook', () => {