Skip to content

Commit

Permalink
fix: omit 404 twitter urls
Browse files Browse the repository at this point in the history
  • Loading branch information
Kikobeats committed Dec 23, 2019
1 parent ea97d1e commit 93674e0
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 13 deletions.
1 change: 1 addition & 0 deletions packages/metascraper-media-provider/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"got": "~9.6.0",
"lodash": "~4.17.15",
"luminati-tunnel": "~1.3.0",
"p-reflect": "~2.1.0",
"p-retry": "~4.2.0",
"tldts": "~5.6.2",
"youtube-dl": "~2.3.0"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
const debug = require('debug')('metascraper-media-provider:twitter')
const { reduce, set, get, chain } = require('lodash')
const { protocol } = require('@metascraper/helpers')
const pReflect = require('p-reflect')
const pRetry = require('p-retry')
const got = require('got')

Expand Down Expand Up @@ -55,6 +56,26 @@ const createGuestToken = ({ userAgent, tunnel }) => {
}

const createGetTwitterVideo = ({ userAgent, getGuestToken }) => {
/**
 * Request `apiUrl` and hand back the parsed response body.
 *
 * The request is issued with `retry: 0` and `json: true`, carrying the guest
 * token, the bearer token and the configured user agent as headers, with the
 * original tweet `url` sent as the referer.
 *
 * @param {string} apiUrl - Endpoint to request.
 * @param {string} url - Original tweet URL, sent as the `referer` header.
 * @param {string} token - Guest token, sent as the `x-guest-token` header.
 * @returns {Promise<object>} The response body, or `{}` when the request
 *   failed with status code 404.
 * @throws Rethrows the original rejection reason for any non-404 failure.
 */
const getData = async (apiUrl, url, token) => {
  const headers = {
    referer: url,
    'x-guest-token': token,
    origin: 'https://twitter.com',
    authorization: TWITTER_BEARER_TOKEN,
    'user-agent': userAgent
  }

  // pReflect settles the request into an inspection object instead of
  // rejecting, so the failure branch can be handled without try/catch.
  const outcome = await pReflect(
    got(apiUrl, { retry: 0, json: true, headers })
  )

  if (!outcome.isFulfilled) {
    const error = outcome.reason
    // Anything other than "not found" is a real failure: propagate untouched.
    if (error.statusCode !== 404) throw error
    // A 404 is reported to the caller as an empty payload.
    return {}
  }

  return outcome.value.body
}

return async url => {
const tweetId = getTweetId(url)
const apiUrl = `https://api.twitter.com/2/timeline/conversation/${tweetId}.json?tweet_mode=extended`
Expand All @@ -66,25 +87,15 @@ const createGetTwitterVideo = ({ userAgent, getGuestToken }) => {
`getTwitterInfo apiUrl=${apiUrl} guestToken=${token} userAgent=${userAgent}`
)

const { body } = await got(apiUrl, {
retry: 0,
json: true,
headers: {
referer: url,
'x-guest-token': token,
origin: 'https://twitter.com',
authorization: TWITTER_BEARER_TOKEN,
'user-agent': userAgent
}
})
const payload = await getData(apiUrl, url, token)

const id = get(
body,
payload,
`globalObjects.tweets.${tweetId}.retweeted_status_id_str`,
tweetId
)

const tweetObj = get(body, `globalObjects.tweets.${id}`)
const tweetObj = get(payload, `globalObjects.tweets.${id}`)

data = {
extractor_key: 'Twitter',
Expand Down
6 changes: 6 additions & 0 deletions packages/metascraper-media-provider/test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,12 @@ describe('metascraper-media-provider', () => {
should(isUrl(metadata.video)).be.true()
})
})

// Checks that scraping this tweet URL does not yield a URL in `metadata.video`.
// NOTE(review): going by the test title, the API lookup for this tweet is
// presumably expected to answer 404 — confirm the tweet is still unavailable.
it('omit 404 urls', async () => {
const url = 'https://twitter.com/chenzonaut/status/456218458162601984'
const metadata = await metascraper({ url })
// `isUrl` must report false for whatever ended up in `metadata.video`.
should(isUrl(metadata.video)).be.false()
})
})

describe('facebook', () => {
Expand Down

0 comments on commit 93674e0

Please sign in to comment.