Skip to content

Commit

Permalink
Revert "Avoid use Twitter API (#122)"
Browse files Browse the repository at this point in the history
This reverts commit 71c7071.

# Conflicts:
#	packages/metascraper-media-provider/src/get-media/twitter-info.js
  • Loading branch information
Kikobeats committed Oct 3, 2018
1 parent 6481681 commit 66ca721
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 79 deletions.
7 changes: 1 addition & 6 deletions packages/metascraper-media-provider/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,10 @@
},
"dependencies": {
"@metascraper/helpers": "^4.3.5",
"binary-split": "~1.0.3",
"got": "~9.2.0",
"html-get": "~1.3.0",
"html-urls": "~2.3.0",
"lodash": "~4.17.10",
"mem": "~4.0.0",
"memoize-one": "~4.0.2",
"memoize-token": "~1.1.3",
"ua-string": "~1.2.0",
"memoize-token": "~1.1.0",
"youtube-dl": "~1.12.2"
},
"devDependencies": {
Expand Down
105 changes: 32 additions & 73 deletions packages/metascraper-media-provider/src/get-media/twitter-info.js
Original file line number Diff line number Diff line change
@@ -1,29 +1,18 @@
'use strict'

const { replace, find, get, chain } = require('lodash')
const memoizeToken = require('memoize-token')
const split = require('binary-split')
const uaString = require('ua-string')
const htmlUrls = require('html-urls')
const getHTML = require('html-get')
const { get, chain } = require('lodash')
const { URL } = require('url')
const got = require('got')
const mem = require('mem')

// Captures the numeric guest token (`gt`) from the inline script that sets `document.cookie`.
const REGEX_COOKIE = /document\.cookie = decodeURIComponent\("gt=([0-9]+)/

// Matches the scheme + host prefix of a desktop twitter.com URL (case-insensitive).
// The dot is escaped so that only a literal `twitter.com` matches.
const REGEX_TWITTER_HOST = /^https?:\/\/twitter\.com/i

// Captures the value assigned to `BEARER_TOKEN` in a line of bundled JavaScript.
const REGEX_BEARER_TOKEN = /BEARER_TOKEN:"(.*?)"/

// Matches the URL of a `main*.js` bundle; the extension dot is escaped so that
// strings merely containing `main` followed later by `<any char>js` do not match.
const REGEX_AUTH_URL = /main.*\.js/
// Twitter guest web token, sent as the `Authorization` header value on API requests.
// NOTE(review): this is a credential hard-coded in source; the links below are cited as its origin.
// https://github.com/soimort/you-get/blob/da8c982608c9308765e0960e08fc28cccb74b215/src/you_get/extractors/twitter.py#L72
// https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/twitter.py#L235
const TWITTER_BEARER_TOKEN =
'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw'

// Hostnames that `isTwitterHost` accepts as Twitter.
const TWITTER_HOSTNAMES = ['twitter.com', 'mobile.twitter.com']

// Passed as `max` to memoizeToken.
// NOTE(review): presumably the number of uses allowed per guest token — confirm against memoize-token.
const API_GUEST_ACTIVATE_LIMIT = 180

// Passed as `expire` to memoizeToken; value is in milliseconds.
const API_GUEST_ACTIVATE_EXPIRE = 15 * 60 * 1000 // 15 min

// True when the URL string contains a `/status/` path segment.
const isTweet = url => {
  const statusMarker = '/status/'
  return url.includes(statusMarker)
}

// True when the URL's hostname is one of TWITTER_HOSTNAMES.
// Throws (from `new URL`) when `url` is not an absolute URL.
const isTwitterHost = url => {
  const { hostname } = new URL(url)
  return TWITTER_HOSTNAMES.includes(hostname)
}
Expand All @@ -32,83 +21,53 @@ const isTwitterUrl = url => isTwitterHost(url) && isTweet(url)

/**
 * Extract the tweet id from a tweet URL.
 *
 * Prefers the path segment that follows `/status/`, so query strings
 * (`?s=20`), fragments, trailing slashes and trailing segments such as
 * `/photo/1` do not leak into the id. When the URL has no `/status/`
 * segment, falls back to the last non-empty path segment.
 *
 * @param {string} url - Tweet URL.
 * @returns {string} The tweet id, or an empty string when none can be found.
 */
const getTweetId = url => {
  const statusMatch = /\/status\/([^/?#]+)/.exec(url)
  if (statusMatch) return statusMatch[1]

  // Fallback: drop query/fragment, then take the last non-empty segment.
  const [path] = url.split(/[?#]/)
  const segments = path.split('/').filter(Boolean)
  return segments.length > 0 ? segments[segments.length - 1] : ''
}

// Memoized rewrite of a URL's REGEX_TWITTER_HOST match to the mobile.twitter.com origin.
const getMobileUrl = mem(url => {
  const mobileOrigin = 'https://mobile.twitter.com'
  return replace(url, REGEX_TWITTER_HOST, mobileOrigin)
})

/**
 * Stream the mobile version of `url` line by line and resolve with the first
 * truthy value returned by `onData`.
 *
 * @param {string} url - URL to fetch; it is converted with `getMobileUrl` first.
 * @param {object} options
 * @param {function(string): *} options.onData - Called with every line (as a
 *   string); may be sync or async. The first truthy result wins.
 * @returns {Promise<*>} Resolves with the first truthy `onData` result, or with
 *   `undefined` when the stream ends without one. Rejects when the request, the
 *   line splitter or `onData` fails.
 */
const promiseStream = (url, { onData }) =>
  new Promise((resolve, reject) => {
    const stream = got.stream(getMobileUrl(url), {
      headers: { 'user-agent': uaString }
    })

    let req
    let settled = false
    let ended = false
    let pending = 0

    // Settle the promise at most once; later events are ignored.
    const settle = (done, value) => {
      if (settled) return
      settled = true
      done(value)
    }

    // Resolve with `undefined` only once the stream has ended AND every
    // in-flight `onData` call has finished, so a match on the last line
    // is never lost to the 'end' event.
    const finishIfDrained = () => {
      if (ended && pending === 0) settle(resolve, undefined)
    }

    stream.on('request', request => (req = request))
    // `pipe` does not forward errors, so the source stream needs its own handler.
    stream.on('error', error => settle(reject, error))

    stream
      .pipe(split())
      .on('data', async data => {
        if (settled) return
        pending += 1
        try {
          const result = await onData(data.toString())
          if (result && !settled) {
            // Stop downloading as soon as a result is found.
            if (req) req.abort()
            settle(resolve, result)
          }
        } catch (error) {
          settle(reject, error)
        } finally {
          pending -= 1
          finishIfDrained()
        }
      })
      .on('end', () => {
        ended = true
        finishIfDrained()
      })
      .on('error', error => settle(reject, error))
  })
// Passed to memoizeToken as `max` and `expire` respectively; the expiry is in milliseconds.
// NOTE(review): presumably these mirror Twitter's guest-activation rate limit window — confirm.
const API_GUEST_ACTIVATE_LIMIT = 180
const API_GUEST_ACTIVATE_EXPIRE = 15 * 60 * 1000 // 15 min

// NOTE(review): this span interleaves lines from three definitions (`getAuthorization`,
// `getGuestToken`, `createGetAuth`) with no markers separating them, so it does not parse
// as written — confirm which lines belong to the current file before editing.
//
// getAuthorization: streams `url` through promiseStream and returns capture group 1 of
// REGEX_BEARER_TOKEN for the first line where it is truthy.
const getAuthorization = async url =>
promiseStream(url, {
onData: line => {
return get(REGEX_BEARER_TOKEN.exec(line), 1)
// getGuestToken: POSTs to the guest/activate endpoint with TWITTER_BEARER_TOKEN as the
// Authorization header and `url` as the Referer (retries disabled, JSON body parsing on),
// then returns the `guest_token` field of the response body.
const getGuestToken = async url => {
const { body } = await got.post(
'https://api.twitter.com/1.1/guest/activate.json',
{
retry: false,
headers: { Authorization: TWITTER_BEARER_TOKEN, Referer: url },
json: true
}
})

// createGetAuth: builds a function that fetches the prerendered HTML of the mobile URL,
// reads the guest token via REGEX_COOKIE, finds the first link matching REGEX_AUTH_URL and
// obtains the bearer token from it through getAuthorization. The function is wrapped with
// memoizeToken (max / expire / key below, overridable through `opts`).
const createGetAuth = ({ getBrowserless, ...opts }) => {
const fn = async url => {
const mobileUrl = getMobileUrl(url)
const { html } = await getHTML(mobileUrl, {
prerender: true,
getBrowserless
})

const guestToken = get(REGEX_COOKIE.exec(html), 1)
const links = htmlUrls({ html, url: mobileUrl })
const bearerUrl = find(links, ({ normalizedUrl }) => REGEX_AUTH_URL.test(normalizedUrl))
const authorization = await getAuthorization(get(bearerUrl, 'normalizedUrl'))
return { authorization: `Bearer ${authorization}`, guestToken }
}

return memoizeToken(fn, {
max: API_GUEST_ACTIVATE_LIMIT,
expire: API_GUEST_ACTIVATE_EXPIRE,
key: 'media:twitter',
...opts
})
)
return get(body, 'guest_token')
}

// getTwitterInfo: given a token/auth provider, returns an async function that takes a tweet
// URL, requests the conversation timeline for its tweet id (retries disabled, JSON parsing on)
// and returns the first media item's video variants that have a `bitrate`, sorted by
// ascending bitrate.
// NOTE(review): two versions of this function are interleaved here (one taking `getAuth`,
// one taking `getToken`), so the span does not parse as written — confirm which lines are current.
const getTwitterInfo = ({ getAuth }) => async url => {
const getTwitterInfo = ({ getToken }) => async url => {
const tweetId = getTweetId(url)
const apiUrl = `https://api.twitter.com/2/timeline/conversation/${tweetId}.json?tweet_mode=extended`
// `getAuth` variant: both the authorization header and the guest token come from getAuth.
const { authorization, guestToken } = await getAuth(url)

// `getToken` variant: only the guest token is fetched; the header uses TWITTER_BEARER_TOKEN.
const guestToken = await getToken(url)
const { body } = await got(apiUrl, {
retry: false,
json: true,
headers: {
authorization,
authorization: TWITTER_BEARER_TOKEN,
'x-guest-token': guestToken
}
})

// lodash chain: a missing path yields an empty result rather than throwing.
return chain(body)
.get(`globalObjects.tweets.${tweetId}.extended_entities.media.0.video_info.variants`)
.get(
`globalObjects.tweets.${tweetId}.extended_entities.media.0.video_info.variants`
)
.filter('bitrate')
.orderBy('bitrate', 'asc')
.value()
}

// Module factory: returns `{ getTwitterInfo, isTwitterUrl }`, with getTwitterInfo bound to a
// memoized credential provider.
// NOTE(review): two versions are interleaved here (one built on `createGetAuth(opts)`, one on
// `memoizeToken(getGuestToken, ...)`), so the span does not parse as written — confirm which
// lines are current.
module.exports = (opts = {}) => {
const getAuth = createGetAuth(opts)
module.exports = opts => {
// `opts` is spread last, so caller-supplied values override max / expire / key.
const getToken = memoizeToken(getGuestToken, {
max: API_GUEST_ACTIVATE_LIMIT,
expire: API_GUEST_ACTIVATE_EXPIRE,
key: 'media:twitter',
...opts
})

return {
getTwitterInfo: getTwitterInfo({ getAuth }),
getTwitterInfo: getTwitterInfo({ getToken }),
isTwitterUrl
}
}

0 comments on commit 66ca721

Please sign in to comment.