Skip to content

Commit

Permalink
feat: add metascraper-twitter (#608)
Browse files Browse the repository at this point in the history
Closes #260
  • Loading branch information
Kikobeats authored Jan 1, 2023
1 parent 3ae3a5a commit 075c0ab
Show file tree
Hide file tree
Showing 13 changed files with 1,123 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ const metascraper = require('metascraper')([
- [metascraper-telegram](https://github.com/microlinkhq/metascraper/tree/master/packages/metascraper-telegram) – Metascraper integration with Telegram.
- [metascraper-uol](https://github.com/microlinkhq/metascraper/tree/master/packages/metascraper-uol) – Metascraper integration for uol.com URLs.
- [metascraper-spotify](https://github.com/microlinkhq/metascraper/tree/master/packages/metascraper-spotify) – Metascraper integration with Spotify.
- [metascraper-twitter](https://github.com/microlinkhq/metascraper/tree/master/packages/metascraper-twitter) – Metascraper integration with Twitter.
- [metascraper-youtube](https://github.com/microlinkhq/metascraper/tree/master/packages/metascraper-youtube) – Metascraper integration with YouTube.

### Community
Expand Down
3 changes: 3 additions & 0 deletions packages/metascraper-twitter/.npmrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
unsafe-perm=true
save-prefix=~
save=false
22 changes: 22 additions & 0 deletions packages/metascraper-twitter/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<div align="center">
<br>
<img style="width: 500px; margin:3rem 0 1.5rem;" src="https://metascraper.js.org/static/logo-banner.png" alt="metascraper">
<br>
<br>
<p align="center"><strong>metascraper-twitter</strong>: Metascraper integration with Twitter.</p>
<p align="center">See our <a href="https://metascraper.js.org" target='_blank' rel='noopener noreferrer'>website</a> for more information.</p>
<br>
</div>

## Install

```bash
$ npm install metascraper-twitter --save
```

## License

**metascraper-twitter** © [Microlink](https://microlink.io), released under the [MIT](https://github.com/microlinkhq/metascraper/blob/master/LICENSE.md) License.<br>
Authored and maintained by [Microlink](https://microlink.io) with help from [contributors](https://github.com/microlinkhq/metascraper/contributors).

> [microlink.io](https://microlink.io) · GitHub [microlinkhq](https://github.com/microlinkhq) · Twitter [@microlinkhq](https://twitter.com/microlinkhq)
61 changes: 61 additions & 0 deletions packages/metascraper-twitter/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
'use strict'

const {
$jsonld,
author,
date,
image,
memoizeOne,
parseUrl,
title,
toRule,
video
} = require('@metascraper/helpers')

// Pair every imported field helper with `toRule` in a single pass; the
// destructuring order mirrors the order of the helpers in the array.
const [toAuthor, toDate, toImage, toVideo, toTitle] = [
  author,
  date,
  image,
  video,
  title
].map(helper => toRule(helper))

// Memoized predicate: true when the URL's registrable domain name
// (suffix excluded) is exactly `twitter`.
const test = memoizeOne(url => {
  const { domainWithoutSuffix } = parseUrl(url)
  return domainWithoutSuffix === 'twitter'
})

// Suffix to substitute for a matched size modifier in an image URL.
const ORIGINAL_IMG_SIZE = '_400x400'
// Matches the `_bigger.`, `_mini.` and `_normal.` size modifiers in image URLs.
const REGEX_IMG_MODIFIERS = /_(?:bigger|mini|normal)\./

module.exports = () => {
const rules = {
author: [
toAuthor($jsonld('author.givenName')),
toAuthor($ => {
const author = $('meta[property="og:title"]').attr('content')
return author.includes(' on Twitter')
? author.split(' on Twitter')[0]
: author
})
],
title: [toTitle(($, url) => `@${url.split('/')[3]} on Twitter`)],
date: [
toDate(($, url) => {
const id = url.replace('https://twitter.com', '')
return $(`a[href="${id}"] time`).attr('datetime')
})
],
image: [
toImage($jsonld('image.contentUrl')),
toImage($ => $('video').attr('poster')),
toImage($ => {
const avatar = $('article img[src]').attr('src')
return avatar?.replace(REGEX_IMG_MODIFIERS, `${ORIGINAL_IMG_SIZE}.`)
})
],
video: [toVideo($ => $('video').attr('src'))],
publisher: () => 'Twitter'
}

rules.test = ({ url }) => test(url)

return rules
}

module.exports.test = test
41 changes: 41 additions & 0 deletions packages/metascraper-twitter/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{
"name": "metascraper-twitter",
"description": "Metascraper integration with Twitter",
"homepage": "https://nicedoc.io/microlinkhq/metascraper/packages/metascraper-twitter",
"version": "5.32.4",
"main": "index.js",
"author": {
"email": "[email protected]",
"name": "microlink.io",
"url": "https://microlink.io"
},
"repository": {
"directory": "packages/metascraper-twitter",
"type": "git",
"url": "git+https://github.com/microlinkhq/metascraper.git"
},
"bugs": {
"url": "https://github.com/microlinkhq/metascraper/issues"
},
"keywords": [
"metascraper",
"twitter"
],
"dependencies": {
"@metascraper/helpers": "^5.32.4"
},
"devDependencies": {
"ava": "latest"
},
"engines": {
"node": ">= 12"
},
"files": [
"index.js"
],
"scripts": {
"test": "NODE_PATH=.. TZ=UTC ava --timeout 15s"
},
"license": "MIT"
}
257 changes: 257 additions & 0 deletions packages/metascraper-twitter/test/fixtures/profile.html

Large diffs are not rendered by default.

191 changes: 191 additions & 0 deletions packages/metascraper-twitter/test/fixtures/tweet-gif.html

Large diffs are not rendered by default.

186 changes: 186 additions & 0 deletions packages/metascraper-twitter/test/fixtures/tweet-image.html

Large diffs are not rendered by default.

190 changes: 190 additions & 0 deletions packages/metascraper-twitter/test/fixtures/tweet.html

Large diffs are not rendered by default.

59 changes: 59 additions & 0 deletions packages/metascraper-twitter/test/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
'use strict'

const { readFile } = require('fs/promises')
const { resolve } = require('path')
const test = require('ava')

const metascraperTwitter = require('metascraper-twitter')

// Build a metascraper instance with the Twitter rules first, followed by the
// generic rule packages. Any arguments are forwarded to the Twitter factory.
const createMetascraper = (...args) => {
  const metascraper = require('metascraper')
  const twitterRules = metascraperTwitter(...args)
  const genericRules = [
    'metascraper-author',
    'metascraper-date',
    'metascraper-description',
    'metascraper-lang',
    'metascraper-publisher',
    'metascraper-title',
    'metascraper-url'
  ].map(name => require(name)())

  return metascraper([twitterRules, ...genericRules])
}

// Each entry: [test title, page URL, fixture path relative to this file].
const snapshotCases = [
  ['from a Twitter profile', 'https://twitter.com/Kikobeats', 'fixtures/profile.html'],
  [
    'from a tweet',
    'https://twitter.com/realDonaldTrump/status/1222907250383245320',
    'fixtures/tweet.html'
  ],
  [
    'from a tweet with a gif',
    'https://twitter.com/Kikobeats/status/880139124791029763',
    'fixtures/tweet-gif.html'
  ],
  [
    'from a tweet with an image',
    'https://twitter.com/k4rliky/status/934482867480121345',
    'fixtures/tweet-image.html'
  ]
]

// Register one snapshot test per case: load the fixture, scrape it against
// the given URL and compare the extracted metadata with the stored snapshot.
for (const [title, url, fixture] of snapshotCases) {
  test(title, async t => {
    const html = await readFile(resolve(__dirname, fixture))

    const metascraper = createMetascraper()
    const metadata = await metascraper({ url, html })

    t.snapshot(metadata)
  })
}
93 changes: 93 additions & 0 deletions packages/metascraper-twitter/test/snapshots/index.js.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Snapshot report for `test/index.js`

The actual snapshot is saved in `index.js.snap`.

Generated by [AVA](https://avajs.dev).

## from a Twitter profile

> Snapshot 1
{
author: '#!/kiko/beats',
date: '2010-01-02T11:14:31.000Z',
description: `engineering ▲ @vercel; founder of␊
https://t.co/4PQvCsVNsA␊
https://t.co/fpiHwbEPBv␊
https://t.co/IG8Qq0IDKi␊
https://t.co/gblDRx1P9D␊
https://t.co/SmoZi3hAhb␊
https://t.co/Y0Uk1XU3Eu␊
https://t.co/PAq3eTEhmI`,
image: 'https://pbs.twimg.com/profile_images/1564907557512269826/H6O8VFZL_x96.jpg',
lang: 'en',
publisher: 'Twitter',
title: '@Kikobeats on Twitter',
url: 'https://twitter.com/Kikobeats',
video: null,
}

## from a tweet

> Snapshot 1
{
author: 'Donald J. Trump',
date: '2020-01-30T15:39:43.000Z',
description: '““Schiff blasted for not focusing on California homeless.” @foxandfriends His District is in terrible shape. He is a corrupt pol who only dreams of the Impeachment Hoax. In my opinion he is mentally deranged!”',
image: 'https://pbs.twimg.com/profile_images/874276197357596672/kUuht00m_x96.jpg',
lang: 'en',
publisher: 'Twitter',
title: '@realDonaldTrump on Twitter',
url: 'https://twitter.com/realDonaldTrump/status/1222907250383245320',
video: null,
}

## from a tweet with a gif

> Snapshot 1
{
author: '#!/kiko/beats',
date: '2017-06-28T19:01:34.000Z',
description: '“Experimenting with Clearbit API + Apple TV 3D Parallax https://t.co/Qsm163k4mJ”',
image: 'https://pbs.twimg.com/tweet_video_thumb/DDbh3WCXYAAZfz9.jpg',
lang: 'en',
publisher: 'Twitter',
title: '@Kikobeats on Twitter',
url: 'https://twitter.com/Kikobeats/status/880139124791029763',
video: 'https://video.twimg.com/tweet_video/DDbh3WCXYAAZfz9.mp4',
}

## from a tweet with an image

> Snapshot 1
{
author: 'Brad, what are you gonna do?',
date: '2017-11-25T18:04:12.000Z',
description: '“Lo mejor de @codemotion_es #codemotionMadrid es estar con la gente que quieres 😍@ladyCircus”',
image: 'https://pbs.twimg.com/profile_images/1603675274348040192/y9P6VlyX_x96.jpg',
lang: 'en',
publisher: 'Twitter',
title: '@k4rliky on Twitter',
url: 'https://twitter.com/k4rliky/status/934482867480121345',
video: null,
}

## from a tweet with a video

> Snapshot 1
{
author: '#!/kiko/beats',
date: '2022-12-08T16:26:59.000Z',
description: `“SpongeBob at the Edge ⚡️ ⬩ demo: https://t.co/xn8YCGMX3d␊
⬩ code: https://t.co/h3rGc4JNBS a browser hit = a new random time card”`,
image: 'https://pbs.twimg.com/ext_tw_video_thumb/1600889467992248320/pu/img/B5Su3ad6DHdQZ_Sz.jpg',
lang: 'en',
publisher: 'Twitter',
title: '@Kikobeats on Twitter',
url: 'https://twitter.com/Kikobeats/status/1600889688226549763',
video: null,
}
Binary file not shown.
19 changes: 19 additions & 0 deletions packages/metascraper-twitter/test/test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
'use strict'

const test = require('ava')

const { test: validator } = require('..')

// Sample URLs for the exported URL predicate.
const TWEET_URL =
  'https://twitter.com/realDonaldTrump/status/1222907250383245320'
const NON_TWITTER_URL =
  'https://soundcloud.com/beautybrainsp/beauty-brain-swag-bandicoot'

// A twitter.com URL is accepted.
test('true', t => {
  const isTwitter = validator(TWEET_URL)
  t.true(isTwitter)
})

// A URL on another domain is rejected.
test('false', t => {
  const isTwitter = validator(NON_TWITTER_URL)
  t.false(isTwitter)
})

0 comments on commit 075c0ab

Please sign in to comment.