-
-
Notifications
You must be signed in to change notification settings - Fork 168
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add metascraper-twitter (#608)
Closes #260
- Loading branch information
Showing
13 changed files
with
1,123 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
unsafe-perm=true | ||
save-prefix=~ | ||
save=false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
<div align="center"> | ||
<br> | ||
<img style="width: 500px; margin:3rem 0 1.5rem;" src="https://metascraper.js.org/static/logo-banner.png" alt="metascraper"> | ||
<br> | ||
<br> | ||
<p align="center"><strong>metascraper-twitter</strong>: Metascraper integration with Twitter.</p> | ||
<p align="center">See our <a href="https://metascraper.js.org" target='_blank' rel='noopener noreferrer'>website</a> for more information.</p> | ||
<br> | ||
</div> | ||
|
||
## Install | ||
|
||
```bash | ||
$ npm install metascraper-twitter --save | ||
``` | ||
|
||
## License | ||
|
||
**metascraper-twitter** © [Microlink](https://microlink.io), released under the [MIT](https://github.com/microlinkhq/metascraper/blob/master/LICENSE.md) License.<br> | ||
Authored and maintained by [Microlink](https://microlink.io) with help from [contributors](https://github.com/microlinkhq/metascraper/contributors). | ||
|
||
> [microlink.io](https://microlink.io) · GitHub [microlinkhq](https://github.com/microlinkhq) · Twitter [@microlinkhq](https://twitter.com/microlinkhq) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
'use strict' | ||
|
||
const { | ||
$jsonld, | ||
author, | ||
date, | ||
image, | ||
memoizeOne, | ||
parseUrl, | ||
title, | ||
toRule, | ||
video | ||
} = require('@metascraper/helpers') | ||
|
||
// Each validator imported from @metascraper/helpers is passed through `toRule`
// once; the resulting builders are what the rule set below wraps its
// extractors with.
const [toAuthor, toDate, toImage, toVideo, toTitle] = [
  author,
  date,
  image,
  video,
  title
].map(helper => toRule(helper))

// Memoized predicate: true when the URL's domain, without its public suffix,
// is exactly `twitter`.
const test = memoizeOne(url => {
  const { domainWithoutSuffix } = parseUrl(url)
  return domainWithoutSuffix === 'twitter'
})

// Suffix substituted for a matched size modifier in an avatar URL.
const ORIGINAL_IMG_SIZE = '_400x400'

// Matches the size modifier in an image file name: `_bigger.`, `_mini.` or
// `_normal.`.
const REGEX_IMG_MODIFIERS = /_(?:bigger|mini|normal)\./
|
||
module.exports = () => { | ||
const rules = { | ||
author: [ | ||
toAuthor($jsonld('author.givenName')), | ||
toAuthor($ => { | ||
const author = $('meta[property="og:title"]').attr('content') | ||
return author.includes(' on Twitter') | ||
? author.split(' on Twitter')[0] | ||
: author | ||
}) | ||
], | ||
title: [toTitle(($, url) => `@${url.split('/')[3]} on Twitter`)], | ||
date: [ | ||
toDate(($, url) => { | ||
const id = url.replace('https://twitter.com', '') | ||
return $(`a[href="${id}"] time`).attr('datetime') | ||
}) | ||
], | ||
image: [ | ||
toImage($jsonld('image.contentUrl')), | ||
toImage($ => $('video').attr('poster')), | ||
toImage($ => { | ||
const avatar = $('article img[src]').attr('src') | ||
return avatar?.replace(REGEX_IMG_MODIFIERS, `${ORIGINAL_IMG_SIZE}.`) | ||
}) | ||
], | ||
video: [toVideo($ => $('video').attr('src'))], | ||
publisher: () => 'Twitter' | ||
} | ||
|
||
rules.test = ({ url }) => test(url) | ||
|
||
return rules | ||
} | ||
|
||
module.exports.test = test |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
{ | ||
"name": "metascraper-twitter", | ||
"description": "Metascraper integration with Twitter", | ||
"homepage": "https://nicedoc.io/microlinkhq/metascraper/packages/metascraper-twitter", | ||
"version": "5.32.4", | ||
"main": "index.js", | ||
"author": { | ||
"email": "[email protected]", | ||
"name": "microlink.io", | ||
"url": "https://microlink.io" | ||
}, | ||
"repository": { | ||
"directory": "packages/metascraper-twitter", | ||
"type": "git", | ||
"url": "git+https://github.com/microlinkhq/metascraper.git" | ||
}, | ||
"bugs": { | ||
"url": "https://github.com/microlinkhq/metascraper/issues" | ||
}, | ||
"keywords": [ | ||
"metascraper", | ||
"twitter" | ||
], | ||
"dependencies": { | ||
"@metascraper/helpers": "^5.32.4" | ||
}, | ||
"devDependencies": { | ||
"ava": "latest" | ||
}, | ||
"engines": { | ||
"node": ">= 12" | ||
}, | ||
"files": [ | ||
"index.js" | ||
], | ||
"scripts": { | ||
"test": "NODE_PATH=.. TZ=UTC ava --timeout 15s" | ||
}, | ||
"license": "MIT" | ||
} |
257 changes: 257 additions & 0 deletions
257
packages/metascraper-twitter/test/fixtures/profile.html
Large diffs are not rendered by default.
Oops, something went wrong.
191 changes: 191 additions & 0 deletions
191
packages/metascraper-twitter/test/fixtures/tweet-gif.html
Large diffs are not rendered by default.
Oops, something went wrong.
186 changes: 186 additions & 0 deletions
186
packages/metascraper-twitter/test/fixtures/tweet-image.html
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
'use strict' | ||
|
||
const { readFile } = require('fs/promises') | ||
const { resolve } = require('path') | ||
const test = require('ava') | ||
|
||
const metascraperTwitter = require('metascraper-twitter') | ||
|
||
// Generic rule bundles loaded after the Twitter rules, in this order.
const GENERIC_BUNDLES = [
  'metascraper-author',
  'metascraper-date',
  'metascraper-description',
  'metascraper-lang',
  'metascraper-publisher',
  'metascraper-title',
  'metascraper-url'
]

// Build a metascraper instance with the Twitter rules first, followed by the
// generic bundles. Arguments are forwarded to the Twitter rule factory.
const createMetascraper = (...args) => {
  const metascraper = require('metascraper')
  const twitterRules = metascraperTwitter(...args)
  const genericRules = GENERIC_BUNDLES.map(name => require(name)())
  return metascraper([twitterRules, ...genericRules])
}

// One snapshot test per scenario: the page URL and the HTML fixture for it.
const scenarios = [
  {
    title: 'from a Twitter profile',
    url: 'https://twitter.com/Kikobeats',
    fixture: 'fixtures/profile.html'
  },
  {
    title: 'from a tweet',
    url: 'https://twitter.com/realDonaldTrump/status/1222907250383245320',
    fixture: 'fixtures/tweet.html'
  },
  {
    title: 'from a tweet with a gif',
    url: 'https://twitter.com/Kikobeats/status/880139124791029763',
    fixture: 'fixtures/tweet-gif.html'
  },
  {
    title: 'from a tweet with an image',
    url: 'https://twitter.com/k4rliky/status/934482867480121345',
    fixture: 'fixtures/tweet-image.html'
  }
]

for (const { title, url, fixture } of scenarios) {
  test(title, async t => {
    const html = await readFile(resolve(__dirname, fixture))

    const scrape = createMetascraper()
    const metadata = await scrape({ url, html })

    t.snapshot(metadata)
  })
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
# Snapshot report for `test/index.js` | ||
|
||
The actual snapshot is saved in `index.js.snap`. | ||
|
||
Generated by [AVA](https://avajs.dev). | ||
|
||
## from a Twitter profile | ||
|
||
> Snapshot 1 | ||
{ | ||
author: '#!/kiko/beats', | ||
date: '2010-01-02T11:14:31.000Z', | ||
description: `engineering ▲ @vercel; founder of␊ | ||
https://t.co/4PQvCsVNsA␊ | ||
https://t.co/fpiHwbEPBv␊ | ||
https://t.co/IG8Qq0IDKi␊ | ||
https://t.co/gblDRx1P9D␊ | ||
https://t.co/SmoZi3hAhb␊ | ||
https://t.co/Y0Uk1XU3Eu␊ | ||
https://t.co/PAq3eTEhmI`, | ||
image: 'https://pbs.twimg.com/profile_images/1564907557512269826/H6O8VFZL_x96.jpg', | ||
lang: 'en', | ||
publisher: 'Twitter', | ||
title: '@Kikobeats on Twitter', | ||
url: 'https://twitter.com/Kikobeats', | ||
video: null, | ||
} | ||
|
||
## from a tweet | ||
|
||
> Snapshot 1 | ||
{ | ||
author: 'Donald J. Trump', | ||
date: '2020-01-30T15:39:43.000Z', | ||
description: '““Schiff blasted for not focusing on California homeless.” @foxandfriends His District is in terrible shape. He is a corrupt pol who only dreams of the Impeachment Hoax. In my opinion he is mentally deranged!”', | ||
image: 'https://pbs.twimg.com/profile_images/874276197357596672/kUuht00m_x96.jpg', | ||
lang: 'en', | ||
publisher: 'Twitter', | ||
title: '@realDonaldTrump on Twitter', | ||
url: 'https://twitter.com/realDonaldTrump/status/1222907250383245320', | ||
video: null, | ||
} | ||
|
||
## from a tweet with a gif | ||
|
||
> Snapshot 1 | ||
{ | ||
author: '#!/kiko/beats', | ||
date: '2017-06-28T19:01:34.000Z', | ||
description: '“Experimenting with Clearbit API + Apple TV 3D Parallax https://t.co/Qsm163k4mJ”', | ||
image: 'https://pbs.twimg.com/tweet_video_thumb/DDbh3WCXYAAZfz9.jpg', | ||
lang: 'en', | ||
publisher: 'Twitter', | ||
title: '@Kikobeats on Twitter', | ||
url: 'https://twitter.com/Kikobeats/status/880139124791029763', | ||
video: 'https://video.twimg.com/tweet_video/DDbh3WCXYAAZfz9.mp4', | ||
} | ||
|
||
## from a tweet with an image | ||
|
||
> Snapshot 1 | ||
{ | ||
author: 'Brad, what are you gonna do?', | ||
date: '2017-11-25T18:04:12.000Z', | ||
description: '“Lo mejor de @codemotion_es #codemotionMadrid es estar con la gente que quieres 😍@ladyCircus”', | ||
image: 'https://pbs.twimg.com/profile_images/1603675274348040192/y9P6VlyX_x96.jpg', | ||
lang: 'en', | ||
publisher: 'Twitter', | ||
title: '@k4rliky on Twitter', | ||
url: 'https://twitter.com/k4rliky/status/934482867480121345', | ||
video: null, | ||
} | ||
|
||
## from a tweet with a video | ||
|
||
> Snapshot 1 | ||
{ | ||
author: '#!/kiko/beats', | ||
date: '2022-12-08T16:26:59.000Z', | ||
description: `“SpongeBob at the Edge ⚡️ ⬩ demo: https://t.co/xn8YCGMX3d␊ | ||
⬩ code: https://t.co/h3rGc4JNBS a browser hit = a new random time card”`, | ||
image: 'https://pbs.twimg.com/ext_tw_video_thumb/1600889467992248320/pu/img/B5Su3ad6DHdQZ_Sz.jpg', | ||
lang: 'en', | ||
publisher: 'Twitter', | ||
title: '@Kikobeats on Twitter', | ||
url: 'https://twitter.com/Kikobeats/status/1600889688226549763', | ||
video: null, | ||
} |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
'use strict' | ||
|
||
const test = require('ava') | ||
|
||
const { test: validator } = require('..') | ||
|
||
// A URL the validator is expected to accept and one it is expected to reject.
const TWITTER_URL =
  'https://twitter.com/realDonaldTrump/status/1222907250383245320'
const OTHER_URL =
  'https://soundcloud.com/beautybrainsp/beauty-brain-swag-bandicoot'

test('true', t => {
  const accepted = validator(TWITTER_URL)
  t.true(accepted)
})

test('false', t => {
  const accepted = validator(OTHER_URL)
  t.false(accepted)
})