Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(logo-favicon): granular control #667

Merged
merged 4 commits into from
Nov 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@
"standard-markdown"
],
"package.json": [
"finepack --sort-ignore-object-at ava"
"finepack"
]
},
"simple-git-hooks": {
Expand Down
21 changes: 21 additions & 0 deletions packages/metascraper-logo-favicon/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,27 @@ $ npm install metascraper-logo-favicon --save

#### options

##### google

Type: `boolean`<br>
Default: `true`

It enables logo resolution using Google API.

##### favicon

Type: `boolean`<br>
Default: `true`

It tries to resolve `favicon.ico` of the url.

##### rootFavicon

Type: `boolean`|`regexp`<br>
Default: `true`

It tries to resolve `favicon.ico` of the root domain when the URL is a subdomain. When a regular expression is provided, root domains matching it are skipped.

##### pickFn

Type: `function`
Expand Down
85 changes: 58 additions & 27 deletions packages/metascraper-logo-favicon/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -105,52 +105,83 @@ const pickBiggerSize = sizes => {
pickBiggerSize.sortBySize = collection =>
orderBy(collection, ['size.priority'], ['desc'])

const createGetLogo = ({ gotOpts, keyvOpts }) => {
const getLogo = async url => {
const faviconUrl = logo('/favicon.ico', { url })
if (!faviconUrl) return
/**
 * Resolve the `/favicon.ico` logo for `url`.
 *
 * The candidate is built with `logo('/favicon.ico', { url })` and probed with
 * `reachableUrl`. It is returned only when the response is reachable and its
 * `content-type` header starts with `image`; otherwise `undefined`.
 *
 * @param {string} url - URL used as the base for the favicon candidate.
 * @param {object} [opts]
 * @param {object} [opts.gotOpts] - Options forwarded to `reachableUrl`.
 * @returns {Promise<string|undefined>} The favicon URL, or `undefined`.
 */
const favicon = async (url, { gotOpts } = {}) => {
  const candidate = logo('/favicon.ico', { url })
  if (!candidate) return undefined

  const res = await reachableUrl(candidate, gotOpts)
  if (!reachableUrl.isReachable(res)) return undefined

  // A reachable response that is not an image is not accepted as a logo.
  const contentType = res.headers['content-type']
  return contentType?.startsWith('image') ? candidate : undefined
}

let response = await reachableUrl(faviconUrl, gotOpts)
/**
 * Resolve a logo for `url` through the URL produced by `google.url(url)`.
 *
 * @param {string} url - URL whose logo should be resolved.
 * @param {object} [opts]
 * @param {object} [opts.gotOpts] - Options forwarded to `reachableUrl`.
 * @returns {Promise<string|undefined>} The `url` of the reachable response,
 *   or `undefined` when the request is not reachable.
 */
const google = async (url, { gotOpts } = {}) => {
  const res = await reachableUrl(google.url(url), gotOpts)
  if (!reachableUrl.isReachable(res)) return undefined
  return res.url
}

if (
reachableUrl.isReachable(response) &&
response.headers['content-type']?.startsWith('image')
) {
return faviconUrl
}
/**
 * Build the Google favicon service URL for `url`.
 *
 * The target is percent-encoded before being placed in the query string so
 * that reserved characters in it (`&`, `#`, `?`, `=`) cannot truncate the
 * `domain_url` value or inject extra parameters such as a second `sz`.
 *
 * Calling it without `url` still yields `domain_url=undefined`, as before.
 *
 * @param {string} url - URL whose favicon is requested.
 * @param {number} [size=128] - Requested icon size, sent as `sz`.
 * @returns {string} The favicon service URL.
 */
google.url = (url, size = 128) => {
  const domainUrl = encodeURIComponent(url)
  return `https://www.google.com/s2/favicons?domain_url=${domainUrl}&sz=${size}`
}

response = await reachableUrl(
`https://www.google.com/s2/favicons?domain_url=${url}&sz=128`,
gotOpts
const createGetLogo = ({ withGoogle, withFavicon, gotOpts, keyvOpts }) => {
const getLogo = async url => {
const providers = [withFavicon && favicon, withGoogle && google].filter(
Boolean
)

return reachableUrl.isReachable(response) ? response.url : undefined
for (const provider of providers) {
const logoUrl = await provider(url, { gotOpts })
if (logoUrl) return logoUrl
}
}

return memoize(getLogo, keyvOpts, {
const fn = memoize(getLogo, keyvOpts, {
value: value => (value === undefined ? null : value)
})

return (...args) =>
fn(...args).then(value => (value === null ? undefined : value))
}

const castNull = value => (value === null ? undefined : value)
const createRootFavicon = ({ getLogo, withRootFavicon = true } = {}) => {
if (withRootFavicon === false) return undefined
return ({ url }) => {
const urlObj = new URL(url)
const domain = parseUrl(url).domain

if (withRootFavicon instanceof RegExp && withRootFavicon.test(domain)) {
return undefined
}

module.exports = ({ gotOpts, keyvOpts, pickFn = pickBiggerSize } = {}) => {
const getLogo = createGetLogo({ gotOpts, keyvOpts })
urlObj.hostname = domain
return getLogo(normalizeUrl(urlObj))
}
}

module.exports = ({
google: withGoogle = true,
favicon: withFavicon = true,
rootFavicon: withRootFavicon = true,
gotOpts,
keyvOpts,
pickFn = pickBiggerSize
} = {}) => {
const getLogo = createGetLogo({ withGoogle, withFavicon, gotOpts, keyvOpts })
const rootFavicon = createRootFavicon({ getLogo, withRootFavicon })
return {
logo: [
toUrl($ => {
const sizes = getSizes($, sizeSelectors)
const size = pickFn(sizes, pickBiggerSize)
return get(size, 'url')
}),
async ({ url }) => castNull(await getLogo(normalizeUrl(url))),
async ({ url }) => {
const urlObj = new URL(url)
urlObj.hostname = parseUrl(url).domain
const result = await getLogo(normalizeUrl(urlObj))
return castNull(result)
}
]
({ url }) => getLogo(normalizeUrl(url)),
rootFavicon
].filter(Boolean)
}
}

module.exports.favicon = favicon
module.exports.google = google
module.exports.createRootFavicon = createRootFavicon
module.exports.createGetLogo = createGetLogo
10 changes: 10 additions & 0 deletions packages/metascraper-logo-favicon/test/favicon.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
'use strict'

const test = require('ava')

const { favicon } = require('..')

// Per the test title, the favicon here is expected to be served with a
// Microsoft icon content type; `favicon` should still return its URL.
test('with { contentType: \'image/vnd.microsoft.icon\' }', async t => {
  const expected = 'https://microlink.io/favicon.ico'
  const resolved = await favicon('https://microlink.io/')
  t.is(resolved, expected)
})
17 changes: 17 additions & 0 deletions packages/metascraper-logo-favicon/test/get-logo.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
'use strict'

const test = require('ava')

const { createGetLogo } = require('..')

test('serialize null correctly', async t => {
  const target = 'https://example.com'
  const store = new Map()
  const getLogo = createGetLogo({
    withFavicon: false,
    withGoogle: false,
    keyvOpts: { store }
  })

  // With both providers disabled nothing is resolved: callers get `undefined`...
  t.is(await getLogo(target), undefined)

  // ...while the entry written to the store carries an explicit `null` value.
  const cached = JSON.parse(store.get(target))
  t.is(cached.value, null)
})
29 changes: 29 additions & 0 deletions packages/metascraper-logo-favicon/test/google.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
'use strict'

const test = require('ava')
const got = require('got')

const { google } = require('..')

test('return undefined under no logo', async t => {
  const logoUrl = await google('https://idontexist.lol')
  t.is(logoUrl, undefined)
})

test('return logo when URL is reachable', async t => {
  const logoUrl = await google('https://microlink.io/')
  t.true(typeof logoUrl === 'string')

  // Download a URL as a buffer without throwing on HTTP error statuses.
  const download = target =>
    got(target, {
      responseType: 'buffer',
      resolveBodyOnly: true,
      throwHttpErrors: false
    })

  // `google.url()` is called without a target; presumably it yields the
  // generic fallback icon, which the resolved logo must differ from in size.
  const [logo, fallback] = await Promise.all([
    download(logoUrl),
    download(google.url())
  ])
  t.true(logo.length !== fallback.length)
})
42 changes: 42 additions & 0 deletions packages/metascraper-logo-favicon/test/root-favicon.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
'use strict'

const test = require('ava')
const got = require('got')

const { createGetLogo, createRootFavicon, google } = require('..')

test('enable it by default', async t => {
  const getLogo = createGetLogo({ withGoogle: true, withFavicon: true })
  const rootFavicon = createRootFavicon({ getLogo })

  // The logo resolved for the subdomain is compared against the Google icon
  // of the `vercel.app` domain.
  const logoUrl = await rootFavicon({
    url: 'https://geolocation-indol.vercel.app/'
  })
  const domainLogoUrl = google.url('https://vercel.app/')

  // Download a URL as a buffer without throwing on HTTP error statuses.
  const download = target =>
    got(target, {
      responseType: 'buffer',
      resolveBodyOnly: true,
      throwHttpErrors: false
    })

  const [logo, domainLogo] = await Promise.all([
    download(logoUrl),
    download(domainLogoUrl)
  ])

  t.is(logo.length, domainLogo.length)
})

test('exclude certain subdomains', async t => {
  const getLogo = createGetLogo({ withGoogle: true, withFavicon: true })

  // A RegExp value for `withRootFavicon` is expected to skip matching domains.
  const withRootFavicon = /^vercel\.app/
  const rootFavicon = createRootFavicon({ getLogo, withRootFavicon })

  const logoUrl = await rootFavicon({
    url: 'https://geolocation-indol.vercel.app/'
  })
  t.is(logoUrl, undefined)
})

// When disabled, the factory is expected to return no rule at all.
test('disable it when \'{ withRootFavicon: false}\'', async t => {
  t.is(createRootFavicon({ withRootFavicon: false }), undefined)
})
2 changes: 1 addition & 1 deletion packages/metascraper-manifest/test/snapshots/index.js.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,5 +88,5 @@ Generated by [AVA](https://avajs.dev).
{
description: 'Get breaking news, politics, trending music, world events, sports scores, and the latest global news stories as they unfold - all with less data.',
lang: null,
publisher: 'Twitter',
publisher: 'X',
}
Binary file modified packages/metascraper-manifest/test/snapshots/index.js.snap
Binary file not shown.
2 changes: 1 addition & 1 deletion packages/metascraper-spotify/test/snapshots/index.js.md
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ Generated by [AVA](https://avajs.dev).
author: 'Syntax - Tasty Web Development Treats',
date: '2020-01-06T14:00:00.000Z',
description: 'In this Hasty Treat, Scott and Wes talk about modules in Node — what are they, how they’re different from browser modules, and more! Sentry - Sponsor If you want to know what’s happening with your errors, track them with Sentry. Sentry is open-source error tracking that helps developers monitor and…',
image: 'https://i.scdn.co/image/ab6765630000ba8a6c0e28e162c68d0e67bc10d5',
image: 'https://i.scdn.co/image/ab6765630000ba8ab84c022aff150c336f73b395',
lang: 'en',
publisher: 'Spotify',
title: 'Hasty Treat - Modules in Node',
Expand Down
Binary file modified packages/metascraper-spotify/test/snapshots/index.js.snap
Binary file not shown.
Loading