Skip to content

Commit e4f7eed

Browse files
committed
fix(logo): avoid data uri with no length
Since the logo can be detected from HTML markup, there are cases where the detected data URI is empty. An empty data URI ('data:,') is considered a valid URL and URI, but it can't be considered a valid image.
1 parent 8f3f9aa commit e4f7eed

File tree

8 files changed

+47
-23
lines changed

8 files changed

+47
-23
lines changed

package.json

+6-1
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,12 @@
175175
"commitlint": {
176176
"extends": [
177177
"@commitlint/config-conventional"
178-
]
178+
],
179+
"rules": {
180+
"body-max-length": [
181+
0
182+
]
183+
}
179184
},
180185
"nano-staged": {
181186
"*.js": [

packages/metascraper-helpers/index.js

+10-3
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ const _normalizeUrl = require('normalize-url')
1111
const smartquotes = require('smartquotes')
1212
const { decodeHTML } = require('entities')
1313
const iso6393 = require('iso-639-3/to-1')
14+
const dataUri = require('data-uri-utils')
1415
const hasValues = require('has-values')
1516
const chrono = require('chrono-node')
1617
const isIso = require('isostring')
@@ -375,11 +376,17 @@ const $jsonld = propName => $ => {
375376

376377
const image = (value, opts) => {
377378
const urlValue = url(value, opts)
378-
return urlValue !== undefined &&
379+
380+
const result =
381+
urlValue !== undefined &&
379382
!isAudioUrl(urlValue, opts) &&
380383
!isVideoUrl(urlValue, opts)
381-
? urlValue
382-
: undefined
384+
? urlValue
385+
: undefined
386+
387+
if (!dataUri.test(result)) return result
388+
const buffer = dataUri.toBuffer(dataUri.normalize(result))
389+
return buffer.length ? result : undefined
383390
}
384391

385392
const logo = image

packages/metascraper-helpers/package.json

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
"audio-extensions": "0.0.0",
2626
"chrono-node": "~2.7.4",
2727
"condense-whitespace": "~2.0.0",
28+
"data-uri-utils": "~1.0.7",
2829
"entities": "~4.5.0",
2930
"file-extension": "~4.0.5",
3031
"has-values": "~2.0.1",

packages/metascraper-helpers/test/index.js

+1
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,7 @@ test('.image', t => {
248248
image({ '@id': 'https://www.milanocittastato.it/#/schema/logo/image/' }),
249249
undefined
250250
)
251+
t.is(image('data:,'), undefined)
251252
})
252253

253254
test('.isImageUrl', t => {

packages/metascraper-logo-favicon/src/index.js

+3-3
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,12 @@ const {
99
parseUrl,
1010
normalizeUrl,
1111
toRule,
12-
url: urlFn
12+
logo: logoFn
1313
} = require('@metascraper/helpers')
1414

1515
const SIZE_REGEX_BY_X = /\d+x\d+/
1616

17-
const toUrl = toRule(urlFn)
17+
const toLogo = toRule(logoFn)
1818

1919
const toSize = (input, url) => {
2020
if (isEmpty(input)) return
@@ -170,7 +170,7 @@ module.exports = ({
170170
const rootFavicon = createRootFavicon({ getLogo, withRootFavicon })
171171
return {
172172
logo: [
173-
toUrl($ => {
173+
toLogo($ => {
174174
const sizes = getSizes($, sizeSelectors)
175175
const size = pickFn(sizes, pickBiggerSize)
176176
return get(size, 'url')

packages/metascraper-logo-favicon/test/index.js

+8
Original file line numberDiff line numberDiff line change
@@ -237,3 +237,11 @@ test('resolve logo using from google associated with the domain', async t => {
237237
const metadata = await metascraper({ url })
238238
t.true(metadata.logo.includes('gstatic'))
239239
})
240+
241+
test('avoid data URI when data length is 0', async t => {
242+
const url = 'https://www.adobe.com/'
243+
const html = '<link rel="icon" href="data:,">'
244+
const metascraper = createMetascraper()
245+
const metadata = await metascraper({ url, html })
246+
t.is(metadata.logo, 'https://www.adobe.com/favicon.ico')
247+
})
+15-15
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
'use strict'
22

3-
const { $jsonld, url: urlFn, toRule } = require('@metascraper/helpers')
3+
const { $jsonld, logo: logoFn, toRule } = require('@metascraper/helpers')
44
const { eq, get } = require('lodash')
55

66
const toLogoUrl = ($, propName) => {
@@ -13,26 +13,26 @@ const toLogoUrl = ($, propName) => {
1313
module.exports = ({ filter } = {}) => {
1414
const mapper = filter
1515
? async value => {
16-
const result = urlFn(value)
16+
const result = logoFn(value)
1717
return typeof result === 'string' ? await filter(result) : result
1818
}
19-
: urlFn
19+
: logoFn
2020

21-
const toUrl = toRule(mapper)
21+
const toLogo = toRule(mapper)
2222

2323
return {
2424
logo: [
25-
toUrl($ => $('meta[property="og:logo"]').attr('content')),
26-
toUrl($ => $('meta[itemprop="logo"]').attr('content')),
27-
toUrl($ => $('img[itemprop="logo"]').attr('src')),
28-
toUrl($ => toLogoUrl($, 'brand.logo')),
29-
toUrl($ => toLogoUrl($, 'organization.logo')),
30-
toUrl($ => toLogoUrl($, 'place.logo')),
31-
toUrl($ => toLogoUrl($, 'product.logo')),
32-
toUrl($ => toLogoUrl($, 'service.logo')),
33-
toUrl($ => toLogoUrl($, 'publisher.logo')),
34-
toUrl($ => toLogoUrl($, 'logo.url')),
35-
toUrl($ => toLogoUrl($, 'logo'))
25+
toLogo($ => $('meta[property="og:logo"]').attr('content')),
26+
toLogo($ => $('meta[itemprop="logo"]').attr('content')),
27+
toLogo($ => $('img[itemprop="logo"]').attr('src')),
28+
toLogo($ => toLogoUrl($, 'brand.logo')),
29+
toLogo($ => toLogoUrl($, 'organization.logo')),
30+
toLogo($ => toLogoUrl($, 'place.logo')),
31+
toLogo($ => toLogoUrl($, 'product.logo')),
32+
toLogo($ => toLogoUrl($, 'service.logo')),
33+
toLogo($ => toLogoUrl($, 'publisher.logo')),
34+
toLogo($ => toLogoUrl($, 'logo.url')),
35+
toLogo($ => toLogoUrl($, 'logo'))
3636
]
3737
}
3838
}

packages/metascraper-telegram/src/index.js

+3-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ const {
44
author,
55
date,
66
image,
7+
logo,
78
memoizeOne,
89
parseUrl,
910
sanetizeUrl,
@@ -17,6 +18,7 @@ const got = require('got')
1718

1819
const toAuthor = toRule(author)
1920
const toImage = toRule(image)
21+
const toLogo = toRule(logo)
2022
const toDate = toRule(date)
2123

2224
const TELEGRAM_DOMAINS = ['telegram.me', 't.me']
@@ -48,7 +50,7 @@ module.exports = ({ gotOpts, keyvOpts } = {}) => {
4850

4951
const rules = {
5052
author: [toAuthor($ => $('meta[property="og:title"]').attr('content'))],
51-
logo: [toImage($ => $('meta[property="og:image"]').attr('content'))],
53+
logo: [toLogo($ => $('meta[property="og:image"]').attr('content'))],
5254
image: [
5355
toImage(
5456
loadIframe(($iframe, url) => {

0 commit comments

Comments
 (0)