Skip to content

Commit

Permalink
fix: avoid using the generic publisher (#141)
Browse files Browse the repository at this point in the history
* fix: avoid using the generic publisher

* fix: move rules into favicon package

* fix: add keywords meta

* test: update snapshots

* test: add missing dependency

* test: update snapshots
  • Loading branch information
Kikobeats authored Dec 16, 2018
1 parent fe0c9a2 commit c13f833
Show file tree
Hide file tree
Showing 98 changed files with 225 additions and 84 deletions.
4 changes: 4 additions & 0 deletions packages/metascraper-audio/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
"bugs": {
"url": "https://github.com/microlinkhq/metascraper/issues"
},
"keywords": [
"audio",
"metascraper"
],
"dependencies": {
"@metascraper/helpers": "^4.8.3"
},
Expand Down
4 changes: 4 additions & 0 deletions packages/metascraper-author/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
"bugs": {
"url": "https://github.com/microlinkhq/metascraper/issues"
},
"keywords": [
"author",
"metascraper"
],
"dependencies": {
"@metascraper/helpers": "^4.8.3",
"lodash": "~4.17.10"
Expand Down
1 change: 1 addition & 0 deletions packages/metascraper-clearbit-logo/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
},
"keywords": [
"clearbit",
"logo",
"metascraper"
],
"dependencies": {
Expand Down
4 changes: 4 additions & 0 deletions packages/metascraper-date/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
"bugs": {
"url": "https://github.com/microlinkhq/metascraper/issues"
},
"keywords": [
"date",
"metascraper"
],
"dependencies": {
"@metascraper/helpers": "^4.8.3"
},
Expand Down
4 changes: 4 additions & 0 deletions packages/metascraper-description/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
"bugs": {
"url": "https://github.com/microlinkhq/metascraper/issues"
},
"keywords": [
"description",
"metascraper"
],
"dependencies": {
"@metascraper/helpers": "^4.8.3"
},
Expand Down
8 changes: 6 additions & 2 deletions packages/metascraper-helpers/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
"bugs": {
"url": "https://github.com/microlinkhq/metascraper/issues"
},
"keywords": [
"helpers",
"metascraper"
],
"dependencies": {
"audio-extensions": "0.0.0",
"chrono-node": "~1.3.5",
Expand Down Expand Up @@ -44,10 +48,10 @@
"scripts": {
"test": "NODE_PATH=.. TZ=UTC NODE_ENV=test nyc mocha test"
},
"license": "MIT",
"standard": {
"env": [
"mocha"
]
},
"license": "MIT"
}
}
4 changes: 4 additions & 0 deletions packages/metascraper-image/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
"bugs": {
"url": "https://github.com/microlinkhq/metascraper/issues"
},
"keywords": [
"image",
"metascraper"
],
"dependencies": {
"@metascraper/helpers": "^4.8.3"
},
Expand Down
4 changes: 4 additions & 0 deletions packages/metascraper-lang-detector/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
"bugs": {
"url": "https://github.com/microlinkhq/metascraper/issues"
},
"keywords": [
"lang",
"metascraper"
],
"dependencies": {
"@metascraper/helpers": "^4.8.3",
"franc": "~4.0.0",
Expand Down
10 changes: 1 addition & 9 deletions packages/metascraper-lang/__snapshots__/index.js.snap-shot
Original file line number Diff line number Diff line change
@@ -1,12 +1,4 @@
exports['html lang property 1'] = {
"lang": "pl",
"author": "Jakub Majmurek",
"title": "Churchill, bohater naszych czasów / Film / dwutygodnik.com",
"publisher": "Dwutygodnik",
"image": "http://www.dwutygodnik.com/public/media/article/image_full/7615.png",
"date": "2018-01-01T12:00:00.000Z",
"description": "Wysyp filmów o Churchillu w pobrexitowej Brytanii, wydaje się czymś zrozumiałym. Uosabia on ostatni moment prawdziwej wielkości Zjednoczonego Królestwa – wspomnienie tej historycznej chwili pozwala oswoić traumy i lęki",
"logo": "http://www.dwutygodnik.com/public/frontend/image_v4/favicon.ico",
"url": "http://www.dwutygodnik.com/artykul/7615-churchill-bohater-naszych-czasow.html"
"lang": "pl"
}

4 changes: 4 additions & 0 deletions packages/metascraper-lang/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
"bugs": {
"url": "https://github.com/microlinkhq/metascraper/issues"
},
"keywords": [
"lang",
"metascraper"
],
"dependencies": {
"@metascraper/helpers": "^4.8.3"
},
Expand Down
13 changes: 1 addition & 12 deletions packages/metascraper-lang/test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,7 @@ const { promisify } = require('util')
const { resolve } = require('path')
const fs = require('fs')

const metascraper = require('metascraper')([
require('metascraper-amazon')(),
require('metascraper-author')(),
require('metascraper-date')(),
require('metascraper-description')(),
require('metascraper-image')(),
require('..')(),
require('metascraper-logo')(),
require('metascraper-publisher')(),
require('metascraper-title')(),
require('metascraper-url')()
])
const metascraper = require('metascraper')([require('..')()])

const readFile = promisify(fs.readFile)

Expand Down
69 changes: 65 additions & 4 deletions packages/metascraper-logo-favicon/index.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,73 @@
'use strict'

const { flow, first, toNumber, split, chain, concat } = require('lodash')
const { resolve: resolveUrl, URL } = require('url')
const { url: urlFn } = require('@metascraper/helpers')
const got = require('got')

const getFaviconUrl = url => {
const {origin} = new URL(url)
return resolveUrl(origin, 'favicon.ico')
// Numeric value of the part of a `sizes` attribute that precedes the first
// 'x' (e.g. "32x32" -> 32).
const getSize = str => toNumber(first(split(str, 'x')))

/**
 * Turn DOM nodes into `{ size, link }` entries.
 *
 * @param {Array} domNodes - nodes exposing an `attribs` object
 * @param {String} attr - name of the attribute that holds the icon link
 * @return {Array} one `{ size, link }` entry per node, in input order
 */
const getDomNodeSizes = (domNodes, attr) => {
  const toEntry = ({ attribs }) => {
    const size = getSize(attribs.sizes)
    const link = attribs[attr]
    return { size, link }
  }

  return chain(domNodes)
    .map(toEntry)
    .value()
}

/**
 * Gather the `{ size, link }` entries for every `{ tag, attr }` pair in
 * `collection` and sort them by descending `size`.
 *
 * @param {Function} $ - selector function whose result exposes `.get()`
 * @param {Array} collection - list of `{ tag, attr }` pairs
 * @return {Array} entries sorted by `-size`
 */
const getSizes = ($, collection) => {
  const bySizeDesc = ({ size }) => -size
  const collect = (found, { tag, attr }) =>
    concat(found, getDomNodeSizes($(tag).get(), attr))

  return chain(collection)
    .reduce(collect, [])
    .sortBy(bySizeDesc)
    .value()
}

// Selector/attribute pairs handed to getSizes: `tag` is the selector passed
// to `$()`, `attr` is the attribute read from each matched node as the link.
const sizeSelectors = [
{ tag: 'link[rel="apple-touch-icon"]', attr: 'href' },
{ tag: 'link[rel="apple-touch-icon-precomposed"]', attr: 'href' },
{ tag: 'meta[name="msapplication-TileImage"]', attr: 'content' },
{ tag: 'link[rel="icon"]', attr: 'href' },
{ tag: 'link[rel="shortcut icon"]', attr: 'href' }
]

/**
 * Build a rule that runs `rule` against the parsed HTML and hands its
 * result to the `url` helper together with the page URL.
 *
 * @param {Function} rule - receives `htmlDom` and returns a candidate value
 * @return {Function} wrapped rule taking `{ htmlDom, url }`
 */

const wrap = rule => context => {
  const { htmlDom, url } = context
  return urlFn(rule(htmlDom), { url })
}

/**
* Rules.
*/
module.exports = () => ({
logo: [({ htmlDom: $, meta, url }) => getFaviconUrl(url)]
logo: [
// Markup-based rule: take the `link` of the first entry returned by
// getSizes (entries are sorted by descending size).
wrap($ => {
const sizes = getSizes($, sizeSelectors)
const size = chain(sizes)
.first()
.get('link')
.value()
return size
}),
// Network-based rule: build `<origin>/favicon.ico` from the page URL and
// return it only if a HEAD request to that address succeeds.
// NOTE(review): `new URL(url)` runs outside the try block, so an unparsable
// `url` rejects instead of returning null — confirm callers validate it.
async ({ url }) => {
const { origin } = new URL(url)
const logoUrl = resolveUrl(origin, 'favicon.ico')

try {
await got.head(logoUrl)
return logoUrl
} catch (err) {
// Any rejection from the HEAD request is reported as "no logo".
return null
}
}
]
})
9 changes: 7 additions & 2 deletions packages/metascraper-logo-favicon/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,16 @@
"url": "https://github.com/microlinkhq/metascraper-logo-favicon/issues"
},
"keywords": [
"clearbit",
"favicon",
"logo",
"metascraper"
],
"dependencies": {
"@metascraper/helpers": "~4.8.3",
"got": "~9.4.0",
"lodash": "~4.17.11"
},
"devDependencies": {
"lodash": "latest",
"mocha": "latest",
"nyc": "latest",
"should": "latest",
Expand Down
40 changes: 1 addition & 39 deletions packages/metascraper-logo/index.js
Original file line number Diff line number Diff line change
@@ -1,36 +1,7 @@
'use strict'

const { flow, chain, first, concat, toNumber, split } = require('lodash')
const { url: urlFn } = require('@metascraper/helpers')

const getSize = flow([str => split(str, 'x'), first, toNumber])

const getDomNodeSizes = (domNodes, attr) =>
chain(domNodes)
.map(({ attribs }) => ({
size: getSize(attribs.sizes),
link: attribs[attr]
}))
.value()

const getSizes = ($, collection) =>
chain(collection)
.reduce((acc, { tag, attr }) => {
const domNodes = $(tag).get()
const selectors = getDomNodeSizes(domNodes, attr)
return concat(acc, selectors)
}, [])
.sortBy(({ size }) => -size)
.value()

const sizeSelectors = [
{ tag: 'link[rel="apple-touch-icon"]', attr: 'href' },
{ tag: 'link[rel="apple-touch-icon-precomposed"]', attr: 'href' },
{ tag: 'meta[name="msapplication-TileImage"]', attr: 'content' },
{ tag: 'link[rel="icon"]', attr: 'href' },
{ tag: 'link[rel="shortcut icon"]', attr: 'href' }
]

/**
* Wrap a rule with validation and formatting logic.
*
Expand All @@ -46,19 +17,10 @@ const wrap = rule => ({ htmlDom, url }) => {
/**
* Rules.
*/

module.exports = () => ({
logo: [
wrap($ => $('meta[property="og:logo"]').attr('content')),
wrap($ => $('meta[itemprop="logo"]').attr('content')),
wrap($ => $('img[itemprop="logo"]').attr('src')),
wrap($ => {
const sizes = getSizes($, sizeSelectors)
const size = chain(sizes)
.first()
.get('link')
.value()
return size
})
wrap($ => $('img[itemprop="logo"]').attr('src'))
]
})
8 changes: 5 additions & 3 deletions packages/metascraper-logo/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@
"bugs": {
"url": "https://github.com/microlinkhq/metascraper/issues"
},
"keywords": [
"logo",
"metascraper"
],
"dependencies": {
"@metascraper/helpers": "^4.8.3",
"got": "~9.4.0",
"lodash": "~4.17.10"
"@metascraper/helpers": "^4.8.3"
},
"devDependencies": {
"standard": "11"
Expand Down
5 changes: 5 additions & 0 deletions packages/metascraper-media-provider/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
"bugs": {
"url": "https://github.com/microlinkhq/metascraper/issues"
},
"keywords": [
"media",
"metascraper",
"youtube-dl"
],
"dependencies": {
"@metascraper/helpers": "^4.8.3",
"got": "~9.4.0",
Expand Down
4 changes: 2 additions & 2 deletions packages/metascraper-media-provider/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ const getAudio = data => getAudioUrls(data, [hasAudio, isHttps])
// First of creator, uploader and uploader id (in that order) for which
// `authorFn` returns a truthy value.
const getAuthor = ({ uploader, creator, uploader_id: uploaderId }) => {
  const candidates = [creator, uploader, uploaderId]
  return find(candidates, candidate => authorFn(candidate))
}

const getPublisher = ({ extractor_key: extractorKey }) =>
publisher(extractorKey)
// Yields `false` when the extractor is 'generic'; otherwise passes the
// extractor key to `publisher`.
const getPublisher = ({ extractor, extractor_key: extractorKey }) => {
  if (extractor === 'generic') return false
  return publisher(extractorKey)
}

// Pass `language` to `lang`, or the `Accept-Language` entry of
// `http_headers` when `language` is falsy.
const getLang = ({ language, http_headers: headers = {} }) => {
  const candidate = language || headers['Accept-Language']
  return lang(candidate)
}
Expand Down
4 changes: 4 additions & 0 deletions packages/metascraper-publisher/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
"bugs": {
"url": "https://github.com/microlinkhq/metascraper/issues"
},
"keywords": [
"metascraper",
"publisher"
],
"dependencies": {
"@metascraper/helpers": "^4.8.3"
},
Expand Down
5 changes: 3 additions & 2 deletions packages/metascraper-soundcloud/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@
"url": "https://github.com/microlinkhq/metascraper-soundcloud/issues"
},
"keywords": [
"clearbit",
"metascraper"
"audio",
"metascraper",
"soundcloud"
],
"dependencies": {
"@metascraper/helpers": "^4.8.3"
Expand Down
1 change: 1 addition & 0 deletions packages/metascraper-soundcloud/test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ const metascraper = require('metascraper')([
require('metascraper-image')(),
require('metascraper-lang')(),
require('metascraper-logo')(),
require('metascraper-logo-favicon')(),
require('metascraper-publisher')(),
require('metascraper-title')(),
require('metascraper-url')()
Expand Down
4 changes: 4 additions & 0 deletions packages/metascraper-title/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
"bugs": {
"url": "https://github.com/microlinkhq/metascraper/issues"
},
"keywords": [
"metascraper",
"title"
],
"dependencies": {
"@metascraper/helpers": "^4.8.3",
"lodash": "~4.17.10"
Expand Down
4 changes: 4 additions & 0 deletions packages/metascraper-url/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
"bugs": {
"url": "https://github.com/microlinkhq/metascraper/issues"
},
"keywords": [
"metascraper",
"url"
],
"dependencies": {
"@metascraper/helpers": "^4.8.3"
},
Expand Down
Loading

0 comments on commit c13f833

Please sign in to comment.