Skip to content

Commit

Permalink
feat: add test function to rules bundle (#189)
Browse files Browse the repository at this point in the history
* feat: add test function to rules bundle

closes #184

* build: update dependencies

* fix: associate test with rules

* refactor: tweaks

* refactor: unify noopTest

* fix: typo
  • Loading branch information
Kikobeats authored Jul 10, 2019
1 parent cc6eac9 commit 7c209e9
Show file tree
Hide file tree
Showing 24 changed files with 191 additions and 117 deletions.
27 changes: 21 additions & 6 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ Rules bundles are a collection of HTML selectors around a determinate property.

## Writing Your Own Rules

Just you need to declare your rules using the following interface:
### Get value from HTML

Every rule receives `htmlDom` (*cheerio*) and `url` as parameters inside an object:

```js
'use strict'
Expand All @@ -18,19 +20,32 @@ Just you need to declare your rules using the following interface:
*
**/
module.exports = () => {
return ({
const rules = {
logo: [
// They receive as parameter:
// - `htmlDom`: the cheerio HTML instance.
// - `url`: The input URL used to extract the content.
({ htmlDom: $, url }) => wrap($ => $('meta[property="og:logo"]').attr('content')),
({ htmlDom: $, url }) => wrap($ => $('meta[itemprop="logo"]').attr('content'))
({ htmlDom: $, url }) => $('meta[property="og:logo"]').attr('content'),
({ htmlDom: $, url }) => $('meta[itemprop="logo"]').attr('content')
]
})
}
return rules
}
```

The order of rules are loaded are important: Just the first rule that returns a truthy value will be used. The rest rules after that will be not invoked.
You can declare any logic you need in order to determine the output.

A set of rules under the same namespace runs in series, and only the value returned by the first rule that outputs a [truthy](https://developer.mozilla.org/en-US/docs/Glossary/Falsy) value will be taken. So remember, the order is important!

### Defining `test` function

You can associate a `test` function with your rule bundle:

```js
rules.test = ({ url }) => getVideoInfo(url).service === 'youtube'
```

The `test` function receives the same arguments as a rule. This is useful for skipping all the rules in a bundle that doesn't target a specific URL.

## Testing your Rules

Expand Down
47 changes: 25 additions & 22 deletions packages/metascraper-amazon/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ const {
$filter,
title,
author,
createWard,
createWrap,
lang
} = require('@metascraper/helpers')
Expand Down Expand Up @@ -37,24 +36,28 @@ const wrapUrl = createWrap(urlFn)
const wrapAuthor = createWrap(author)
const wrapTitle = createWrap(title, { removeSeparator: false })
const wrapLang = createWrap(lang)
const ward = createWard(({ url }) => isValidUrl(url))

module.exports = () => ({
lang: [ward(wrapLang(($, url) => getDomainLanguage(url)))],
author: [
ward(wrapAuthor($ => $('.contributorNameID').text())),
ward(wrapAuthor($ => $('#bylineInfo').text())),
ward(wrapAuthor($ => $('#brand').text()))
],
title: [
ward(wrapTitle($ => $('#productTitle').text())),
ward(wrapTitle($ => $('#btAsinTitle').text())),
ward(wrapTitle($ => $filter($, $('h1.a-size-large')))),
ward(wrapTitle($ => $('#item_name').text()))
],
publisher: [ward(() => 'Amazon')],
image: [
ward(wrapUrl($ => $('.a-dynamic-image').attr('data-old-hires'))),
ward(wrapUrl($ => $('.a-dynamic-image').attr('src')))
]
})

module.exports = () => {
const rules = {
lang: [wrapLang(($, url) => getDomainLanguage(url))],
author: [
wrapAuthor($ => $('.contributorNameID').text()),
wrapAuthor($ => $('#bylineInfo').text()),
wrapAuthor($ => $('#brand').text())
],
title: [
wrapTitle($ => $('#productTitle').text()),
wrapTitle($ => $('#btAsinTitle').text()),
wrapTitle($ => $filter($, $('h1.a-size-large'))),
wrapTitle($ => $('#item_name').text())
],
publisher: [() => 'Amazon'],
image: [
wrapUrl($ => $('.a-dynamic-image').attr('data-old-hires')),
wrapUrl($ => $('.a-dynamic-image').attr('src'))
]
}

rules.test = ({ url }) => isValidUrl(url)
return rules
}
2 changes: 1 addition & 1 deletion packages/metascraper-amazon/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
],
"dependencies": {
"@metascraper/helpers": "^5.5.4",
"memoize-one": "~5.0.4",
"memoize-one": "~5.0.5",
"tldts": "~5.3.0"
},
"devDependencies": {
Expand Down
2 changes: 1 addition & 1 deletion packages/metascraper-author/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
],
"dependencies": {
"@metascraper/helpers": "^5.5.4",
"lodash": "~4.17.11"
"lodash": "~4.17.12"
},
"devDependencies": {
"standard": "latest"
Expand Down
10 changes: 4 additions & 6 deletions packages/metascraper-clearbit/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,7 @@ const clearbit = memoizeOne(async ({ url }) => {

const getClearbit = createValidator(clearbit)

module.exports = () => {
return {
logo: getClearbit({ from: 'logo' }),
publisher: getClearbit({ from: 'name', to: 'publisher' })
}
}
module.exports = () => ({
logo: getClearbit({ from: 'logo' }),
publisher: getClearbit({ from: 'name', to: 'publisher' })
})
2 changes: 1 addition & 1 deletion packages/metascraper-clearbit/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"dependencies": {
"@metascraper/helpers": "^5.5.4",
"got": "~9.6.0",
"memoize-one": "~5.0.4",
"memoize-one": "~5.0.5",
"tldts": "~5.3.0"
},
"devDependencies": {
Expand Down
9 changes: 1 addition & 8 deletions packages/metascraper-helpers/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -285,12 +285,6 @@ const createWrap = (fn, opts) => rule => ({ htmlDom, url }) => {
return fn(value, opts)
}

/**
* Ward a rule only if `validator` returns `true`.
*/
const createWard = validator => fn => args =>
validator(args) ? fn(args) : null

module.exports = {
$filter,
$jsonld,
Expand Down Expand Up @@ -327,6 +321,5 @@ module.exports = {
video,
validator,
createValidator,
createWrap,
createWard
createWrap
}
2 changes: 1 addition & 1 deletion packages/metascraper-helpers/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
"is-uri": "~1.2.0",
"iso-639-3": "~1.2.0",
"isostring": "0.0.1",
"lodash": "~4.17.11",
"lodash": "~4.17.12",
"mem": "~5.1.1",
"mime-types": "~2.1.24",
"normalize-url": "~4.3.0",
Expand Down
2 changes: 1 addition & 1 deletion packages/metascraper-logo-favicon/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
"dependencies": {
"@metascraper/helpers": "^5.5.4",
"got": "~9.6.0",
"lodash": "~4.17.11"
"lodash": "~4.17.12"
},
"devDependencies": {
"coveralls": "latest",
Expand Down
4 changes: 2 additions & 2 deletions packages/metascraper-media-provider/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
"@microlink/youtube-dl": "~2.0.0",
"debug": "~4.1.1",
"got": "~9.6.0",
"lodash": "~4.17.11",
"lodash": "~4.17.12",
"luminati-tunnel": "~1.3.0",
"memoize-one": "~5.0.4"
"memoize-one": "~5.0.5"
},
"devDependencies": {
"coveralls": "latest",
Expand Down
2 changes: 1 addition & 1 deletion packages/metascraper-readability/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"dependencies": {
"@metascraper/helpers": "^5.5.4",
"jsdom": "~15.1.1",
"memoize-one": "~5.0.4",
"memoize-one": "~5.0.5",
"readability": "github:mozilla/readability"
},
"devDependencies": {
Expand Down
21 changes: 12 additions & 9 deletions packages/metascraper-soundcloud/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,24 @@ const {
$filter,
author,
description,
createWrap,
createWard
createWrap
} = require('@metascraper/helpers')
const memoizeOne = require('memoize-one')
const { getDomain } = require('tldts')

const isValidUrl = memoizeOne(url => getDomain(url) === 'soundcloud.com')

const ward = createWard(({ url }) => isValidUrl(url))
const wrapDescription = createWrap(description)
const wrapAuthor = createWrap(author)

module.exports = () => ({
author: [ward(wrapAuthor($ => $filter($, $('.soundTitle__username'))))],
description: [
ward(wrapDescription($ => $filter($, $('.soundTitle__description'))))
]
})
module.exports = () => {
const rules = {
author: [wrapAuthor($ => $filter($, $('.soundTitle__username')))],
description: [
wrapDescription($ => $filter($, $('.soundTitle__description')))
]
}

rules.test = ({ url }) => isValidUrl(url)
return rules
}
2 changes: 1 addition & 1 deletion packages/metascraper-soundcloud/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
],
"dependencies": {
"@metascraper/helpers": "^5.5.4",
"memoize-one": "~5.0.4",
"memoize-one": "~5.0.5",
"tldts": "~5.3.0"
},
"devDependencies": {
Expand Down
2 changes: 1 addition & 1 deletion packages/metascraper-title/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
],
"dependencies": {
"@metascraper/helpers": "^5.5.4",
"lodash": "~4.17.11"
"lodash": "~4.17.12"
},
"devDependencies": {
"standard": "latest"
Expand Down
28 changes: 14 additions & 14 deletions packages/metascraper-uol/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ const {
$jsonld,
title,
description,
createWrap,
createWard
createWrap
} = require('@metascraper/helpers')
const memoizeOne = require('memoize-one')
const { getDomain } = require('tldts')
Expand All @@ -16,20 +15,21 @@ const isValidUrl = memoizeOne(url =>
ROOT_DOMAINS.some(domain => getDomain(url) === domain)
)

const ward = createWard(({ url }) => isValidUrl(url))

const wrapTitle = createWrap(title)
const wrapDescription = createWrap(description)

module.exports = () => ({
title: [
ward(wrapTitle(($, url) => $jsonld('headline')($, url))),
ward(wrapTitle(($, url) => $jsonld('name')($, url))),
ward(wrapTitle($ => $('title').text()))
],
description: [
ward(wrapDescription(($, url) => $jsonld('description')($, url)))
]
})
module.exports = () => {
const rules = {
title: [
wrapTitle(($, url) => $jsonld('headline')($, url)),
wrapTitle(($, url) => $jsonld('name')($, url)),
wrapTitle($ => $('title').text())
],
description: [wrapDescription(($, url) => $jsonld('description')($, url))]
}

rules.test = ({ url }) => isValidUrl(url)
return rules
}

module.exports.isValidUrl = isValidUrl
2 changes: 1 addition & 1 deletion packages/metascraper-uol/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
],
"dependencies": {
"@metascraper/helpers": "^5.5.4",
"memoize-one": "~5.0.4",
"memoize-one": "~5.0.5",
"tldts": "~5.3.0"
},
"devDependencies": {
Expand Down
2 changes: 1 addition & 1 deletion packages/metascraper-video/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
],
"dependencies": {
"@metascraper/helpers": "^5.5.4",
"lodash": "~4.17.11"
"lodash": "~4.17.12"
},
"devDependencies": {
"coveralls": "latest",
Expand Down
45 changes: 24 additions & 21 deletions packages/metascraper-youtube/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ const {
$filter,
author,
description,
createWrap,
createWard
createWrap
} = require('@metascraper/helpers')

const isReachable = require('is-reachable')
Expand Down Expand Up @@ -34,24 +33,28 @@ const wrapDescription = createWrap(description)

const getVideoInfo = memoizeOne(getVideoId)

const isValidUrl = url => getVideoInfo(url).service === 'youtube'

const ward = createWard(({ url }) => isValidUrl(url))

module.exports = () => ({
author: [
ward(wrapAuthor($ => $('#owner-name').text())),
ward(wrapAuthor($ => $('#channel-title').text())),
ward(wrapAuthor($ => $filter($, $('[class*="user-info" i]'))))
],
description: [ward(wrapDescription($ => $('#description').text()))],
publisher: [ward(() => 'YouTube')],
image: [
ward(({ htmlDom, url }) => {
const { id } = getVideoId(url)
return id && getThumbnailUrl(id)
})
]
})
const isValidUrl = memoizeOne(url => getVideoInfo(url).service === 'youtube')

module.exports = () => {
const rules = {
author: [
wrapAuthor($ => $('#owner-name').text()),
wrapAuthor($ => $('#channel-title').text()),
wrapAuthor($ => $filter($, $('[class*="user-info" i]')))
],
description: [wrapDescription($ => $('#description').text())],
publisher: [() => 'YouTube'],
image: [
({ htmlDom, url }) => {
const { id } = getVideoId(url)
return id && getThumbnailUrl(id)
}
]
}

rules.test = ({ url }) => isValidUrl(url)

return rules
}

module.exports.isValidUrl = isValidUrl
2 changes: 1 addition & 1 deletion packages/metascraper-youtube/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"@metascraper/helpers": "^5.5.4",
"get-video-id": "~3.1.3",
"is-reachable": "~3.1.0",
"memoize-one": "~5.0.4",
"memoize-one": "~5.0.5",
"p-locate": "~4.1.0"
},
"devDependencies": {
Expand Down
2 changes: 1 addition & 1 deletion packages/metascraper/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
"@metascraper/helpers": "^5.5.4",
"cheerio": "~1.0.0-rc.2",
"cheerio-advanced-selectors": "~2.0.1",
"lodash": "~4.17.11",
"lodash": "~4.17.12",
"whoops": "~4.0.2",
"xss": "~1.0.6"
},
Expand Down
Loading

0 comments on commit 7c209e9

Please sign in to comment.