Skip to content

Commit

Permalink
feat: add inline rules at the end (#163)
Browse files Browse the repository at this point in the history
  • Loading branch information
Kikobeats authored Apr 3, 2019
1 parent bd21875 commit 936749a
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 68 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,9 @@ The HTML markup for extracting the content.

Type: `Array`

You can pass additional rules at execution time. These rules will be merged with your loaded rules.
You can pass additional rules at execution time.

These rules are merged with your loaded [`rules`](#rules): for each property, the additional rules are placed at the beginning of its rule list.

## Benchmark

Expand Down
24 changes: 14 additions & 10 deletions packages/metascraper/src/merge-rules.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,17 @@

const { cloneDeep, concat, first, findIndex, forEach, chain } = require('lodash')

module.exports = (rules, baseRules) => chain(rules)
.reduce((acc, rules) => {
forEach(rules, (rule, propName) => {
const index = findIndex(acc, item => first(item) === propName)
if (index !== -1) acc[index][1] = concat(acc[index][1], rule)
else acc.push([propName, rule])
})
return acc
}, cloneDeep(baseRules))
.value()
/**
 * Merge extra rule sets into a copy of the already loaded rules.
 *
 * `baseRules` is a list of `[propName, rules]` pairs and is deep-cloned
 * first, so the caller's value is never modified. Every entry of `rules`
 * is walked as a `propName -> rule` mapping: when `propName` is already
 * present, the new rule is placed in front of the existing ones;
 * otherwise a new `[propName, rule]` pair is pushed.
 *
 * @param {*} rules - collection of rule sets to merge in.
 * @param {Array} baseRules - loaded rules as `[propName, rules]` pairs.
 * @returns {Array} the merged list of `[propName, rules]` pairs.
 */
module.exports = (rules, baseRules) =>
  chain(rules)
    .reduce((acc, ruleSet) => {
      forEach(ruleSet, (rule, propName) => {
        // find the entry already registered for `propName`
        const index = findIndex(acc, item => first(item) === propName)
        // if `propName` already has rules, put the new ones in front of them;
        // `concat` flattens one level on its own, so the existing list is
        // passed as is (spreading it would throw if it were not iterable)
        if (index !== -1) acc[index][1] = concat(rule, acc[index][1])
        // otherwise, register `propName` with the new rules
        else acc.push([propName, rule])
      })
      return acc
    }, cloneDeep(baseRules))
    .value()
62 changes: 5 additions & 57 deletions packages/metascraper/test/unit/merge-rules.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,76 +49,24 @@ it('add a new rule for a prop that exists', async () => {
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<meta property="og:image" content="http://ia.media-imdb.com/images/rock.jpg" />
<title>Document</title>
</head>
<body>
<div class="logos">
<img class="logo" href="https://microlink.io/logo.png">
<img class="logo" href="https://microlink.io/logo.png">
<img class="logo" href="https://microlink.io/logo.png">
<img class="logo" href="https://microlink.io/logo.png">
</div>
<img class="main-logo" href="https://microlink.io/logo.png">
<img id="logo" src="https://microlink.io/logo.png">
<p>Hello World </p>
</body>
</html>
`

const rules = [
{
foo: [() => 'bar']
image: [({ htmlDom: $ }) => $('#logo').attr('src')]
}
]

const metascraper = require('../..')([
{
foo: [() => false, () => false, () => false]
}
])

const meta = await metascraper({ url, html, rules })
should(meta.foo).be.equal('bar')
})

it('rules are added from the end', async () => {
const url = 'https://microlink.io'

const html = `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Document</title>
</head>
<body>
<div class="logos">
<img class="logo" href="https://microlink.io/logo.png">
<img class="logo" href="https://microlink.io/logo.png">
<img class="logo" href="https://microlink.io/logo.png">
<img class="logo" href="https://microlink.io/logo.png">
</div>
<img class="main-logo" href="https://microlink.io/logo.png">
<p>Hello World </p>
</body>
</html>
`

const rules = [
{
foo: [() => 'bar']
}
]

const metascraper = require('../..')([
{
foo: [() => false, () => false, () => 'baz']
}
])
const metascraper = require('../..')([require('metascraper-image')()])

const meta = await metascraper({ url, html, rules })
should(meta.foo).be.equal('baz')
should(meta.image).be.equal('https://microlink.io/logo.png')
})

0 comments on commit 936749a

Please sign in to comment.