Skip to content

Commit

Permalink
Move helper methods into the helpers package
Browse files Browse the repository at this point in the history
  • Loading branch information
Kikobeats committed Sep 1, 2018
1 parent 9f09429 commit a807b70
Show file tree
Hide file tree
Showing 36 changed files with 2,351 additions and 1,054 deletions.
43 changes: 22 additions & 21 deletions packages/metascraper-amazon/index.js
Original file line number Diff line number Diff line change
@@ -1,47 +1,48 @@
'use strict'

const { getUrl, getValue, titleize, isUrl } = require('@metascraper/helpers')
const { url: urlFn, $filter, titleize } = require('@metascraper/helpers')
const { URL } = require('url')
const { chain } = require('lodash')

// Case-insensitive pattern for http(s) URLs whose remainder contains
// `amazon.` or `amzn.` with a `/` somewhere after it, or contains `a.co/`.
const REGEX_AMAZON_URL = /https?:\/\/(.*amazon\..*\/.*|.*amzn\..*\/.*|.*a\.co\/.*)/i

/**
 * Tell whether a URL looks like an Amazon link.
 *
 * @param {string} candidate - URL to check.
 * @returns {boolean} `true` when `candidate` matches `REGEX_AMAZON_URL`.
 */
const isAmazonUrl = candidate => REGEX_AMAZON_URL.test(candidate)

const SUFFIX_LANGUAGES = {
'ca': 'en',
'cn': 'zh',
ca: 'en',
cn: 'zh',
'co.jp': 'ja',
'co.uk': 'en',
'com.mx': 'es',
'com': 'en',
'de': 'de',
'es': 'es',
'fr': 'fr',
'in': 'en',
'it': 'it'
com: 'en',
de: 'de',
es: 'es',
fr: 'fr',
in: 'en',
it: 'it'
}

const getSuffix = host => chain(host)
.replace('www.', '')
.split('.')
.tail()
.join('.')
.value()
/**
 * Extract the domain suffix from a host name.
 *
 * Removes the first `www.` occurrence, drops the first dot-separated label
 * and joins the remaining labels back with dots,
 * e.g. `www.amazon.co.uk` -> `co.uk`.
 *
 * @param {string} host - Host name to inspect.
 * @returns {string} Everything after the first label of the `www.`-less host.
 */
const getSuffix = host => {
  const labels = chain(host)
    .replace('www.', '')
    .split('.')

  return labels
    .tail()
    .join('.')
    .value()
}

const getDomainLanguage = url => (
SUFFIX_LANGUAGES[getSuffix(new URL(url).host)]
)
/**
 * Resolve the content language of an Amazon URL from its domain suffix.
 *
 * @param {string} url - Absolute URL; `new URL(url)` throws on an invalid one.
 * @returns {string|undefined} The `SUFFIX_LANGUAGES` entry for the URL's
 *   domain suffix, or `undefined` when the suffix is not listed.
 */
const getDomainLanguage = url => {
  // Use `hostname` rather than `host`: `host` carries an explicit port
  // (`amazon.de:8443`), which would leak into the suffix and miss the lookup.
  const { hostname } = new URL(url)
  return SUFFIX_LANGUAGES[getSuffix(hostname)]
}

/**
 * Build a rule decorator that only runs rules on Amazon URLs.
 *
 * `createWrap(fn)` returns a function taking a `rule`; the result of that is
 * a function receiving `{ htmlDom, url }`. The rule is invoked with `htmlDom`
 * only when `isAmazonUrl(url)` is true; otherwise the value is `false`. The
 * outcome is always passed through `fn(url, value)`.
 *
 * @param {Function} fn - Post-processor called as `fn(url, value)`.
 * @returns {Function} Decorator turning a `rule(htmlDom)` into a
 *   `({ htmlDom, url }) => any` function.
 */
const createWrap = fn => rule => ({ htmlDom, url }) =>
  fn(url, isAmazonUrl(url) ? rule(htmlDom) : false)

const wrap = createWrap((url, value) => value)
const wrapUrl = createWrap((url, value) => isUrl(value) && getUrl(url, value))
const wrapUrl = createWrap((url, value) => urlFn(value, { url }))

module.exports = () => ({
lang: [({ htmlDom: $, meta, url }) => isAmazonUrl(url) && getDomainLanguage(url)],
lang: [
({ htmlDom: $, meta, url }) => isAmazonUrl(url) && getDomainLanguage(url)
],
author: [
wrap($ => titleize($('.contributorNameID').text())),
wrap($ => titleize($('#bylineInfo').text())),
Expand All @@ -50,7 +51,7 @@ module.exports = () => ({
title: [
wrap($ => titleize($('#productTitle').text())),
wrap($ => titleize($('#btAsinTitle').text())),
wrap($ => titleize(getValue($, $('h1.a-size-large')))),
wrap($ => titleize($filter($, $('h1.a-size-large')))),
wrap($ => titleize($('#item_name').text()))
],
publisher: [wrap($ => 'Amazon')],
Expand Down
6 changes: 3 additions & 3 deletions packages/metascraper-amazon/test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ const fs = require('fs')
const readFile = promisify(fs.readFile)

const metascraper = require('metascraper')([
require('metascraper-amazon')(),
require('..')(),
require('metascraper-author')(),
require('metascraper-date')(),
require('metascraper-description')(),
Expand All @@ -30,8 +30,8 @@ describe('metascraper-amazon', () => {
)
const url =
'https://www.amazon.co.uk/Vegetable-Perfection-tasty-recipes-shoots/dp/1849757097/ref=asap_bc?ie=UTF8'
const meta = omit(await metascraper({ html, url }), ['date'])
snapshot(meta)
const metadata = omit(await metascraper({ html, url }), ['date'])
snapshot(metadata)
})
})

Expand Down
31 changes: 11 additions & 20 deletions packages/metascraper-author/index.js
Original file line number Diff line number Diff line change
@@ -1,16 +1,9 @@
'use strict'

const { getValue, isUrl, titleize } = require('@metascraper/helpers')
const { isString } = require('lodash')
const { $filter, author } = require('@metascraper/helpers')

const REGEX_STRICT = /^\S+\s+\S+/

const validator = value => (
isString(value) &&
!isUrl(value, {relative: false}) &&
titleize(value, {removeBy: true})
)

/**
* Wrap a rule with validation and formatting logic.
*
Expand All @@ -20,7 +13,7 @@ const validator = value => (

const wrap = rule => ({ htmlDom }) => {
const value = rule(htmlDom)
return validator(value)
return author(value)
}

/**
Expand All @@ -44,16 +37,14 @@ module.exports = () => ({
wrap($ => $('meta[name="author"]').attr('content')),
wrap($ => $('meta[property="author"]').attr('content')),
wrap($ => $('meta[property="article:author"]').attr('content')),
wrap($ => getValue($, $('[itemprop*="author"] [itemprop="name"]'))),
wrap($ => getValue($, $('[itemprop*="author"]'))),
wrap($ => getValue($, $('[rel="author"]'))),
strict(wrap($ => getValue($, $('a[class*="author"]')))),
strict(wrap($ => getValue($, $('[class*="author"] a')))),
strict(wrap($ => getValue($, $('a[href*="/author/"]')))),
wrap($ => getValue($, $('a[class*="screenname"]'))),
strict(wrap($ => getValue($, $('[class*="author"]')))),
strict(wrap($ => getValue($, $('[class*="byline"]'))))
wrap($ => $filter($, $('[itemprop*="author"] [itemprop="name"]'))),
wrap($ => $filter($, $('[itemprop*="author"]'))),
wrap($ => $filter($, $('[rel="author"]'))),
strict(wrap($ => $filter($, $('a[class*="author"]')))),
strict(wrap($ => $filter($, $('[class*="author"] a')))),
strict(wrap($ => $filter($, $('a[href*="/author/"]')))),
wrap($ => $filter($, $('a[class*="screenname"]'))),
strict(wrap($ => $filter($, $('[class*="author"]')))),
strict(wrap($ => $filter($, $('[class*="byline"]'))))
]
})

module.exports.validator = validator
25 changes: 2 additions & 23 deletions packages/metascraper-date/index.js
Original file line number Diff line number Diff line change
@@ -1,25 +1,6 @@
'use strict'

const chrono = require('chrono-node')
const isIso = require('isostring')

const validator = value => {
if (!value) return false

// remove whitespace for easier parsing
value = value.trim()

// convert isodates to restringify, because sometimes they are truncated
if (isIso(value)) return new Date(value).toISOString()

// try to parse with the built-in date parser
const native = new Date(value)
if (!isNaN(native.getTime())) return native.toISOString()

// try to parse a complex date string
const parsed = chrono.parseDate(value)
if (parsed) return parsed.toISOString()
}
const { date } = require('@metascraper/helpers')

/**
* Wrap a rule with validation and formatting logic.
Expand All @@ -30,7 +11,7 @@ const validator = value => {

const wrap = rule => ({ htmlDom }) => {
const value = rule(htmlDom)
return validator(value)
return date(value)
}

/**
Expand Down Expand Up @@ -68,5 +49,3 @@ module.exports = () => ({
wrap($ => $('[class*="time"]').text())
]
})

module.exports.validator = validator
3 changes: 1 addition & 2 deletions packages/metascraper-date/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
"url": "https://github.com/microlinkhq/metascraper/issues"
},
"dependencies": {
"chrono-node": "~1.3.5",
"isostring": "0.0.1"
"@metascraper/helpers": "^4.0.1"
},
"devDependencies": {
"standard": "latest"
Expand Down
20 changes: 4 additions & 16 deletions packages/metascraper-description/index.js
Original file line number Diff line number Diff line change
@@ -1,16 +1,6 @@
'use strict'

const { getValue, titleize } = require('@metascraper/helpers')
const { isString } = require('lodash')

const REGEX_LOCATION = /^[A-Z\s]+\s+[-—–]\s+/

const removeLocation = value => value.replace(REGEX_LOCATION, '')

const validator = value => (
isString(value) &&
titleize(removeLocation(value), { capitalize: false })
)
const { $filter, description } = require('@metascraper/helpers')

/**
* Wrap a rule with validation and formatting logic.
Expand All @@ -21,7 +11,7 @@ const validator = value => (

const wrap = rule => ({ htmlDom }) => {
const value = rule(htmlDom)
return validator(value)
return description(value)
}

/**
Expand All @@ -35,9 +25,7 @@ module.exports = () => ({
wrap($ => $('meta[name="description"]').attr('content')),
wrap($ => $('meta[itemprop="description"]').attr('content')),
wrap($ => $('#description').text()),
wrap($ => getValue($, $('[class*="content"] > p'))),
wrap($ => getValue($, $('[class*="content"] p')))
wrap($ => $filter($, $('[class*="content"] > p'))),
wrap($ => $filter($, $('[class*="content"] p')))
]
})

module.exports.validator = validator
3 changes: 1 addition & 2 deletions packages/metascraper-description/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
"url": "https://github.com/microlinkhq/metascraper/issues"
},
"dependencies": {
"@metascraper/helpers": "^4.0.1",
"lodash": "~4.17.10"
"@metascraper/helpers": "^4.0.1"
},
"devDependencies": {
"standard": "latest"
Expand Down
Loading

0 comments on commit a807b70

Please sign in to comment.