Skip to content

Commit

Permalink
Add language detection from Amazon URLs
Browse files Browse the repository at this point in the history
  • Loading branch information
Kikobeats committed Dec 30, 2017
1 parent 5ec9472 commit f6de069
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 0 deletions.
4 changes: 4 additions & 0 deletions packages/metascraper-amazon/__snapshots__/index.js.snap-shot
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
exports['product url 1'] = {
"lang": "en",
"author": "Mat Follas",
"title": "Vegetable Perfection: 100 delicious recipes for roots, bulbs, shoots and stems",
"publisher": "Amazon",
Expand All @@ -9,6 +10,7 @@ exports['product url 1'] = {
}

exports['ansi url 1'] = {
"lang": "en",
"author": "SainSmart",
"title": "SainSmart 4-Channel Relay Module",
"publisher": "Amazon",
Expand All @@ -19,6 +21,7 @@ exports['ansi url 1'] = {
}

exports['product url 2'] = {
"lang": "en",
"author": "David Baldacci",
"title": "The Whole Truth (A. Shaw Book 1) Kindle Edition",
"publisher": "Amazon",
Expand All @@ -29,6 +32,7 @@ exports['product url 2'] = {
}

exports['product url 3'] = {
"lang": "es",
"author": "Cecotec",
"title": "Robot aspirador Conga Excellence 990 de Cecotec. Friega el suelo, Barre, aspira y pasa la mopa. Programable 24h. 5 modos de limpieza. Base de carga. Silencioso. Potente. Filtro HEPA.",
"publisher": "Amazon",
Expand Down
23 changes: 23 additions & 0 deletions packages/metascraper-amazon/index.js
Original file line number Diff line number Diff line change
@@ -1,18 +1,41 @@
'use strict'

const { titleize, isUrl } = require('@metascraper/helpers')
const { URL } = require('url')

/**
 * Matches http(s) URLs whose host is an Amazon storefront or one of the
 * Amazon short-link domains.
 *
 * The pattern is anchored on the host part of the URL:
 *   - an optional run of subdomains (`www.`, `smile.`, ...),
 *   - then `amazon.<tld>` (optionally followed by a two-letter second
 *     label, e.g. `co.uk`, `com.mx`), `amzn.<tld>` or `a.co`,
 *   - then an optional port, and finally a path/query/fragment delimiter
 *     or the end of the string.
 *
 * Anchoring matters: text such as `amazon.com/` appearing in a path, a
 * query string, the userinfo part, or inside a look-alike host
 * (`notamazon.com`, `nasa.co`) must not qualify.
 */
const REGEX_AMAZON_URL = /^https?:\/\/(?:[^/?#@]+\.)?(?:amazon\.[a-z]{2,3}(?:\.[a-z]{2})?|amzn\.[a-z]{2,4}|a\.co)(?::\d+)?(?:[/?#]|$)/i

/**
 * Tell whether `url` points at an Amazon host.
 *
 * @param {string} url - URL to check.
 * @returns {boolean} `true` when the URL's host matches `REGEX_AMAZON_URL`.
 */
const isAmazonUrl = url => REGEX_AMAZON_URL.test(url)

/**
 * Maps the domain suffix of an Amazon storefront (everything after the
 * `amazon.` label) to the language code reported for that storefront.
 *
 * Multi-label suffixes such as `co.uk` or `com.mx` are keyed as a whole.
 * The table is frozen because it is shared, read-only lookup data.
 */
const SUFFIX_LANGUAGES = Object.freeze({
  'ca': 'en',
  'cn': 'zh',
  'co.jp': 'ja',
  'co.uk': 'en',
  'com': 'en',
  'com.au': 'en',
  'com.br': 'pt',
  'com.mx': 'es',
  'com.tr': 'tr',
  'de': 'de',
  'es': 'es',
  'fr': 'fr',
  'in': 'en',
  'it': 'it',
  'nl': 'nl',
  'pl': 'pl',
  'se': 'sv',
  'sg': 'en'
})

/**
 * Restrict a DOM rule to Amazon pages.
 *
 * @param {Function} rule - Called with `htmlDom` when the URL passes
 *   `isAmazonUrl`.
 * @returns {Function} A function taking `{ htmlDom, url }` that returns the
 *   falsy result of `isAmazonUrl(url)` for non-Amazon URLs and
 *   `rule(htmlDom)` otherwise.
 */
const wrap = rule => ({ htmlDom, url }) => {
  const matched = isAmazonUrl(url)
  // Hand back the falsy check result itself so callers see the same value
  // a short-circuiting `&&` would produce.
  if (!matched) return matched
  return rule(htmlDom)
}

/**
 * Like `wrap`, but additionally requires the rule's result to pass `isUrl`.
 *
 * @param {Function} rule - Called with `htmlDom` via `wrap`.
 * @returns {Function} A function taking `{ htmlDom, url }` that returns the
 *   wrapped rule's value when `isUrl` accepts it, otherwise the falsy
 *   result of `isUrl`.
 */
const wrapUrl = rule => ({ htmlDom, url }) => {
  const candidate = wrap(rule)({ htmlDom, url })
  const accepted = isUrl(candidate)
  return accepted ? candidate : accepted
}

/**
 * Look up the language associated with the domain suffix of `url`.
 *
 * The hostname is split into labels and candidate suffixes are tried from
 * the longest to the shortest against `SUFFIX_LANGUAGES`, so multi-label
 * suffixes such as `co.uk` are found and any leading subdomain (`www.`,
 * `smile.`, ...) is ignored. The first label is never used as a suffix on
 * its own.
 *
 * This function does not check that the URL belongs to Amazon; it only
 * inspects the suffix.
 *
 * @param {string} url - Absolute URL. `new URL(url)` throws when it cannot
 *   be parsed.
 * @returns {string|undefined} The language code from `SUFFIX_LANGUAGES`,
 *   or `undefined` when no suffix of the hostname is listed.
 */
const getDomainLanguage = url => {
  // `hostname` never carries a port, whereas `host` does for non-default
  // ports and would corrupt the last label.
  const { hostname } = new URL(url)
  const labels = hostname.split('.')

  for (let start = 1; start < labels.length; start++) {
    const suffix = labels.slice(start).join('.')
    // Own-property check so labels like `constructor` cannot resolve to
    // members inherited from Object.prototype.
    if (Object.prototype.hasOwnProperty.call(SUFFIX_LANGUAGES, suffix)) {
      return SUFFIX_LANGUAGES[suffix]
    }
  }

  return undefined
}

module.exports = () => ({
lang: [({ htmlDom: $, meta, url }) => getDomainLanguage(url)],
author: [
wrap($ => titleize($('.contributorNameID').text())),
wrap($ => titleize($('#bylineInfo').text())),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
exports['create an absolute faivcon url if the logo is not present 1'] = {
"lang": "en",
"author": "Mat Follas",
"title": "Vegetable Perfection: 100 delicious recipes for roots, bulbs, shoots and stems",
"publisher": "Amazon",
Expand Down

0 comments on commit f6de069

Please sign in to comment.