Skip to content

Commit

Permalink
Add html sanitization
Browse files Browse the repository at this point in the history
Remove unnecessary rules
  • Loading branch information
Kikobeats committed Jul 1, 2017
1 parent 5bf8d93 commit 818a19b
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 10 deletions.
9 changes: 4 additions & 5 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
'use strict'

const rules = require('req-all')('./src/rules')
const reduce = require('lodash.reduce')
const cheerio = require('cheerio')

const rules = require('req-all')('./src/rules')
const loadHtml = require('./src/html')

// A rule result is usable unless it is null, undefined or the empty string.
// Other falsy values (0, false, NaN) are still considered valid.
const isValid = result => {
  if (result === null || result === undefined) return false
  return result !== ''
}

Expand All @@ -18,9 +19,7 @@ const getValue = ($, conditions) => {
}

module.exports = rawHtml => {
const html = cheerio.load(rawHtml, {
lowerCaseAttributeNames: true
})
const html = loadHtml(rawHtml)

return reduce(rules, (acc, conditions, ruleName) => {
const value = getValue(html, conditions)
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
"lodash.reduce": "~4.6.0",
"normalize-url": "~1.9.1",
"req-all": "~1.0.0",
"sanitize-html": "~1.14.1",
"to-title-case": "~1.0.0",
"url-regex": "~4.1.1"
},
Expand Down
23 changes: 23 additions & 0 deletions src/html/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
'use strict'

const sanitizeHtml = require('sanitize-html')
const flow = require('lodash.flow')
const cheerio = require('cheerio')

// Tag transformer for <meta>: lower-cases the value of the `name` attribute
// (when present and non-empty) in place and hands back the same tag name and
// attribute object.
const lowerCaseMetaName = (tagName, attribs) => {
  const { name } = attribs
  if (name) attribs.name = name.toLowerCase()
  return { tagName, attribs }
}

// Runs the raw markup through sanitize-html and returns whatever it returns.
// `allowedTags` / `allowedAttributes` are both `false`, which per the
// sanitize-html docs disables those filters, so the only explicit rewrite
// configured here is the <meta name> normalization above.
const sanitize = html => {
  const options = {
    allowedTags: false,
    allowedAttributes: false,
    transformTags: {
      meta: lowerCaseMetaName
    }
  }

  return sanitizeHtml(html, options)
}

const load = cheerio.load.bind(cheerio)

module.exports = flow([
sanitize,
load
])
5 changes: 0 additions & 5 deletions src/rules/date.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,11 @@ const wrap = rule => $ => {
module.exports = [
wrap($ => $('meta[property="article:published_time"]').attr('content')),
wrap($ => $('meta[name="dc.date"]').attr('content')),
wrap($ => $('meta[name="DC.date"]').attr('content')),
wrap($ => $('meta[name="dc.date.issued"]').attr('content')),
wrap($ => $('meta[name="DC.date.issued"]').attr('content')),
wrap($ => $('meta[name="dc.date.created"]').attr('content')),
wrap($ => $('meta[name="DC.date.created"]').attr('content')),
wrap($ => $('meta[name="DC.Date"]').attr('content')),
wrap($ => $('meta[name="date"]').attr('content')),
wrap($ => $('meta[name="dcterms.date"]').attr('content')),
wrap($ => $('[itemprop="datePublished"]').attr('content')),
wrap($ => $('time[itemprop*="pubDate"]').attr('datetime')),
wrap($ => $('time[itemprop*="pubdate"]').attr('datetime')),
wrap($ => $('[property*="dc:date"]').attr('content')),
wrap($ => $('[property*="dc:created"]').attr('content')),
Expand Down

0 comments on commit 818a19b

Please sign in to comment.