Skip to content

Commit 566f9d1

Browse files
committed
Avoid repeat parsing steps
1 parent 5b3861b commit 566f9d1

File tree

2 files changed

+11
-3
lines changed

2 files changed

+11
-3
lines changed

packages/metascraper/src/load-html.js

+10-2
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
'use strict'
2-
32
const { forEach, flow, isEmpty, toLower } = require('lodash')
43
const sanitizeHtml = require('sanitize-html')
54
const cheerio = require('cheerio')
@@ -19,9 +18,18 @@ const sanitize = html =>
1918
meta: normalizeAttributes(['name', 'property']),
2019
a: normalizeAttributes(['href']),
2120
link: normalizeAttributes(['rel'])
21+
},
22+
parser: {
23+
lowerCaseTags: true,
24+
decodeEntities: true,
25+
lowerCaseAttributeNames: true
2226
}
2327
})
2428

25-
const load = cheerio.load.bind(cheerio)
29+
const load = html => cheerio.load(html, {
30+
lowerCaseTags: false,
31+
decodeEntities: false,
32+
lowerCaseAttributeNames: false
33+
})
2634

2735
module.exports = flow([sanitize, load])

packages/metascraper/test/integration/cbr/input.html

+1-1
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@
3636
<script src="/Scripts/jquery.validate.js" type="text/javascript"></script>
3737
<script src="/Scripts/xVal.jquery.validate.js" type="text/javascript"></script>
3838
<script src="/Scripts/pmg.CustomValidators.js" type="text/javascript"></script>
39-
<link rel="SHORTCUT ICON" href="http://www.cbronline.com/Content/images/favicon.ico">
39+
<LINK REL="SHORTCUT ICON" href="http://www.cbronline.com/Content/images/favicon.ico">
4040
<meta name="google-site-verification" content="Ektry6dPOSxDEy6rz0_bYZlTumQ5O0ZMigiavMhwFjY"/>
4141
<meta name="DCSext.WebsiteName" content="www.cbronline.com"/>
4242
<script type="text/javascript">

0 commit comments

Comments
 (0)