diff --git a/build.js b/build.js index 4e8cbd1..cb58c48 100644 --- a/build.js +++ b/build.js @@ -1,31 +1,38 @@ 'use strict'; var fs = require('fs'); +var http = require('http'); var bail = require('bail'); -var jsdom = require('jsdom'); +var concat = require('concat-stream'); +var unified = require('unified'); +var parse = require('rehype-parse'); +var selectAll = require('hast-util-select').selectAll; +var toString = require('hast-util-to-string'); -jsdom.env('http://www.readabilityformulas.com/articles/spache-formula-word-list.php', read); +http.get('http://www.readabilityformulas.com/articles/spache-formula-word-list.php', function (res) { + res.pipe(concat(onconcat)).on('error', bail); -function read(err, window) { - bail(err); + function onconcat(buf) { + var tree = unified().use(parse).parse(buf); + var values = selectAll('td p', tree) + .map(toString) + .join('|') + .replace(/\\/g, '$&\'') + .trim() + .split(/\s*\|\s*/g) + .filter(Boolean) + .map(lower) + .filter(unique) + .sort(); - var values = [].slice.call(window.document.querySelectorAll('td p')) - .map(function (node) { - return node.textContent; - }) - .join('|') - .replace(/\\/g, '$&\'') - .trim() - .split(/\s*\|\s*/g) - .filter(Boolean) - .map(function (value) { - return value.toLowerCase(); - }) - .sort(); + fs.writeFile('index.json', JSON.stringify(values, 0, 2) + '\n', bail); + } +}); - values = values.filter(function (value, index) { - return values.indexOf(value, index + 1) === -1; - }); +function lower(value) { + return value.toLowerCase(); +} - fs.writeFileSync('index.json', JSON.stringify(values, 0, 2) + '\n'); +function unique(value, index, all) { + return all.indexOf(value, index + 1) === -1; } diff --git a/package.json b/package.json index bf25289..60f3af3 100644 --- a/package.json +++ b/package.json @@ -22,12 +22,16 @@ "devDependencies": { "bail": "^1.0.1", "browserify": "^14.0.0", + "concat-stream": "^1.6.0", "esmangle": "^1.0.1", - "jsdom": "^9.8.3", + "hast-util-select": "^1.0.1", + "hast-util-to-string": "^1.0.0", "nyc": "^11.0.0", + "rehype-parse": "^4.0.0", "remark-cli": "^3.0.0", "remark-preset-wooorm": "^3.0.0", "tape": "^4.0.0", + "unified": "^6.1.5", "xo": "^0.18.0" }, "scripts": { @@ -35,7 +39,7 @@ "build-generate": "node build", "build-bundle": "browserify index.json --bare -s spache > spache.js", "build-mangle": "esmangle spache.js > spache.min.js", - "build": "npm run build-md && npm run build-bundle && npm run build-mangle", + "build": "npm run build-generate && npm run build-md && npm run build-bundle && npm run build-mangle", "lint": "xo", "test-api": "node test", "test-coverage": "nyc --reporter lcov tape test.js",