Skip to content

Commit

Permalink
Merge pull request #157 from pelias/joxit/wof_extract_sqlite
Browse files Browse the repository at this point in the history
Extract data via WOF SQLite database
  • Loading branch information
orangejulius authored Apr 23, 2020
2 parents 0250dae + 2fcd7df commit adedc3c
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 85 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ RUN npm install
# copy code from local checkout
ADD . ${WORKDIR}

ENV WOF_DIR '/data/whosonfirst/data'
ENV WOF_DIR '/data/whosonfirst/sqlite'
ENV PLACEHOLDER_DATA '/data/placeholder'

USER pelias
Expand Down
35 changes: 0 additions & 35 deletions cmd/download_extract.sh

This file was deleted.

2 changes: 1 addition & 1 deletion cmd/extract.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ mkdir -p ${PLACEHOLDER_DATA};

echo "Creating extract at ${PLACEHOLDER_DATA}/wof.extract"

${DIR}/wof_extract.sh > ${PLACEHOLDER_DATA}/wof.extract;
# Run the SQLite-based extractor and write its stdout to the extract file.
# NOTE(review): `exec` replaces this shell with the node process, so nothing
# after this line runs -- the `echo 'Done!'` below is unreachable. Confirm
# whether `exec` is intentional or whether the final message should be kept.
exec node --max_old_space_size=8000 ${DIR}/wof_extract_sqlite.js > ${PLACEHOLDER_DATA}/wof.extract;

echo 'Done!'
48 changes: 0 additions & 48 deletions cmd/wof_extract.sh

This file was deleted.

69 changes: 69 additions & 0 deletions cmd/wof_extract_sqlite.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
const path = require('path');
const fs = require('fs');
const whosonfirst = require('pelias-whosonfirst');
const config = require('pelias-config').generate().imports.whosonfirst;
const SQLiteStream = whosonfirst.SQLiteStream;
const through = require('through2');
const Placeholder = require('../Placeholder');
const combinedStream = require('combined-stream');

// Selects which files in WOF_DIR are treated as WOF SQLite databases: any
// name ending in 'whosonfirst-data-<lowercase letters, digits, dashes>.db'
// (the pattern is only anchored at the end of the name).
const SQLITE_REGEX = /whosonfirst-data-[a-z0-9-]+\.db$/;

// Directory scanned for databases. Taken from the WOF_DIR environment
// variable; an unset or empty value falls back to the hard-coded path.
// NOTE(review): the Dockerfile sets WOF_DIR to '/data/whosonfirst/sqlite',
// which differs from this fallback -- confirm the default is intended.
const WOF_DIR = process.env.WOF_DIR || '/data/whosonfirst-data/sqlite';

// List of placetype names (layers) to extract, derived from the
// 'placetype.filter' file that sits next to this script.
const placetypeFilterText = fs.readFileSync(path.join(__dirname, 'placetype.filter'), 'utf-8');

// Drop the leading part of the first line, up to and including its LAST
// opening parenthesis ('.*' is greedy and does not cross a newline).
const placetypeList = placetypeFilterText.replace(/^.*\(/, '');

// Every remaining run of lowercase letters is one layer name.
// Note: match() yields null (not an empty array) when nothing matches.
const layers = placetypeList.match(/[a-z]+/g);

// Property-name whitelist, built from the 'jq.filter' file that sits next to
// this script: every `test("...")` call found in it contributes one RegExp.
const jqFilterText = fs.readFileSync(path.join(__dirname, 'jq.filter'), 'utf-8');

// Each match is the whole call text, which always starts with `test("` and
// ends with `")`; the pattern source is whatever lies between the two.
const TEST_CALL_PREFIX = 'test("';
const TEST_CALL_SUFFIX = '")';

// Note: match() yields null when the file contains no test("...") call, in
// which case the map() below throws.
const jq_filter = jqFilterText
  .match(/test\("(.*)"\)/g)
  .map(call => new RegExp(call.slice(TEST_CALL_PREFIX.length, -TEST_CALL_SUFFIX.length)));

/**
 * Create the object-mode stream that terminates the extract pipeline.
 *
 * Default mode: every incoming record is printed to stdout as one line of
 * JSON.
 *
 * Build mode (first CLI argument is 'build'): records are handed to a
 * Placeholder instance via insertWofRecord(); once the input ends, the
 * instance's populate(), optimize() and close() methods are called in that
 * order, with progress messages written to stderr.
 *
 * @returns {object} a through2 object stream consuming the filtered records
 */
const output = () => {
  const isBuildMode = process.argv.length > 2 && process.argv[2] === 'build';

  if (!isBuildMode) {
    return through.obj((record, _enc, callback) => {
      console.log(JSON.stringify(record));
      callback();
    });
  }

  const placeholder = new Placeholder();
  placeholder.load({ reset: true });

  // insertWofRecord() receives the stream callback and is responsible for
  // signalling when the next record may be processed
  const insertRecord = (record, _enc, callback) => {
    placeholder.insertWofRecord(record, callback);
  };

  // runs once, after the last record has been inserted
  const finalize = callback => {
    console.error('populate fts...');
    placeholder.populate();
    console.error('optimize...');
    placeholder.optimize();
    console.error('close...');
    placeholder.close();
    callback();
  };

  return through.obj(insertRecord, finalize);
};

// Source of the pipeline: one SQLiteStream per database file found in
// WOF_DIR, chained one after another into a single combined stream.
const sqliteStream = combinedStream.create();

// Full paths of every file in WOF_DIR whose name matches SQLITE_REGEX.
const databasePaths = fs.readdirSync(WOF_DIR)
  .filter(file => SQLITE_REGEX.test(file))
  .map(file => path.join(WOF_DIR, file));

if (databasePaths.length === 0) {
  // Without this the script would silently produce an empty extract.
  // Records are printed on stdout, so the diagnostic goes to stderr.
  console.error(`no SQLite database matching ${SQLITE_REGEX} found in ${WOF_DIR}, the extract will be empty`);
}

databasePaths.forEach(dbPath => {
  // The function form of append() lets combined-stream request each
  // SQLiteStream through `next` instead of constructing them all up front.
  sqliteStream.append(next => {
    next(new SQLiteStream(
      dbPath,
      // when config.importPlace is set it is passed on to the query builder
      // together with the layers; otherwise only the layers are used
      config.importPlace ?
        SQLiteStream.findGeoJSONByPlacetypeAndWOFId(layers, config.importPlace) :
        SQLiteStream.findGeoJSONByPlacetype(layers)
    ));
  });
});

/**
 * Transform step: strip every property whose name matches none of the
 * jq_filter patterns, then forward only the (pruned) properties object.
 * The record's properties object is modified in place.
 */
const keepWhitelistedProperties = (record, _enc, callback) => {
  const properties = record.properties;

  for (const key of Object.keys(properties)) {
    const isWhitelisted = jq_filter.some(pattern => pattern.test(key));
    if (!isWhitelisted) {
      delete properties[key];
    }
  }

  callback(null, properties);
};

// databases -> JSON records -> whitelisted properties -> stdout or build
sqliteStream
  .pipe(whosonfirst.toJSONStream())
  .pipe(through.obj(keepWhitelistedProperties))
  .pipe(output());
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@
"lower-case": "^2.0.0",
"morgan": "^1.9.0",
"pelias-blacklist-stream": "^1.1.0",
"pelias-config": "^4.5.0",
"pelias-logger": "^1.2.1",
"pelias-whosonfirst": "^4.0.0",
"remove-accents": "^0.4.0",
"require-dir": "^1.0.0",
"sorted-intersect": "^0.1.4",
Expand Down

0 comments on commit adedc3c

Please sign in to comment.