Skip to content

Commit

Permalink
Better left join
Browse files Browse the repository at this point in the history
  • Loading branch information
Maximillian Murphy committed Mar 21, 2018
1 parent f1944ff commit 2f708b9
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 35 deletions.
87 changes: 53 additions & 34 deletions bin/join_left.js
Original file line number Diff line number Diff line change
@@ -1,43 +1,62 @@
#!/usr/bin/env node

// Sorts a file where each line is a JSON object by the provided key:
// Join stdin with zero or more dictionaries.

const fs = require('fs'),
jline = require('jline'),
parseStream = jline.parseStream;

var by = process.argv[2]
, files = process.argv.slice(3)
, fs = require('fs')
, split = require('split');
, files = process.argv.slice(3);

process.stdout.on('error',process.exit);
console.log.apply(null,['#'].concat(process.argv));

// NOTE(review): this block looks like the pre-commit implementation shown by
// the diff (two-file join held entirely in memory) — confirm before relying on it.
// Records from files[0], indexed by the value of their `by` field.
var ans = {};

// Pass 1: read files[0] and index every parseable record by record[by].
fs.createReadStream(files[0])
// presumably `split()` (from the `split` package) chunks the stream into lines — TODO confirm
.pipe(split())
.on('data', function (line) {
var record;
try{
record = JSON.parse(line);
// Lines that are not valid JSON are skipped silently.
}catch(e){return;}
// A later record with the same key value replaces the earlier one.
ans[record[by]] = record;
})
.on('end', function(){
// Pass 2: only started once files[0] has been fully indexed.
fs.createReadStream(files[1])
.pipe(split())
.on('data', function(line){
var record;
try {
record = JSON.parse(line);
// Lines that are not valid JSON are skipped silently.
} catch(e){return;}
var combined = ans[record[by]];
// Records from files[1] with no counterpart in files[0] are dropped.
if (combined !== undefined){
// Copy every field of the files[1] record onto the stored files[0] record,
// overwriting fields that already exist.
Object.keys(record).forEach(function(k){
combined[k] = record[k];
});
}
})
.on('end', function(){
// Print every stored record (enriched or not) as one JSON document per line.
Object.keys(ans).forEach(function(k){console.log(JSON.stringify(ans[k]));});
});
});
// Entry point: build the combined lookup dictionary from the files named on
// the command line, enrich every record arriving on stdin with it, then exit.
combiDict(by, files)
  .then(combined => join(process.stdin, by, combined))
  .then(() => process.exit(0))
  // join() rejects when the stdin parser emits 'error'. Without a handler that
  // rejection would go unhandled, so report it and exit with a non-zero status
  // (3, distinct from the codes 1 and 2 used elsewhere in this script).
  .catch((e) => { console.error(e); process.exit(3); });

/**
 * Stream records from `infilePointer`, enrich each one from `dictionary`, and
 * print the result to stdout as one JSON document per line.
 *
 * A record is enriched only when it has a value under `joinKey` and the
 * dictionary holds an entry for that value; otherwise it is printed unchanged.
 * Enrichment copies the dictionary entry's fields onto the record via extend().
 *
 * @param {object} infilePointer - Readable stream handed to parseStream().
 * @param {string} joinKey - Name of the field whose value selects the enrichment.
 * @param {object} dictionary - Enrichment records indexed by join-key value.
 * @returns {Promise<void>} Resolves on the parser's 'end' event; rejects on its
 *   'error' event or if setting up the parser throws.
 */
function join(infilePointer, joinKey, dictionary){
  const dictionarySize = Object.keys(dictionary).length;
  console.error(`Enriching with ${dictionarySize} values`);

  // Enrich (when possible) and print a single parsed record.
  const emitRecord = (record) => {
    const key = record[joinKey];
    const enrichment = dictionary[key];
    const hasMatch = (key !== undefined) && (enrichment !== undefined);
    const output = hasMatch ? extend(record, enrichment) : record;
    console.log(JSON.stringify(output));
  };

  // A synchronous throw inside the executor rejects the promise, so no
  // explicit try/catch is needed around the parser set-up.
  return new Promise((resolve, reject) => {
    const parser = parseStream(infilePointer);
    parser
      .on('jline', emitRecord)
      .on('error', (err) => reject(err))
      .on('end', () => resolve());
  });
}

/**
 * Open `filename` for reading; if that fails, report it on stderr and
 * terminate the process with exit status 1.
 *
 * fs.createReadStream() opens the file lazily, so a missing or unreadable
 * file is reported through the stream's 'error' event rather than a
 * synchronous throw. Both paths are handled here.
 *
 * @param {string} filename - Path of the file to read.
 * @returns {fs.ReadStream} The readable stream for `filename`.
 */
function createReadStream(filename) {
  const fail = () => {
    console.error(`ERROR: Could not open file: '${filename}'`);
    process.exit(1);
  };
  try {
    // Asynchronous failures (e.g. ENOENT, EACCES) arrive as an 'error' event.
    return fs.createReadStream(filename).on('error', fail);
  } catch(e) {
    // Synchronous failures, e.g. an argument of an invalid type.
    fail();
  }
}

/**
 * Load a JSON-lines file into a lookup dictionary indexed by join-key value.
 *
 * Records that have no value under `joinKey` are skipped: they could never be
 * matched by join(), and indexing them would file them under the literal
 * string "undefined". When several records share a key value, the last one
 * read wins.
 *
 * @param {string} joinKey - Name of the field to index records by.
 * @param {string} filename - Path of the JSON-lines file to load.
 * @returns {Promise<object>} Resolves with the dictionary on the parser's
 *   'end' event; rejects on its 'error' event or if set-up throws.
 */
function loadDict(joinKey, filename) {
  // A synchronous throw inside the executor rejects the promise, so no
  // explicit try/catch is needed here.
  return new Promise((yay, nay) => {
    const ans = {};
    parseStream(createReadStream(filename))
      .on('jline', (data) => {
        const key = data[joinKey];
        if (undefined !== key) ans[key] = data;
      })
      .on('error', (e) => nay(e))
      .on('end', () => yay(ans));
  });
}

/**
 * Assign `v` to `d[k]` unless `v` is undefined, and hand `d` back.
 *
 * @param {object} d - Object to modify in place.
 * @param {string} k - Property name.
 * @param {*} v - Value to store; undefined leaves `d` untouched.
 * @returns {object} The same object `d`.
 */
function set(d, k, v) {
  if (v === undefined) {
    return d;
  }
  d[k] = v;
  return d;
}

/**
 * Copy every own enumerable property of `newcomer` whose value is not
 * undefined onto `dict`, overwriting existing properties of the same name.
 *
 * @param {object} dict - Object to modify in place.
 * @param {object} newcomer - Source of the properties to copy.
 * @returns {object} The same object `dict`.
 */
function extend(dict, newcomer) {
  for (const field of Object.keys(newcomer)) {
    const value = newcomer[field];
    if (value !== undefined) {
      dict[field] = value;
    }
  }
  return dict;
}

/**
 * Load every dictionary file and merge them into one lookup dictionary
 * indexed by join-key value.
 *
 * Records from different files that share a join-key value are merged field
 * by field, with later files overriding earlier ones on conflicting fields.
 * Merging the datasets directly with extend() would instead replace the whole
 * record for a key, discarding fields contributed by earlier files.
 *
 * On any load or merge failure the error is printed and the process exits
 * with status 2.
 *
 * @param {string} joinKey - Name of the field the dictionaries are indexed by.
 * @param {string[]} filenames - Paths of the JSON-lines dictionary files.
 * @returns {Promise<object>} Resolves with the combined dictionary.
 */
function combiDict(joinKey, filenames) {
  return Promise.all(filenames.map(filename => loadDict(joinKey, filename)))
    .then(datasets => datasets.reduce((combined, dataset) => {
      Object.keys(dataset).forEach((key) => {
        // Start from a fresh object so the loaded records are never mutated,
        // and ignore inherited properties such as "constructor".
        const soFar = Object.prototype.hasOwnProperty.call(combined, key) ? combined[key] : {};
        combined[key] = extend(soFar, dataset[key]);
      });
      return combined;
    }, {}))
    .catch((e) => { console.error(e); process.exit(2); });
}
10 changes: 10 additions & 0 deletions bin/join_left.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
jline-join-left
===============

Join stdin with zero or more JSON line files on a given common join key.

## Command line:

printf "%s\n" '{"id":999, "price":-1}' '{"id":666, "price":1000000}' > prices.jline
printf "%s\n" '{"id":999, "weight":2}' '{"id":666, "weight":99}' > weights.jline
echo '{"id":999, "description":"pineapple"}' | jline-join-left id prices.jline weights.jline
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "jline",
"version": "0.0.48",
"version": "0.0.49",
"description": "Sort, grep and join files where every line is JSON.",
"main": "index.js",
"bin": {
Expand Down

0 comments on commit 2f708b9

Please sign in to comment.