-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwikidataId.js
61 lines (46 loc) · 1.51 KB
/
wikidataId.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
"use strict"
// includes
const Parser = require( 'papaparse' ),
Fs = require( 'mz/fs' ),
Request = require( 'request-promise' );
// Static configuration for the Wikidata-ID lookup script.
const cfg = {
// MediaWiki API query on de.wikipedia.org asking for the `wikibase_item`
// page property (following redirects, JSON output); the page title is
// appended directly after `titles=`.
baseUrl: 'https://de.wikipedia.org/w/api.php?action=query&prop=pageprops&ppprop=wikibase_item&redirects=1&format=json&titles=',
// URL prefix that is stripped from the wikilink column to obtain the page title.
wikiUrl: 'http://de.wikipedia.org/wiki/',
// Captures the Q-identifier from the API response text. The `g` flag makes
// `lastIndex` stateful, so the script resets it before every `exec` call.
regexp: /wikibase_item":"(Q\d+)"/gi,
// Zero-based index of the column that holds the Wikipedia link in each row.
COL_WIKILINK: 1
};
/**
 * Main routine.
 *
 * Reads `data/res_geoloc.tsv`, and for every row takes the Wikipedia link in
 * column `cfg.COL_WIKILINK`, strips `cfg.wikiUrl` from it and asks the
 * Wikipedia API (`cfg.baseUrl`) for the matching Wikidata item. The Q-id found
 * via `cfg.regexp` (or `null` if none) is appended to the row as a new last
 * column. The enriched table is written to `data/res_wikidataId.tsv`.
 *
 * Requests are issued one after another (each is awaited inside the loop).
 * Any rejection is logged by the trailing `.catch`.
 */
!( async function(){

  // get file
  const file = await Fs.readFile( __dirname + '/data/res_geoloc.tsv', 'utf8' );
  const data = Parser.parse( file, { delimiter: '\t' } );

  for( const row of data.data ) {

    // Rows that do not have the wikilink column at all (e.g. the single-cell
    // row the parser yields for a blank trailing line) are left untouched.
    // Without this guard `.replace` would be called on `undefined`, throw,
    // and abort the run before the result file is written.
    const link = row[ cfg.COL_WIKILINK ];
    if( typeof link !== 'string' ) {
      continue;
    }

    // get wiki-id (page title = link without the wiki URL prefix)
    const wikiId = link.replace( cfg.wikiUrl, '' );

    // skip if empty, but keep the column count consistent
    if( wikiId === '' ) {
      row.push( null );
      continue;
    }

    // get API response
    // NOTE(review): the title is appended unescaped; this assumes the links
    // in the TSV are already URL-safe - confirm against the input data.
    const wikires = await Request( cfg.baseUrl + wikiId );

    // wikidata id; the regexp is global, so reset its cursor before reuse
    cfg.regexp.lastIndex = 0;
    const wikidataIdRes = cfg.regexp.exec( wikires );

    // do we have a match?
    let wikidataId = null;
    if( wikidataIdRes ) {
      console.log( 'match:', wikiId, ' - ', wikidataIdRes[1] );
      wikidataId = wikidataIdRes[1];
    } else {
      console.log( 'no match:', wikiId );
      console.log();
    }

    // append to table
    row.push( wikidataId );

  }

  // serialize the enriched table and write it out
  const result = Parser.unparse( data, { delimiter: '\t' } );
  await Fs.writeFile( __dirname + '/data/res_wikidataId.tsv', result );

})()
  .catch( e => console.log(e) );