Skip to content

Commit

Permalink
extraction script and fix domain sync issue in country-collapse script
Browse files Browse the repository at this point in the history
  • Loading branch information
willscott committed Feb 1, 2016
1 parent d9d13c5 commit 9017c1d
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 9 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ authdata.json
*.gz
*.jsonList
config.json
cluster_correlation/correlation-distr
cluster_correlation/correlation-distr
postrun.sh
12 changes: 7 additions & 5 deletions asn_aggregation/asn_asn-to-country_country.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,18 @@ var outFile = process.argv[5];

var ip2asn = lookuper.lookup.bind({}, asn_lookup);

function doDomain(into, map, domains, line) {
function doDomain(into, map, line) {
var asn_info, domain, countrymap = {};
domain = domains.shift();
try {
asn_info = JSON.parse(line);
} catch (e) {
return;
}
domain = asn_info.name;
if (!domain) {
return;
}

Object.keys(asn_info).forEach(function(asn) {
var cntry = map[asn];
if (!cntry) {
Expand All @@ -59,13 +63,11 @@ function doAll() {

console.log(chalk.blue('Starting'));
return countries.then(function(map) {
var dlines = fs.readFileSync(domains).toString().split('\n');

return Q.Promise(function (resolve, reject) {
fs.createReadStream(inFile)
.pipe(progress({total: total}))
.pipe(es.split())
.pipe(es.mapSync(doDomain.bind({}, into, map, dlines)))
.pipe(es.mapSync(doDomain.bind({}, into, map)))
.pipe(es.join('\n'))
.on('end', function () {
resolve(into);
Expand Down
4 changes: 2 additions & 2 deletions favicon/validate.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ var ipDomainValidation = function (ip) {
if (ip.length < 2) { return; }

var doms = Object.keys(ip[1]);
var cc = iu.getClassC(ip[0]);
var cc = ip[0];
for (var i = 0; i < doms.length; i += 1) {
var domain = doms[i];
if (scores[domain] && scores[domain][cc] !== undefined) {
Expand Down Expand Up @@ -85,7 +85,7 @@ var reduceDomains = function () {
console.log('done.');
console.log('Gave score of true to', t_c, ' of ', t_tt);
console.log('Gave score of false to', t_f, ' of ', t_tf);
console.log('Under curve is ', (t_c + tf) / (t_tt + t_tf));
console.log('Under curve is ', (t_c + t_f) / (t_tt + t_tf));
process.exit(0);
};

Expand Down
6 changes: 5 additions & 1 deletion fullrun.sh
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ buildMatrices()
echo "Assigning Domains to clusters..."
node cluster_correlation/correlation-distr/run-distr.js runs/$thisRun/similarity06 runs/$thisRun/clusters.json
echo "Assigning IPs to clusters..."
node cluster_correlation/cluster-footprint.js runs/$thisRun/clusters.json runs/$thisRun/aggregate.classC-domain.json runs/$thisRun/similarity06 runs/$thisRun/clusters.ips.json
node cluster_correlation/cluster-footprint.js runs/$thisRun/clusters.json runs/$thisRun/aggregate.classC-domain.json runs/$thisRun/similarity07 runs/$thisRun/clusters.ips.json
echo "Secondary Signal Aggregation [ptrs]"
node cluster_correlation/merge_on_metadata.js runs/$thisRun/clusters.json runs/$thisRun/clusters.ips.json runs/$thisRun/ptrs.json 0.8 runs/$thisRuns/clusters.merged.json

Expand Down Expand Up @@ -225,6 +225,10 @@ reverseLookup # do PTR lookups
#favicon # Favicon scan and compare - not default.
buildMatrices # build similarity table
cleanup
if [ -f postrun.sh ]
then
bash postrun.sh
fi
else
thisRun=${2}
${1}
Expand Down
61 changes: 61 additions & 0 deletions interference/extract_local_resolutions.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
var fs = require('fs');
var es = require('event-stream');
var countries = require('ip2country/src/as2country').createAS2CountryMap();
var progress = require('progressbar-stream');
var iputils = require('../util/ip_utils');

// Tells you which sites have a significant resolution to a specific prefix
// across the different ASNs in a given country.
// A sufficient script to extract sites blocked to a known block-page IP address
//
// usage: node extract_local_resolutions.js <runs/date/asn.js> <blocked_domains.json> <CountryCode>
//   argv[2]  input file, read as a stream and split into lines, one JSON document per line
//   argv[3]  output path; the collected [name, good, bad] entries are written here as JSON
//   argv[4]  country value; ASNs whose AS-to-country map entry equals it are inspected
//            (presumably a two-letter country code -- confirm against ip2country)
var country = process.argv[4];

/**
 * Inspects one line of the per-domain resolution file and appends the domain
 * to `list` when reserved-address answers outweigh a quarter of the
 * non-reserved ones across the supplied ASNs.
 *
 * Each line is expected to be a JSON object holding a `name` property plus
 * one property per ASN, where every ASN property maps an IP string to a value
 * that is summed (presumably a resolution count -- confirm with the producer
 * of the input file).
 *
 * @param {Array} asns ASN keys to look up on the parsed line.
 * @param {Array} list Accumulator; receives `[name, good, bad]` entries.
 * @param {string} domLine One raw line of input.
 * @returns {undefined} Nothing is returned; results are pushed onto `list`.
 */
var doDomain = function (asns, list, domLine) {
  if (!domLine.length) {
    return;
  }
  var dom;
  try {
    dom = JSON.parse(domLine);
  } catch (e) {
    // Best effort: a malformed line is skipped rather than aborting the run.
    return;
  }
  // JSON.parse also accepts bare literals such as `null` or `42`. Indexing
  // null would throw, and other primitives carry no per-ASN data, so skip.
  if (dom === null || typeof dom !== 'object') {
    return;
  }
  var good = 0, bad = 0;
  asns.forEach(function (asn) {
    var answers = dom[asn];
    if (!answers) {
      return;
    }
    Object.keys(answers).forEach(function (ip) {
      // Reserved addresses are tallied as "bad", everything else as "good".
      if (iputils.isReserved(ip)) {
        bad += answers[ip];
      } else {
        good += answers[ip];
      }
    });
  });
  // Keep the domain when bad answers exceed one quarter of the good ones.
  if (bad * 4 > good) {
    list.push([dom.name, good, bad]);
  }
};

// Once the AS-to-country map resolves: pick the ASNs mapped to the requested
// country, stream the input file line by line through doDomain, then write
// the collected entries as JSON and exit.
countries.then(function (cmap) {
  var countryASNs = Object.keys(cmap).filter(function (asn) {
    return cmap[asn] === country;
  });
  var asnFile = process.argv[2];
  var list = [];
  var perLine = doDomain.bind({}, countryASNs, list);

  // The read stream is created before the stat call, as in the original order.
  var source = fs.createReadStream(asnFile);
  var bar = progress({total: fs.statSync(asnFile).size});

  source
    .pipe(bar)
    .pipe(es.split())
    .pipe(es.mapSync(perLine))
    .on('end', function () {
      var serialized = JSON.stringify(list);
      fs.writeFileSync(process.argv[3], serialized);
      process.exit(0);
    });
});

0 comments on commit 9017c1d

Please sign in to comment.