Skip to content

Commit

Permalink
Tool for turning OCR Derp messages into something useful
Browse files Browse the repository at this point in the history
  • Loading branch information
Oliver Smith committed Feb 1, 2015
1 parent ebcac06 commit e6eb06a
Showing 1 changed file with 30 additions and 0 deletions.
30 changes: 30 additions & 0 deletions misc/badstations.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#! /usr/bin/perl

use strict;
use warnings;

# Load the headings from Station.csv: only its first line is needed, and it
# is copied verbatim to the top of tmp/badstations.csv by print_bad_station.
# The file is read directly (instead of shelling out to `head`) so that a
# missing or unreadable file is reported with the OS error rather than
# silently producing an empty heading.
my $heading = do {
    open(my $stations, '<', 'data/Station.csv')
        or die "Can't open data/Station.csv: $!";
    my $first_line = <$stations>;
    close($stations);
    # An empty Station.csv yields undef; fall back to an empty heading.
    defined($first_line) ? $first_line : '';
};

# Number of bad stations reported so far; while it is zero the heading has
# not been written yet.
my $results = 0;

# Output 1: CSV rows for the rejected stations (written by print_bad_station).
# Three-argument open keeps the mode separate from the path, and $! tells the
# user why the open failed (e.g. the tmp/ directory does not exist).
open(BADSTATIONS, '>', 'tmp/badstations.csv')
    or die "Can't open tmp/badstations.csv file: $!";
# Append one CSV row for a rejected station to the BADSTATIONS handle.
#
#   $_[0]  system name
#   $_[1]  station name
#
# While the file-level $results counter is still zero, the heading line
# held in $heading is written first. Each name is wrapped in single quotes
# with embedded single quotes doubled; the remaining three columns are the
# fixed placeholders -1, '?' and '?'.
sub print_bad_station($$) {
    my ($system, $station) = @_;

    # Nothing has been reported yet, so the heading row goes out first.
    unless ($results) {
        print(BADSTATIONS $heading);
    }

    # Quote both names, doubling any embedded single quote.
    my @quoted;
    for my $name ($system, $station) {
        (my $escaped = $name) =~ s/'/''/g;
        push(@quoted, "'$escaped'");
    }

    print(BADSTATIONS join(',', @quoted) . ",-1,'?','?'\n");
}

# Output 2: Python dictionary entries marking each bad station as DELETED
# (written by print_correction). Three-argument open keeps the mode separate
# from the path, and $! tells the user why the open failed.
open(CORRECTIONS, '>', 'tmp/corrections.py')
    or die "Can't open tmp/corrections.py: $!";
# Append one entry of the form
#
#     <TAB>"SYSTEM/STATION":<TAB><TAB>DELETED,
#
# to the CORRECTIONS handle, with both names upper-cased.
#
#   $_[0]  system name
#   $_[1]  station name
#
# The names end up inside a double-quoted Python string literal, so both
# double quotes and backslashes are backslash-escaped; an unescaped backslash
# could otherwise swallow the closing quote or start a bogus escape sequence.
sub print_correction($$) {
    my ($sys, $stn) = @_;

    # The loop variable aliases $sys and $stn, so both are escaped in place.
    for my $name ($sys, $stn) {
        $name =~ s/(["\\])/\\$1/g;
    }

    # Each \U is closed by its own \E so only the names are upper-cased.
    print(CORRECTIONS qq!\t"\U${sys}\E/\U${stn}\E":\t\tDELETED,\n!);
}

# Scan the input (files named on the command line, or STDIN) for the
# "looks like OCR derp" messages and record every system/station pair found
# in both output files.
# NOTE(review): the first capture is greedy, so a name containing '/' is
# split at the LAST slash - confirm that is the intended split.
while (my $line = <>) {
    next unless $line =~ m!Ignoring '(.*)/(.*)' because it looks like OCR derp!;
    my ($sys, $stn) = ($1, $2);
    print_bad_station($sys, $stn);
    print_correction($sys, $stn);
    # Counted only after both writes: print_bad_station uses a zero count
    # as the signal to emit the CSV heading.
    ++$results;
}

# Buffered write errors (e.g. a full disk) only surface at close time, so
# close both outputs explicitly and fail loudly if either could not be
# flushed.
close(BADSTATIONS) or die "Can't close tmp/badstations.csv: $!";
close(CORRECTIONS) or die "Can't close tmp/corrections.py: $!";

0 comments on commit e6eb06a

Please sign in to comment.