Skip to content

Commit

Permalink
Code style improvements (#12)
Browse files Browse the repository at this point in the history
* Structure code so we can potentially pip install it

* WIP: Simplify error handling; fix bad operator precedence

* Finish more of operator prec / messages refactor

* Reduce local variables / get / null checks

* Avoid passing around line_number context

* Turn callbacks into generators

* Rename expected_fields to defined_fields

* Ask intellij to reformat the code

* Auto-attach line context in CSV reader

* WIP: remove bogus params

* Make more tolerant of gobbledegook agency files

* Only use one CSV reading function; use sets in a few more places

* More sets

* Add very gradual schema to prevent some potential mistakes in the code from going undetected

* More type structure for the overall GTFS struct

* Type safety + more sets for linked entities

* One last cleanup

* Fix: an stupid

* Flip in to not in

Co-authored-by: Jeremy Steele <[email protected]>
  • Loading branch information
npaun and jsteelz authored Jul 23, 2021
1 parent f50e005 commit 4004ab0
Show file tree
Hide file tree
Showing 68 changed files with 1,123 additions and 1,157 deletions.
2 changes: 1 addition & 1 deletion .python-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.7.4
3.7.11
File renamed without changes.
File renamed without changes.
34 changes: 34 additions & 0 deletions fares_validator/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import argparse
from os import path

from .loader import run_validator


def main():
    """Command-line entry point: validate a GTFS fares-v2 folder and report the results.

    Parses the command-line arguments, runs the validator on the given
    folder, and either writes the textual report to ``--output-file`` or
    prints it to standard output.

    Raises:
        Exception: if the input path is not an existing directory, or if the
            report cannot be written to the requested output file.
    """
    parser = argparse.ArgumentParser(description='Validate GTFS fares-v2 data.')
    parser.add_argument("-s", "--read-stop-times", help="Scan stop_times for area_ids", action='store_true')
    parser.add_argument("-o", "--output-file", type=str, help="Export the errors and warnings to a file")
    parser.add_argument("input_gtfs_folder", type=str, help="Path to unzipped folder containing the Fares-v2 GTFS")

    args = parser.parse_args()

    gtfs_path = args.input_gtfs_folder
    if not path.isdir(gtfs_path):
        raise Exception('Input path is not a valid folder.')

    results = run_validator(gtfs_path, args.read_stop_times)
    output = results.to_string()

    if not args.output_file:
        print(output)
        return

    try:
        # The context manager guarantees the handle is flushed and closed,
        # even when the write itself fails.
        with open(args.output_file, 'w') as report_file:
            report_file.write(output)
    except Exception as err:
        # Keep the user-facing message, but chain the underlying cause so the
        # real reason (permissions, missing directory, ...) stays visible.
        raise Exception('Writing to output file failed. Please ensure the output file path is valid.') from err


if __name__ == '__main__':
    main()
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,4 @@
'YER': 2,
'ZAR': 2,
'ZMW': 2,
}
}
44 changes: 44 additions & 0 deletions fares_validator/diagnostics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
def format(code, line_context='', path='', extra_info=''):
    """Assemble a diagnostic message.

    The result is ``"<path>: "`` (only when ``path`` is non-empty), then
    ``code``, then a newline followed by ``extra_info`` (only when
    ``extra_info`` is non-empty), then ``line_context``.
    """
    prefix = path + ': ' if path else ''
    details = '\n' + extra_info if extra_info else ''
    return prefix + code + details + line_context


class Diagnostics:
    """Accumulates error and warning messages and renders them as one report."""

    def __init__(self):
        self.errors = []
        self.warnings = []

    def add_warning(self, message):
        """Record a warning message."""
        self.warnings.append(message)

    def add_error(self, message):
        """Record an error message."""
        self.errors.append(message)

    def to_string(self):
        """Return the report text: the errors section followed by the warnings section."""
        if self.errors:
            error_section = 'ERRORS:\n' + ''.join(f'\n{entry}\n' for entry in self.errors)
        else:
            error_section = 'No errors detected.\n'

        if self.warnings:
            warning_section = '\n\nWARNINGS:\n' + ''.join(f'\n{entry}\n' for entry in self.warnings)
        else:
            warning_section = '\n\nNo warnings to report.'

        return error_section + warning_section
11 changes: 0 additions & 11 deletions src/errors.py → fares_validator/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,14 +101,3 @@
# Cross-field consistency messages for fare_transfer_rules.txt entries
# (spanning_limit / transfer_id / transfer_sequence combinations).
SPANNING_LIMIT_WITH_TRANSFER_ID = 'An entry in fare_transfer_rules.txt has spanning_limit with transfer_id defined.'
TRANSFER_ID_WITHOUT_TRANSFER_SEQUENCE = 'A transfer_id in fare_transfer_rules.txt is defined without a transfer_sequence.'
TRANSFER_SEQUENCE_WITHOUT_TRANSFER_ID = 'A transfer_sequence in fare_transfer_rules.txt is defined without a transfer_id.'

def add_error(error, line_num_error_msg, errors, path='', extra_info=''):
    """Build an error message and append it to ``errors``.

    The message is ``"<path>: "`` (only when ``path`` is non-empty), then
    ``error``, then a newline followed by ``extra_info`` (only when
    ``extra_info`` is non-empty), then ``line_num_error_msg``.
    """
    prefix = path + ': ' if path else ''
    details = '\n' + extra_info if extra_info else ''
    errors.append(prefix + error + details + line_num_error_msg)
50 changes: 50 additions & 0 deletions fares_validator/fare_leg_rule_checkers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from . import utils
from .errors import *


def check_areas(line, areas, unused_areas):
    """Validate the area-related fields of one fare leg rule line.

    Reports inconsistencies between is_symmetrical, from_area_id, to_area_id
    and contains_area_id on ``line``, removes the line's from/to area ids
    from ``unused_areas``, and delegates the id lookups against ``areas`` to
    ``utils.check_linked_id``.
    """
    symmetrical = line.is_symmetrical
    has_endpoint = bool(line.from_area_id or line.to_area_id)

    if symmetrical and symmetrical not in {'0', '1'}:
        line.add_error(INVALID_IS_SYMMETRICAL_LEG_RULES)

    if line.contains_area_id and not has_endpoint:
        line.add_error(CONTAINS_AREA_WITHOUT_FROM_TO_AREA)

    if has_endpoint and not symmetrical:
        line.add_error(AREA_WITHOUT_IS_SYMMETRICAL)

    if symmetrical and not has_endpoint:
        line.add_error(IS_SYMMETRICAL_WITHOUT_FROM_TO_AREA)

    # Any area referenced as an endpoint counts as used.
    for area_id in (line.from_area_id, line.to_area_id):
        if area_id and area_id in unused_areas:
            unused_areas.remove(area_id)

    for field_name in ('from_area_id', 'to_area_id', 'contains_area_id'):
        utils.check_linked_id(line, field_name, areas)


def check_distances(line):
if line.distance_type and line.distance_type not in {'0', '1'}:
line.add_error(INVALID_DISTANCE_TYPE)

if line.min_distance:
try:
dist = float(line.min_distance)
if dist < 0:
line.add_error(NEGATIVE_MIN_DISTANCE)
except ValueError:
line.add_error(INVALID_MIN_DISTANCE)
if line.max_distance:
try:
dist = float(line.max_distance)
if dist < 0:
line.add_error(NEGATIVE_MAX_DISTANCE)
except ValueError:
line.add_error(INVALID_MAX_DISTANCE)

if (line.min_distance or line.max_distance) and not line.distance_type:
line.add_error(DISTANCE_WITHOUT_DISTANCE_TYPE)
if (not line.min_distance and not line.max_distance) and line.distance_type:
line.add_error(DISTANCE_TYPE_WITHOUT_DISTANCE)
89 changes: 89 additions & 0 deletions fares_validator/fare_product_checkers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from .errors import *
from .warnings import *


class LinkedEntities:
    """Sets of rider category ids and fare container ids linked to one fare product."""

    def __init__(self):
        self.rider_category_ids, self.fare_container_ids = set(), set()


def check_linked_fp_entities(line, rider_categories, rider_category_by_fare_container, linked_entities_by_fare_product):
    """Record and validate the rider category / fare container linked to a fare product line.

    The ids found on ``line`` are added to the ``LinkedEntities`` stored for
    the line's fare_product_id in ``linked_entities_by_fare_product``. Errors
    are reported on ``line`` for ids that are not defined, and for a rider
    category that differs from the one associated with the fare container.
    """
    product_id = line.fare_product_id
    entities = linked_entities_by_fare_product.setdefault(product_id, LinkedEntities())

    rider_id = line.rider_category_id
    container_id = line.fare_container_id

    if rider_id:
        entities.rider_category_ids.add(rider_id)
        if rider_id not in rider_categories:
            line.add_error(NONEXISTENT_RIDER_CATEGORY_ID)

    if container_id:
        entities.fare_container_ids.add(container_id)
        if container_id not in rider_category_by_fare_container:
            line.add_error(NONEXISTENT_FARE_CONTAINER_ID)

        container_rider_id = rider_category_by_fare_container.get(container_id)
        if rider_id and container_rider_id and rider_id != container_rider_id:
            line.add_error(CONFLICTING_RIDER_CATEGORY_ON_FARE_CONTAINER)

    linked_entities_by_fare_product[product_id] = entities


def check_bundle(line):
    """Report INVALID_BUNDLE_AMOUNT when bundle_amount is set but is not a non-negative integer."""
    raw_amount = line.bundle_amount
    if not raw_amount:
        return

    try:
        is_valid = int(raw_amount) >= 0
    except ValueError:
        is_valid = False

    if not is_valid:
        line.add_error(INVALID_BUNDLE_AMOUNT)


def check_durations_and_offsets(line):
    """Validate the duration_* and offset_* fields of one fare product line.

    Each field is checked against its allowed codes, the amounts must parse
    as integers (duration_amount must also be at least 1), and combinations
    of fields that do not belong together are reported. Everything is an
    error except an offset_amount without offset_unit, which is a warning.
    """
    unit_codes = {'0', '1', '2', '3', '4', '5', '6'}

    start = line.duration_start
    unit = line.duration_unit
    kind = line.duration_type
    amount = line.duration_amount

    if start and start not in {'0', '1'}:
        line.add_error(INVALID_DURATION_START)

    if unit and unit not in unit_codes:
        line.add_error(INVALID_DURATION_UNIT)

    if kind and kind not in {'1', '2'}:
        line.add_error(INVALID_DURATION_TYPE)

    if kind == '1' and start:
        line.add_error(DURATION_START_WITH_DURATION_TYPE)

    if not amount:
        if kind:
            line.add_error(DURATION_TYPE_WITHOUT_AMOUNT)
        if unit:
            line.add_error(DURATION_UNIT_WITHOUT_AMOUNT)
    else:
        try:
            if int(amount) < 1:
                line.add_error(NEGATIVE_OR_ZERO_DURATION)
        except ValueError:
            line.add_error(NON_INT_DURATION_AMOUNT)

        if not unit:
            line.add_error(DURATION_WITHOUT_UNIT)

        if not kind:
            line.add_error(DURATION_WITHOUT_TYPE)

    offset_unit = line.offset_unit
    offset_amount = line.offset_amount

    if offset_unit and offset_unit not in unit_codes:
        line.add_error(INVALID_OFFSET_UNIT)

    if not offset_amount:
        if offset_unit:
            line.add_error(OFFSET_UNIT_WITHOUT_AMOUNT)
        return

    try:
        # Only the parse matters here; the value itself is not range-checked.
        int(offset_amount)
    except ValueError:
        line.add_error(NON_INT_OFFSET_AMOUNT)

    if kind == '2':
        line.add_error(OFFSET_AMOUNT_WITH_DURATION_TYPE)

    if not offset_unit:
        line.add_warning(OFFSET_AMOUNT_WITHOUT_OFFSET_UNIT)
65 changes: 65 additions & 0 deletions fares_validator/fare_transfer_rule_checkers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from .errors import *


def check_leg_groups(line, leg_group_ids, unused_leg_groups):
    """Validate the leg-group fields of one fare transfer rule line.

    Reports an invalid is_symmetrical value, a mismatch between
    is_symmetrical and the presence of from/to leg groups, and from/to leg
    group ids that are not in ``leg_group_ids``. The line's from/to leg group
    ids are then removed from ``unused_leg_groups``.
    """
    symmetrical = line.is_symmetrical
    from_group = line.from_leg_group_id
    to_group = line.to_leg_group_id
    has_group = bool(from_group or to_group)

    if symmetrical and symmetrical not in {'0', '1'}:
        line.add_error(INVALID_IS_SYMMETRICAL_TRANSFER_RULES)
    if has_group and not symmetrical:
        line.add_error(LEG_GROUP_WITHOUT_IS_SYMMETRICAL)
    if symmetrical and not has_group:
        line.add_error(IS_SYMMETRICAL_WITHOUT_FROM_TO_LEG_GROUP)
    if from_group and from_group not in leg_group_ids:
        line.add_error(INVALID_FROM_LEG_GROUP)
    if to_group and to_group not in leg_group_ids:
        line.add_error(INVALID_TO_LEG_GROUP)

    for group_id in (from_group, to_group):
        if group_id in unused_leg_groups:
            unused_leg_groups.remove(group_id)


def check_spans_and_transfer_ids(line):
    """Validate spanning_limit, transfer_id and transfer_sequence on a fare transfer rule line.

    A spanning_limit needs identical from/to leg groups, no transfer_id, and
    an integer value above 1. transfer_id and transfer_sequence must be given
    together, and transfer_sequence must be an integer of at least 1.
    """
    span = line.spanning_limit
    transfer_id = line.transfer_id
    sequence = line.transfer_sequence

    if span:
        if line.from_leg_group_id != line.to_leg_group_id:
            line.add_error(SPANNING_LIMIT_WITH_BAD_LEGS)
        if transfer_id:
            line.add_error(SPANNING_LIMIT_WITH_TRANSFER_ID)

        try:
            span_ok = int(span) > 1
        except ValueError:
            span_ok = False
        if not span_ok:
            line.add_error(INVALID_SPANNING_LIMIT)

    if transfer_id and not sequence:
        line.add_error(TRANSFER_ID_WITHOUT_TRANSFER_SEQUENCE)

    if sequence:
        if not transfer_id:
            line.add_error(TRANSFER_SEQUENCE_WITHOUT_TRANSFER_ID)

        try:
            sequence_ok = int(sequence) >= 1
        except ValueError:
            sequence_ok = False
        if not sequence_ok:
            line.add_error(INVALID_TRANSFER_SEQUENCE)


def check_durations(line):
    """Validate duration_limit and duration_limit_type on a fare transfer rule line.

    duration_limit_type must be one of '0'..'3'; duration_limit must be an
    integer of at least 1; and each of the two fields requires the other.
    """
    limit = line.duration_limit
    limit_type = line.duration_limit_type

    if limit_type and limit_type not in {'0', '1', '2', '3'}:
        line.add_error(INVALID_DURATION_LIMIT_TYPE)

    if not limit:
        if limit_type:
            line.add_error(DURATION_LIMIT_TYPE_WITHOUT_DURATION)
        return

    if not limit_type:
        line.add_error(DURATION_LIMIT_WITHOUT_LIMIT_TYPE)

    try:
        limit_ok = int(limit) >= 1
    except ValueError:
        limit_ok = False
    if not limit_ok:
        line.add_error(INVALID_DURATION_LIMIT)
49 changes: 49 additions & 0 deletions fares_validator/loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from pathlib import Path

from . import read_gtfs_entities, read_fares_entities, diagnostics
from . import warnings as warn


class Entities:
    """Plain attribute holder for the entities loaded from a GTFS feed.

    The known attribute types could eventually be listed here for a
    typechecker like mypy.
    """


def run_validator(gtfs_root_dir, should_read_stop_times):
    """Load and validate the feed under ``gtfs_root_dir`` and return the collected diagnostics.

    The readers are invoked in a fixed order, each recording its findings in
    the shared ``Diagnostics`` object. ``should_read_stop_times`` is passed on
    to the stop/area linkage check. Timeframe ids still left in the working
    copy after the fare products and fare leg rules are read are reported in
    a single warning.
    """
    root = Path(gtfs_root_dir)
    report = diagnostics.Diagnostics()
    feed = Entities()

    feed.areas = read_fares_entities.areas(root, report)
    feed.networks = read_gtfs_entities.networks(root, report)
    read_gtfs_entities.verify_stop_area_linkage(root, feed.areas, report, should_read_stop_times)
    feed.service_ids = read_gtfs_entities.service_ids(root, report)

    feed.timeframe_ids = read_fares_entities.timeframes(root, report)
    # Working copy handed to the readers below; the loaded ids stay untouched on the feed.
    pending_timeframes = feed.timeframe_ids.copy()

    feed.rider_category_ids = read_fares_entities.rider_categories(root, report)
    feed.rider_category_by_fare_container = read_fares_entities.fare_containers(
        root, feed.rider_category_ids, report)
    feed.linked_entities_by_fare_product = read_fares_entities.fare_products(
        root, feed, pending_timeframes, report)
    feed.leg_group_ids = read_fares_entities.fare_leg_rules(
        root, feed, pending_timeframes, report)

    read_fares_entities.fare_transfer_rules(root, feed, report)

    if len(pending_timeframes):
        details = 'Unused timeframes: ' + str(pending_timeframes)
        report.add_warning(diagnostics.format(warn.UNUSED_TIMEFRAME_IDS, '', '', details))

    return report
Loading

0 comments on commit 4004ab0

Please sign in to comment.