-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Structure code so we can potentially pip install it * WIP: Simplify error handling fix bad operator prec; * Finish more of operator prec / messages refactor * Reduce local variables / get / null checks * Avoid passing around line_number context * Turn callbacks into generators * Rename expected_fields to defined_fields * Ask intellij to reformat the code * Auto-attach line context in CSV reader * WIP: remove bogus params * Make more tolerant of cgobbledegook agency files * Only use one CSV reading function; use sets in a few more places * More sets * Add very gradual schema to prevent some potential mistakes in the code from going undetected * More type structure for the overall GTFS struct * Type safety + more sets for linked entities * One last cleanup * Fix: an stupid * Flip in to not in Co-authored-by: Jeremy Steele <[email protected]>
- Loading branch information
Showing
68 changed files
with
1,123 additions
and
1,157 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
3.7.4 | ||
3.7.11 |
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
import argparse | ||
from os import path | ||
|
||
from .loader import run_validator | ||
|
||
|
||
def main():
    """Command-line entry point: validate a fares-v2 GTFS folder and report the results.

    Parses the command-line arguments, runs the validator on the given folder
    and then either prints the report or, when ``--output-file`` is given,
    writes it to that file.

    Raises:
        Exception: if the input path is not a directory, or if the report
            cannot be written to the requested output file.
    """
    parser = argparse.ArgumentParser(description='Validate GTFS fares-v2 data.')
    parser.add_argument("-s", "--read-stop-times", help="Scan stop_times for area_ids", action='store_true')
    parser.add_argument("-o", "--output-file", type=str, help="Export the errors and warnings to a file")
    parser.add_argument("input_gtfs_folder", type=str, help="Path to unzipped folder containing the Fares-v2 GTFS")

    args = parser.parse_args()

    gtfs_path = args.input_gtfs_folder
    if not path.isdir(gtfs_path):
        raise Exception('Input path is not a valid folder.')

    results = run_validator(gtfs_path, args.read_stop_times)
    output = results.to_string()

    if not args.output_file:
        print(output)
        return

    try:
        # The context manager guarantees the file is flushed and closed even
        # when the write fails part-way through.
        with open(args.output_file, 'w') as report_file:
            report_file.write(output)
    except (OSError, UnicodeError) as err:
        # Keep the user-facing message but preserve the underlying cause.
        raise Exception('Writing to output file failed. Please ensure the output file path is valid.') from err
|
||
|
||
if __name__ == '__main__': | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -139,4 +139,4 @@ | |
'YER': 2, | ||
'ZAR': 2, | ||
'ZMW': 2, | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
def format(code, line_context='', path='', extra_info=''):
    """Assemble a diagnostic message from its parts.

    The result is ``"<path>: "`` (only when ``path`` is non-empty), followed
    by ``code``, followed by a newline and ``extra_info`` (only when
    ``extra_info`` is non-empty), followed by ``line_context`` verbatim.

    Note: the name shadows the ``format`` builtin inside this module; it is
    kept because it is the function's public name.
    """
    prefix = path + ': ' if path else ''
    details = '\n' + extra_info if extra_info else ''
    return prefix + code + details + line_context
|
||
|
||
class Diagnostics:
    """Accumulates error and warning messages and renders them as one report."""

    def __init__(self):
        self.errors = []
        self.warnings = []

    def add_warning(self, message):
        """Append ``message`` to the list of warnings."""
        self.warnings.append(message)

    def add_error(self, message):
        """Append ``message`` to the list of errors."""
        self.errors.append(message)

    def to_string(self):
        """Return the report text: the errors section followed by the warnings section.

        Each section is either a heading followed by one blank-line-separated
        entry per message, or a fixed "nothing to report" sentence.
        """
        if self.errors:
            chunks = ['ERRORS:\n']
            chunks.extend(f'\n{error}\n' for error in self.errors)
        else:
            chunks = ['No errors detected.\n']

        if self.warnings:
            chunks.append('\n\nWARNINGS:\n')
            chunks.extend(f'\n{warning}\n' for warning in self.warnings)
        else:
            chunks.append('\n\nNo warnings to report.')

        return ''.join(chunks)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
from . import utils | ||
from .errors import * | ||
|
||
|
||
def check_areas(line, areas, unused_areas):
    """Validate the area-related fields of one row and track which areas are used.

    Errors are reported through ``line.add_error``. The row's
    ``from_area_id`` and ``to_area_id`` are removed from ``unused_areas``
    when present there, and ``from_area_id``, ``to_area_id`` and
    ``contains_area_id`` are each passed to ``utils.check_linked_id``
    together with ``areas``.
    """
    symmetrical = line.is_symmetrical
    origin = line.from_area_id
    destination = line.to_area_id
    has_endpoint = bool(origin or destination)

    if symmetrical and symmetrical not in {'0', '1'}:
        line.add_error(INVALID_IS_SYMMETRICAL_LEG_RULES)

    if line.contains_area_id and not has_endpoint:
        line.add_error(CONTAINS_AREA_WITHOUT_FROM_TO_AREA)

    # is_symmetrical and the from/to areas must be given together.
    if has_endpoint and not symmetrical:
        line.add_error(AREA_WITHOUT_IS_SYMMETRICAL)
    elif symmetrical and not has_endpoint:
        line.add_error(IS_SYMMETRICAL_WITHOUT_FROM_TO_AREA)

    for area_id in (origin, destination):
        if area_id and area_id in unused_areas:
            unused_areas.remove(area_id)

    for field_name in ('from_area_id', 'to_area_id', 'contains_area_id'):
        utils.check_linked_id(line, field_name, areas)
|
||
|
||
def check_distances(line):
    """Validate ``distance_type``, ``min_distance`` and ``max_distance`` of one row.

    ``distance_type`` must be ``'0'`` or ``'1'`` when given; each distance,
    when given, must parse as a float and must not be negative; and the
    distance type and the distances must be given together. Problems are
    reported through ``line.add_error``.
    """
    distance_type = line.distance_type
    if distance_type and distance_type not in {'0', '1'}:
        line.add_error(INVALID_DISTANCE_TYPE)

    min_distance = line.min_distance
    if min_distance:
        try:
            min_is_negative = float(min_distance) < 0
        except ValueError:
            line.add_error(INVALID_MIN_DISTANCE)
        else:
            if min_is_negative:
                line.add_error(NEGATIVE_MIN_DISTANCE)

    max_distance = line.max_distance
    if max_distance:
        try:
            max_is_negative = float(max_distance) < 0
        except ValueError:
            line.add_error(INVALID_MAX_DISTANCE)
        else:
            if max_is_negative:
                line.add_error(NEGATIVE_MAX_DISTANCE)

    has_distance = bool(min_distance or max_distance)
    if has_distance and not distance_type:
        line.add_error(DISTANCE_WITHOUT_DISTANCE_TYPE)
    elif distance_type and not has_distance:
        line.add_error(DISTANCE_TYPE_WITHOUT_DISTANCE)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
from .errors import * | ||
from .warnings import * | ||
|
||
|
||
class LinkedEntities:
    """Holds the rider-category ids and fare-container ids linked to one entity."""

    def __init__(self):
        # Both collections start empty; ids are added by the checking code.
        self.rider_category_ids: set = set()
        self.fare_container_ids: set = set()
|
||
|
||
def check_linked_fp_entities(line, rider_categories, rider_category_by_fare_container, linked_entities_by_fare_product):
    """Record and validate the rider category / fare container linked to a fare product row.

    The ``LinkedEntities`` entry for ``line.fare_product_id`` in
    ``linked_entities_by_fare_product`` is created if missing and updated
    with the row's rider category id and fare container id. Errors are
    reported through ``line.add_error`` when an id is unknown, or when the
    row's rider category differs from the one registered for its fare
    container.
    """
    product_id = line.fare_product_id
    linked = linked_entities_by_fare_product.get(product_id)
    if linked is None:
        linked = LinkedEntities()
        linked_entities_by_fare_product[product_id] = linked

    rider_category_id = line.rider_category_id
    if rider_category_id:
        linked.rider_category_ids.add(rider_category_id)
        if rider_category_id not in rider_categories:
            line.add_error(NONEXISTENT_RIDER_CATEGORY_ID)

    fare_container_id = line.fare_container_id
    if fare_container_id:
        linked.fare_container_ids.add(fare_container_id)
        if fare_container_id not in rider_category_by_fare_container:
            line.add_error(NONEXISTENT_FARE_CONTAINER_ID)

    # A fare container may carry its own rider category; it must agree with
    # the one on this row when both are present.
    container_category = rider_category_by_fare_container.get(fare_container_id)
    if rider_category_id and container_category and rider_category_id != container_category:
        line.add_error(CONFLICTING_RIDER_CATEGORY_ON_FARE_CONTAINER)
|
||
|
||
def check_bundle(line):
    """Report an error when ``line.bundle_amount`` is given but is not a non-negative integer."""
    raw_amount = line.bundle_amount
    if not raw_amount:
        return

    try:
        is_valid = int(raw_amount) >= 0
    except ValueError:
        is_valid = False

    if not is_valid:
        line.add_error(INVALID_BUNDLE_AMOUNT)
|
||
|
||
def check_durations_and_offsets(line):
    """Validate the duration and offset fields of one row.

    Checks the enumerated values of ``duration_start``, ``duration_unit``,
    ``duration_type`` and ``offset_unit``, that ``duration_amount`` is an
    integer of at least 1 and ``offset_amount`` is an integer, and that the
    amount / unit / type fields are given in consistent combinations.
    Problems go to ``line.add_error``, except a missing ``offset_unit`` next
    to an ``offset_amount``, which goes to ``line.add_warning``.
    """
    unit_codes = {'0', '1', '2', '3', '4', '5', '6'}

    duration_start = line.duration_start
    duration_unit = line.duration_unit
    duration_type = line.duration_type

    if duration_start and duration_start not in {'0', '1'}:
        line.add_error(INVALID_DURATION_START)

    if duration_unit and duration_unit not in unit_codes:
        line.add_error(INVALID_DURATION_UNIT)

    if duration_type and duration_type not in {'1', '2'}:
        line.add_error(INVALID_DURATION_TYPE)

    if duration_type == '1' and duration_start:
        line.add_error(DURATION_START_WITH_DURATION_TYPE)

    duration_amount = line.duration_amount
    if not duration_amount:
        # Type and unit are meaningless without an amount.
        if duration_type:
            line.add_error(DURATION_TYPE_WITHOUT_AMOUNT)
        if duration_unit:
            line.add_error(DURATION_UNIT_WITHOUT_AMOUNT)
    else:
        try:
            too_small = int(duration_amount) < 1
        except ValueError:
            line.add_error(NON_INT_DURATION_AMOUNT)
        else:
            if too_small:
                line.add_error(NEGATIVE_OR_ZERO_DURATION)

        if not duration_unit:
            line.add_error(DURATION_WITHOUT_UNIT)

        if not duration_type:
            line.add_error(DURATION_WITHOUT_TYPE)

    offset_unit = line.offset_unit
    if offset_unit and offset_unit not in unit_codes:
        line.add_error(INVALID_OFFSET_UNIT)

    offset_amount = line.offset_amount
    if offset_amount:
        try:
            int(offset_amount)  # only checking that it parses
        except ValueError:
            line.add_error(NON_INT_OFFSET_AMOUNT)

        if duration_type == '2':
            line.add_error(OFFSET_AMOUNT_WITH_DURATION_TYPE)

        if not offset_unit:
            line.add_warning(OFFSET_AMOUNT_WITHOUT_OFFSET_UNIT)
    elif offset_unit:
        line.add_error(OFFSET_UNIT_WITHOUT_AMOUNT)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
from .errors import * | ||
|
||
|
||
def check_leg_groups(line, leg_group_ids, unused_leg_groups):
    """Validate the leg-group fields of one row and track which leg groups are used.

    ``is_symmetrical`` must be ``'0'`` or ``'1'`` when given and must be
    given together with a from/to leg group; each leg group id given must be
    in ``leg_group_ids``. Problems are reported through ``line.add_error``.
    The row's from/to leg group ids are removed from ``unused_leg_groups``
    when present there.
    """
    symmetrical = line.is_symmetrical
    from_group = line.from_leg_group_id
    to_group = line.to_leg_group_id
    has_group = bool(from_group or to_group)

    if symmetrical and symmetrical not in {'0', '1'}:
        line.add_error(INVALID_IS_SYMMETRICAL_TRANSFER_RULES)

    if has_group and not symmetrical:
        line.add_error(LEG_GROUP_WITHOUT_IS_SYMMETRICAL)
    elif symmetrical and not has_group:
        line.add_error(IS_SYMMETRICAL_WITHOUT_FROM_TO_LEG_GROUP)

    if from_group and from_group not in leg_group_ids:
        line.add_error(INVALID_FROM_LEG_GROUP)
    if to_group and to_group not in leg_group_ids:
        line.add_error(INVALID_TO_LEG_GROUP)

    for group_id in (from_group, to_group):
        if group_id in unused_leg_groups:
            unused_leg_groups.remove(group_id)
|
||
|
||
def check_spans_and_transfer_ids(line):
    """Validate ``spanning_limit``, ``transfer_id`` and ``transfer_sequence`` of one row.

    A spanning limit requires identical from/to leg groups, no transfer id,
    and an integer value greater than 1. ``transfer_id`` and
    ``transfer_sequence`` must be given together, and the sequence must be
    an integer of at least 1. Problems are reported through
    ``line.add_error``.
    """
    transfer_id = line.transfer_id
    transfer_sequence = line.transfer_sequence
    spanning_limit = line.spanning_limit

    if spanning_limit:
        if line.from_leg_group_id != line.to_leg_group_id:
            line.add_error(SPANNING_LIMIT_WITH_BAD_LEGS)
        if transfer_id:
            line.add_error(SPANNING_LIMIT_WITH_TRANSFER_ID)

        try:
            limit_ok = int(spanning_limit) > 1
        except ValueError:
            limit_ok = False
        if not limit_ok:
            line.add_error(INVALID_SPANNING_LIMIT)

    if transfer_id and not transfer_sequence:
        line.add_error(TRANSFER_ID_WITHOUT_TRANSFER_SEQUENCE)

    if transfer_sequence:
        if not transfer_id:
            line.add_error(TRANSFER_SEQUENCE_WITHOUT_TRANSFER_ID)

        try:
            sequence_ok = int(transfer_sequence) >= 1
        except ValueError:
            sequence_ok = False
        if not sequence_ok:
            line.add_error(INVALID_TRANSFER_SEQUENCE)
|
||
|
||
def check_durations(line):
    """Validate ``duration_limit`` and ``duration_limit_type`` of one row.

    The limit type must be one of ``'0'``..``'3'`` when given; the limit,
    when given, must be an integer of at least 1; and the two fields must be
    given together. Problems are reported through ``line.add_error``.
    """
    limit_type = line.duration_limit_type
    if limit_type and limit_type not in {'0', '1', '2', '3'}:
        line.add_error(INVALID_DURATION_LIMIT_TYPE)

    raw_limit = line.duration_limit
    if not raw_limit:
        if limit_type:
            line.add_error(DURATION_LIMIT_TYPE_WITHOUT_DURATION)
        return

    if not limit_type:
        line.add_error(DURATION_LIMIT_WITHOUT_LIMIT_TYPE)

    try:
        limit_ok = int(raw_limit) >= 1
    except ValueError:
        limit_ok = False
    if not limit_ok:
        line.add_error(INVALID_DURATION_LIMIT)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
from pathlib import Path | ||
|
||
from . import read_gtfs_entities, read_fares_entities, diagnostics | ||
from . import warnings as warn | ||
|
||
|
||
class Entities:
    """Plain attribute container for the entities read from the GTFS feed.

    The known attribute types can eventually be listed here for a
    typechecker like mypy.
    """
|
||
|
||
def run_validator(gtfs_root_dir, should_read_stop_times):
    """Read every GTFS / fares entity under ``gtfs_root_dir`` and collect diagnostics.

    The reader functions are called in a fixed order because later ones take
    the entities gathered by earlier ones. ``should_read_stop_times`` is
    forwarded to the stop/area linkage check. Timeframes still left in the
    unused set after the readers have run are reported as a warning.

    Returns:
        The ``diagnostics.Diagnostics`` instance passed to the readers.
    """
    root = Path(gtfs_root_dir)
    report = diagnostics.Diagnostics()
    entities = Entities()

    entities.areas = read_fares_entities.areas(root, report)
    entities.networks = read_gtfs_entities.networks(root, report)
    read_gtfs_entities.verify_stop_area_linkage(root, entities.areas, report, should_read_stop_times)
    entities.service_ids = read_gtfs_entities.service_ids(root, report)

    entities.timeframe_ids = read_fares_entities.timeframes(root, report)
    # Working copy handed to the readers below; whatever is still in it
    # afterwards is reported as unused.
    unused_timeframes = entities.timeframe_ids.copy()

    entities.rider_category_ids = read_fares_entities.rider_categories(root, report)
    entities.rider_category_by_fare_container = read_fares_entities.fare_containers(
        root, entities.rider_category_ids, report)
    entities.linked_entities_by_fare_product = read_fares_entities.fare_products(
        root, entities, unused_timeframes, report)
    entities.leg_group_ids = read_fares_entities.fare_leg_rules(
        root, entities, unused_timeframes, report)
    read_fares_entities.fare_transfer_rules(root, entities, report)

    if unused_timeframes:
        details = f'Unused timeframes: {unused_timeframes!s}'
        report.add_warning(diagnostics.format(warn.UNUSED_TIMEFRAME_IDS, '', '', details))

    return report
Oops, something went wrong.