diff --git a/.python-version b/.python-version index 0833a98..584a914 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.7.4 +3.7.11 diff --git a/License b/LICENSE similarity index 100% rename from License rename to LICENSE diff --git a/__init__.py b/fares_validator/__init__.py similarity index 100% rename from __init__.py rename to fares_validator/__init__.py diff --git a/fares_validator/__main__.py b/fares_validator/__main__.py new file mode 100644 index 0000000..409a56f --- /dev/null +++ b/fares_validator/__main__.py @@ -0,0 +1,34 @@ +import argparse +from os import path + +from .loader import run_validator + + +def main(): + parser = argparse.ArgumentParser(description='Validate GTFS fares-v2 data.') + parser.add_argument("-s", "--read-stop-times", help="Scan stop_times for area_ids", action='store_true') + parser.add_argument("-o", "--output-file", type=str, help="Export the errors and warnings to a file") + parser.add_argument("input_gtfs_folder", type=str, help="Path to unzipped folder containing the Fares-v2 GTFS") + + args = parser.parse_args() + + gtfs_path = args.input_gtfs_folder + if not path.isdir(gtfs_path): + raise Exception('Input path is not a valid folder.') + + read_stop_times = args.read_stop_times + results = run_validator(gtfs_path, read_stop_times) + output = results.to_string() + + if args.output_file: + try: + f = open(args.output_file, 'w') + f.write(output) + except Exception: + raise Exception('Writing to output file failed. Please ensure the output file path is valid.') + else: + print(output) + + +if __name__ == '__main__': + main() diff --git a/src/decimals_by_currency.py b/fares_validator/decimals_by_currency.py similarity index 99% rename from src/decimals_by_currency.py rename to fares_validator/decimals_by_currency.py index ac41121..afcf00d 100644 --- a/src/decimals_by_currency.py +++ b/fares_validator/decimals_by_currency.py @@ -139,4 +139,4 @@ 'YER': 2, 'ZAR': 2, 'ZMW': 2, -} \ No newline at end of file +} diff --git a/fares_validator/diagnostics.py b/fares_validator/diagnostics.py new file mode 100644 index 0000000..38b5ce8 --- /dev/null +++ b/fares_validator/diagnostics.py @@ -0,0 +1,44 @@ +def format(code, line_context='', path='', extra_info=''): + msg = '' + if path: + msg += path + ': ' + + msg += code + + if extra_info: + msg += '\n' + extra_info + + msg += line_context + return msg + + +class Diagnostics: + def __init__(self): + self.errors = [] + self.warnings = [] + + def add_warning(self, message): + self.warnings.append(message) + + def add_error(self, message): + self.errors.append(message) + + def to_string(self): + output = '' + if len(self.errors): + output += 'ERRORS:\n' + + for error in self.errors: + output += f'\n{error}\n' + else: + output += 'No errors detected.\n' + + if len(self.warnings): + output += '\n\nWARNINGS:\n' + + for warning in self.warnings: + output += f'\n{warning}\n' + else: + output += '\n\nNo warnings to report.' + + return output diff --git a/src/errors.py b/fares_validator/errors.py similarity index 96% rename from src/errors.py rename to fares_validator/errors.py index 13285a7..240668a 100644 --- a/src/errors.py +++ b/fares_validator/errors.py @@ -101,14 +101,3 @@ SPANNING_LIMIT_WITH_TRANSFER_ID = 'An entry in fare_transfer_rules.txt has spanning_limit with transfer_id defined.' TRANSFER_ID_WITHOUT_TRANSFER_SEQUENCE = 'A transfer_id in fare_transfer_rules.txt is defined without a transfer_sequence.' 
TRANSFER_SEQUENCE_WITHOUT_TRANSFER_ID = 'A transfer_sequence in fare_transfer_rules.txt is defined without a transfer_id.' - -def add_error(error, line_num_error_msg, errors, path='', extra_info=''): - error_msg = '' - if path: - error_msg += path + ': ' - error_msg += error - if extra_info: - error_msg += '\n' + extra_info - error_msg += line_num_error_msg - - errors.append(error_msg) \ No newline at end of file diff --git a/fares_validator/fare_leg_rule_checkers.py b/fares_validator/fare_leg_rule_checkers.py new file mode 100644 index 0000000..b84d0ac --- /dev/null +++ b/fares_validator/fare_leg_rule_checkers.py @@ -0,0 +1,50 @@ +from . import utils +from .errors import * + + +def check_areas(line, areas, unused_areas): + if line.is_symmetrical and line.is_symmetrical not in {'0', '1'}: + line.add_error(INVALID_IS_SYMMETRICAL_LEG_RULES) + + if line.contains_area_id and (not line.from_area_id and not line.to_area_id): + line.add_error(CONTAINS_AREA_WITHOUT_FROM_TO_AREA) + + if (line.from_area_id or line.to_area_id) and not line.is_symmetrical: + line.add_error(AREA_WITHOUT_IS_SYMMETRICAL) + + if (not line.from_area_id and not line.to_area_id) and line.is_symmetrical: + line.add_error(IS_SYMMETRICAL_WITHOUT_FROM_TO_AREA) + + if line.from_area_id and line.from_area_id in unused_areas: + unused_areas.remove(line.from_area_id) + if line.to_area_id and line.to_area_id in unused_areas: + unused_areas.remove(line.to_area_id) + + utils.check_linked_id(line, 'from_area_id', areas) + utils.check_linked_id(line, 'to_area_id', areas) + utils.check_linked_id(line, 'contains_area_id', areas) + + +def check_distances(line): + if line.distance_type and line.distance_type not in {'0', '1'}: + line.add_error(INVALID_DISTANCE_TYPE) + + if line.min_distance: + try: + dist = float(line.min_distance) + if dist < 0: + line.add_error(NEGATIVE_MIN_DISTANCE) + except ValueError: + line.add_error(INVALID_MIN_DISTANCE) + if line.max_distance: + try: + dist = float(line.max_distance) + if dist < 0: + line.add_error(NEGATIVE_MAX_DISTANCE) + except ValueError: + line.add_error(INVALID_MAX_DISTANCE) + + if (line.min_distance or line.max_distance) and not line.distance_type: + line.add_error(DISTANCE_WITHOUT_DISTANCE_TYPE) + if (not line.min_distance and not line.max_distance) and line.distance_type: + line.add_error(DISTANCE_TYPE_WITHOUT_DISTANCE) diff --git a/fares_validator/fare_product_checkers.py b/fares_validator/fare_product_checkers.py new file mode 100644 index 0000000..b753d27 --- /dev/null +++ b/fares_validator/fare_product_checkers.py @@ -0,0 +1,89 @@ +from .errors import * +from .warnings import * + + +class LinkedEntities: + def __init__(self): + self.rider_category_ids = set() + self.fare_container_ids = set() + + +def check_linked_fp_entities(line, rider_categories, rider_category_by_fare_container, linked_entities_by_fare_product): + linked_entities = linked_entities_by_fare_product.setdefault(line.fare_product_id, LinkedEntities()) + + if line.rider_category_id: + linked_entities.rider_category_ids.add(line.rider_category_id) + if line.rider_category_id not in rider_categories: + line.add_error(NONEXISTENT_RIDER_CATEGORY_ID) + + if line.fare_container_id: + linked_entities.fare_container_ids.add(line.fare_container_id) + if line.fare_container_id not in rider_category_by_fare_container: + line.add_error(NONEXISTENT_FARE_CONTAINER_ID) + + fare_container_rider_cat = rider_category_by_fare_container.get(line.fare_container_id) + if line.rider_category_id and fare_container_rider_cat and 
(line.rider_category_id != fare_container_rider_cat): + line.add_error(CONFLICTING_RIDER_CATEGORY_ON_FARE_CONTAINER) + + linked_entities_by_fare_product[line.fare_product_id] = linked_entities + + +def check_bundle(line): + if line.bundle_amount: + try: + bundle_amt = int(line.bundle_amount) + if bundle_amt < 0: + line.add_error(INVALID_BUNDLE_AMOUNT) + except ValueError: + line.add_error(INVALID_BUNDLE_AMOUNT) + + +def check_durations_and_offsets(line): + if line.duration_start and line.duration_start not in {'0', '1'}: + line.add_error(INVALID_DURATION_START) + + if line.duration_unit and line.duration_unit not in {'0', '1', '2', '3', '4', '5', '6'}: + line.add_error(INVALID_DURATION_UNIT) + + if line.duration_type and line.duration_type not in {'1', '2'}: + line.add_error(INVALID_DURATION_TYPE) + + if line.duration_type == '1' and line.duration_start: + line.add_error(DURATION_START_WITH_DURATION_TYPE) + + if line.duration_amount: + try: + amt = int(line.duration_amount) + if amt < 1: + line.add_error(NEGATIVE_OR_ZERO_DURATION) + except ValueError: + line.add_error(NON_INT_DURATION_AMOUNT) + + if not line.duration_unit: + line.add_error(DURATION_WITHOUT_UNIT) + + if not line.duration_type: + line.add_error(DURATION_WITHOUT_TYPE) + else: + if line.duration_type: + line.add_error(DURATION_TYPE_WITHOUT_AMOUNT) + if line.duration_unit: + line.add_error(DURATION_UNIT_WITHOUT_AMOUNT) + + if line.offset_unit and line.offset_unit not in {'0', '1', '2', '3', '4', '5', '6'}: + line.add_error(INVALID_OFFSET_UNIT) + + if line.offset_amount: + try: + amt = int(line.offset_amount) + except ValueError: + line.add_error(NON_INT_OFFSET_AMOUNT) + + if line.duration_type == '2': + line.add_error(OFFSET_AMOUNT_WITH_DURATION_TYPE) + + if not line.offset_unit: + line.add_warning(OFFSET_AMOUNT_WITHOUT_OFFSET_UNIT) + else: + if line.offset_unit: + line.add_error(OFFSET_UNIT_WITHOUT_AMOUNT) diff --git a/fares_validator/fare_transfer_rule_checkers.py b/fares_validator/fare_transfer_rule_checkers.py new file mode 100644 index 0000000..ee3efa9 --- /dev/null +++ b/fares_validator/fare_transfer_rule_checkers.py @@ -0,0 +1,65 @@ +from .errors import * + + +def check_leg_groups(line, leg_group_ids, unused_leg_groups): + if line.is_symmetrical and line.is_symmetrical not in {'0', '1'}: + line.add_error(INVALID_IS_SYMMETRICAL_TRANSFER_RULES) + if (line.from_leg_group_id or line.to_leg_group_id) and not line.is_symmetrical: + line.add_error(LEG_GROUP_WITHOUT_IS_SYMMETRICAL) + if (not line.from_leg_group_id and not line.to_leg_group_id) and line.is_symmetrical: + line.add_error(IS_SYMMETRICAL_WITHOUT_FROM_TO_LEG_GROUP) + if line.from_leg_group_id and not line.from_leg_group_id in leg_group_ids: + line.add_error(INVALID_FROM_LEG_GROUP) + if line.to_leg_group_id and not line.to_leg_group_id in leg_group_ids: + line.add_error(INVALID_TO_LEG_GROUP) + + if line.from_leg_group_id in unused_leg_groups: + unused_leg_groups.remove(line.from_leg_group_id) + if line.to_leg_group_id in unused_leg_groups: + unused_leg_groups.remove(line.to_leg_group_id) + + +def check_spans_and_transfer_ids(line): + if line.spanning_limit: + if line.from_leg_group_id != line.to_leg_group_id: + line.add_error(SPANNING_LIMIT_WITH_BAD_LEGS) + if line.transfer_id: + line.add_error(SPANNING_LIMIT_WITH_TRANSFER_ID) + try: + limit = int(line.spanning_limit) + if limit <= 1: + line.add_error(INVALID_SPANNING_LIMIT) + except ValueError: + line.add_error(INVALID_SPANNING_LIMIT) + + if line.transfer_id: + if not line.transfer_sequence: + 
line.add_error(TRANSFER_ID_WITHOUT_TRANSFER_SEQUENCE) + + if line.transfer_sequence: + if not line.transfer_id: + line.add_error(TRANSFER_SEQUENCE_WITHOUT_TRANSFER_ID) + try: + seq = int(line.transfer_sequence) + if seq < 1: + line.add_error(INVALID_TRANSFER_SEQUENCE) + except ValueError: + line.add_error(INVALID_TRANSFER_SEQUENCE) + + +def check_durations(line): + if line.duration_limit_type and line.duration_limit_type not in {'0', '1', '2', '3'}: + line.add_error(INVALID_DURATION_LIMIT_TYPE) + + if line.duration_limit: + if not line.duration_limit_type: + line.add_error(DURATION_LIMIT_WITHOUT_LIMIT_TYPE) + try: + limit = int(line.duration_limit) + if limit < 1: + line.add_error(INVALID_DURATION_LIMIT) + except ValueError: + line.add_error(INVALID_DURATION_LIMIT) + else: + if line.duration_limit_type: + line.add_error(DURATION_LIMIT_TYPE_WITHOUT_DURATION) diff --git a/fares_validator/loader.py b/fares_validator/loader.py new file mode 100644 index 0000000..7bac5af --- /dev/null +++ b/fares_validator/loader.py @@ -0,0 +1,49 @@ +from pathlib import Path + +from . import read_gtfs_entities, read_fares_entities, diagnostics +from . import warnings as warn + + +class Entities: + # Can eventually list the known types here for a typechecker like mypy + pass + + +def run_validator(gtfs_root_dir, should_read_stop_times): + gtfs_root_dir = Path(gtfs_root_dir) + results = diagnostics.Diagnostics() + + gtfs = Entities() + + gtfs.areas = read_fares_entities.areas(gtfs_root_dir, results) + + gtfs.networks = read_gtfs_entities.networks(gtfs_root_dir, results) + + read_gtfs_entities.verify_stop_area_linkage(gtfs_root_dir, gtfs.areas, results, should_read_stop_times) + + gtfs.service_ids = read_gtfs_entities.service_ids(gtfs_root_dir, results) + + gtfs.timeframe_ids = read_fares_entities.timeframes(gtfs_root_dir, results) + unused_timeframes = gtfs.timeframe_ids.copy() + + gtfs.rider_category_ids = read_fares_entities.rider_categories(gtfs_root_dir, results) + + gtfs.rider_category_by_fare_container = read_fares_entities.fare_containers(gtfs_root_dir, + gtfs.rider_category_ids, + results) + + gtfs.linked_entities_by_fare_product = read_fares_entities.fare_products(gtfs_root_dir, + gtfs, + unused_timeframes, + results) + + gtfs.leg_group_ids = read_fares_entities.fare_leg_rules(gtfs_root_dir, gtfs, + unused_timeframes, results) + + read_fares_entities.fare_transfer_rules(gtfs_root_dir, gtfs, results) + + if len(unused_timeframes): + warning_info = 'Unused timeframes: ' + str(unused_timeframes) + results.add_warning(diagnostics.format(warn.UNUSED_TIMEFRAME_IDS, '', '', warning_info)) + + return results diff --git a/fares_validator/read_fares_entities.py b/fares_validator/read_fares_entities.py new file mode 100644 index 0000000..b7f00b3 --- /dev/null +++ b/fares_validator/read_fares_entities.py @@ -0,0 +1,281 @@ +# Reads files introduced as part of the GTFS fares-v2 specification + +import re + +from . 
import schema, diagnostics +from .errors import * +from .fare_leg_rule_checkers import check_areas, check_distances +from .fare_product_checkers import check_linked_fp_entities, check_bundle, check_durations_and_offsets +from .fare_transfer_rule_checkers import check_leg_groups, check_spans_and_transfer_ids, check_durations +from .utils import check_fare_amount, read_csv_file, check_linked_id, check_amts, check_linked_flr_ftr_entities +from .warnings import * + + +def areas(gtfs_root_dir, messages): + greater_area_id_by_area_id = {} + + for line in read_csv_file(gtfs_root_dir, schema.AREAS, messages): + if line.area_id in greater_area_id_by_area_id: + line.add_error(DUPLICATE_AREA_ID) + continue + + if not line.area_id: + line.add_error(EMPTY_AREA_ID) + continue + + greater_area_id_by_area_id[line.area_id] = line.greater_area_id + + for area_id in greater_area_id_by_area_id: + greater_area_id = greater_area_id_by_area_id[area_id] + + while greater_area_id: + if greater_area_id == area_id: + messages.add_error(diagnostics.format(GREATER_AREA_ID_LOOP, '', '', f'area_id: {area_id}')) + break + + if greater_area_id not in greater_area_id_by_area_id: + messages.add_error(diagnostics.format(UNDEFINED_GREATER_AREA_ID, '', '', + f'greater_area_id: {greater_area_id}')) + break + + greater_area_id = greater_area_id_by_area_id[greater_area_id] + + return set(greater_area_id_by_area_id.keys()) + + +def timeframes(gtfs_root_dir, messages): + timeframes = set() + for line in read_csv_file(gtfs_root_dir, schema.TIMEFRAMES, + messages): + if not line.timeframe_id: + line.add_error(EMPTY_TIMEFRAME_ID) + continue + if not line.start_time: + line.add_error(EMPTY_START_TIME) + continue + if not line.end_time: + line.add_error(EMPTY_END_TIME) + continue + + starttimematch = re.search(r'^\d?\d:\d\d:\d\d$', line.start_time) + endtimematch = re.search(r'^\d?\d:\d\d:\d\d$', line.end_time) + + if not starttimematch or not endtimematch: + line.add_error(INVALID_TIME_FORMAT) + timeframes.add(line.timeframe_id) + continue + + starttime_split = line.start_time.split(':') + endtime_split = line.end_time.split(':') + + if int(starttime_split[0]) > 23 or int(endtime_split[0]) > 23: + line.add_error(INVALID_TIME_FORMAT) + + if int(starttime_split[1]) > 59 or int(endtime_split[1]) > 59: + line.add_error(INVALID_TIME_FORMAT) + + if int(starttime_split[2]) > 59 or int(endtime_split[2]) > 59: + line.add_error(INVALID_TIME_FORMAT) + + timeframes.add(line.timeframe_id) + + return timeframes + + +def rider_categories(gtfs_root_dir, messages): + rider_categories = set() + for line in read_csv_file(gtfs_root_dir, + schema.RIDER_CATEGORIES, messages): + min_age_int = 0 + if not line.rider_category_id: + line.add_error(EMPTY_RIDER_CATEGORY_ID) + continue + + rider_categories.add(line.rider_category_id) + + if line.min_age: + try: + min_age_int = int(line.min_age) + if min_age_int < 0: + line.add_error(NEGATIVE_MIN_AGE) + if min_age_int > 100: + line.add_warning(VERY_LARGE_MIN_AGE) + except ValueError: + line.add_error(NON_INT_MIN_AGE) + + if line.max_age: + try: + max_age_int = int(line.max_age) + if max_age_int < 0: + line.add_error(NEGATIVE_MAX_AGE) + if max_age_int > 100: + line.add_warning(VERY_LARGE_MAX_AGE) + if max_age_int <= min_age_int: + line.add_warning(MAX_AGE_LESS_THAN_MIN_AGE) + except ValueError: + line.add_error(NON_INT_MAX_AGE) + + return rider_categories + + +def fare_containers(gtfs_root_dir, rider_categories, messages): + rider_category_by_fare_container = {} + + for line in read_csv_file(gtfs_root_dir, + 
schema.FARE_CONTAINERS, messages): + if not line.fare_container_id: + line.add_error(EMPTY_FARE_CONTAINER_ID) + continue + + if not line.fare_container_name: + line.add_error(EMPTY_FARE_CONTAINER_NAME) + continue + + amount_exists = check_fare_amount(line, 'amount', 'currency') + min_purchase_exists = check_fare_amount(line, 'minimum_initial_purchase', 'currency') + if (not amount_exists and not min_purchase_exists) and line.currency: + line.add_error(CURRENCY_WITHOUT_AMOUNT) + + if line.fare_container_id in rider_category_by_fare_container: + line.add_error(DUPLICATE_FARE_CONTAINER_ID) + continue + + if line.rider_category_id: + if line.rider_category_id not in rider_categories: + line.add_error(NONEXISTENT_RIDER_CATEGORY_ID) + + rider_category_by_fare_container[line.fare_container_id] = line.rider_category_id + + return rider_category_by_fare_container + + +def fare_products(gtfs_root_dir, gtfs, unused_timeframes, messages): + linked_entities_by_fare_product = {} + + fare_products_path = gtfs_root_dir / 'fare_products.txt' + + for line in read_csv_file(gtfs_root_dir, schema.FARE_PRODUCTS, messages): + if not line.fare_product_id: + line.add_error(EMPTY_FARE_PRODUCT_ID) + continue + if not line.fare_product_name: + line.add_error(EMPTY_FARE_PRODUCT_NAME) + continue + + check_linked_fp_entities(line, gtfs.rider_category_ids, gtfs.rider_category_by_fare_container, + linked_entities_by_fare_product) + + min_amt_exists = check_fare_amount(line, 'min_amount', 'currency') + max_amt_exists = check_fare_amount(line, 'max_amount', 'currency') + amt_exists = check_fare_amount(line, 'amount', 'currency') + if (not min_amt_exists and not max_amt_exists and not amt_exists) and line.currency: + line.add_error(CURRENCY_WITHOUT_AMOUNT) + + check_amts(fare_products_path, line, min_amt_exists, max_amt_exists, amt_exists) + + check_bundle(line) + check_linked_id(line, 'service_id', gtfs.service_ids) + timeframe_exists = check_linked_id(line, 'timeframe_id', gtfs.timeframe_ids) + + if line.timeframe_id in unused_timeframes: + unused_timeframes.remove(line.timeframe_id) + if timeframe_exists: + if line.timeframe_type not in {'0', '1'}: + line.add_error(INVALID_TIMEFRAME_TYPE) + else: + if line.timeframe_type: + line.add_error(TIMEFRAME_TYPE_WITHOUT_TIMEFRAME) + + check_durations_and_offsets(line) + + return linked_entities_by_fare_product + + +def fare_leg_rules(gtfs_root_dir, gtfs, unused_timeframes, messages): + leg_group_ids = set() + + unused_areas = gtfs.areas.copy() + unused_networks = gtfs.networks.copy() + fare_leg_rules_path = gtfs_root_dir / 'fare_leg_rules.txt' + + if not fare_leg_rules_path.exists(): + messages.add_warning(diagnostics.format(NO_FARE_LEG_RULES, '')) + + for line in read_csv_file(gtfs_root_dir, schema.FARE_LEG_RULES, messages): + if line.leg_group_id: + leg_group_ids.add(line.leg_group_id) + + check_areas(line, gtfs.areas, unused_areas) + + check_linked_id(line, 'network_id', gtfs.networks) + if line.network_id in unused_networks: + unused_networks.remove(line.network_id) + + check_linked_id(line, 'from_timeframe_id', gtfs.timeframe_ids) + if line.from_timeframe_id in unused_timeframes: + unused_timeframes.remove(line.from_timeframe_id) + check_linked_id(line, 'to_timeframe_id', gtfs.timeframe_ids) + if line.to_timeframe_id in unused_timeframes: + unused_timeframes.remove(line.to_timeframe_id) + + check_linked_id(line, 'service_id', gtfs.service_ids) + + check_distances(line) + + min_amt_exists = check_fare_amount(line, 'min_amount', 'currency') + max_amt_exists = 
check_fare_amount(line, 'max_amount', 'currency') + amt_exists = check_fare_amount(line, 'amount', 'currency') + if (not min_amt_exists and not max_amt_exists and not amt_exists) and line.currency: + line.add_error(CURRENCY_WITHOUT_AMOUNT) + check_amts(fare_leg_rules_path, line, min_amt_exists, max_amt_exists, amt_exists) + if (min_amt_exists or max_amt_exists or amt_exists) and line.fare_product_id: + line.add_error(AMOUNT_WITH_FARE_PRODUCT) + + if line.fare_leg_name and line.fare_product_id: + line.add_error(FARE_LEG_NAME_WITH_FARE_PRODUCT) + + check_linked_flr_ftr_entities(line, gtfs.rider_category_ids, gtfs.rider_category_by_fare_container, + gtfs.linked_entities_by_fare_product) + + if len(unused_areas): + messages.add_warning(diagnostics.format(UNUSED_AREA_IDS, '', '', f'Unused areas: {unused_areas}')) + + if len(unused_networks): + messages.add_warning(diagnostics.format(UNUSED_NETWORK_IDS, '', '', f'Unused networks: {unused_networks}')) + + return leg_group_ids + + +def fare_transfer_rules(gtfs_root_dir, gtfs, messages): + unused_leg_groups = gtfs.leg_group_ids.copy() + fare_transfer_rules_path = gtfs_root_dir / 'fare_transfer_rules.txt' + + if not fare_transfer_rules_path.exists(): + messages.add_warning(diagnostics.format(NO_FARE_TRANSFER_RULES, '')) + + for line in read_csv_file(gtfs_root_dir, schema.FARE_TRANSFER_RULES, messages): + check_leg_groups(line, gtfs.leg_group_ids, unused_leg_groups) + check_spans_and_transfer_ids(line) + check_durations(line) + + min_amt_exists = check_fare_amount(line, 'min_amount', 'currency', ) + max_amt_exists = check_fare_amount(line, 'max_amount', 'currency') + amt_exists = check_fare_amount(line, 'amount', 'currency') + if (not min_amt_exists and not max_amt_exists and not amt_exists) and line.currency: + line.add_error(CURRENCY_WITHOUT_AMOUNT) + + check_amts(fare_transfer_rules_path, line, min_amt_exists, max_amt_exists, amt_exists) + + if (min_amt_exists or max_amt_exists or amt_exists) and not line.fare_transfer_type: + line.add_error(AMOUNT_WITHOUT_FARE_TRANSFER_TYPE) + if (not min_amt_exists and not max_amt_exists and not amt_exists) and line.fare_transfer_type: + line.add_error(FARE_TRANSFER_TYPE_WITHOUT_AMOUNT) + if line.fare_transfer_type and (line.fare_transfer_type not in {'0', '1', '2', '3'}): + line.add_error(INVALID_FARE_TRANSFER_TYPE) + + check_linked_flr_ftr_entities(line, gtfs.rider_category_ids, + gtfs.rider_category_by_fare_container, gtfs.linked_entities_by_fare_product) + + if len(unused_leg_groups): + messages.add_warning(diagnostics.format(UNUSED_LEG_GROUPS, '', '', + f'Unused leg groups: {unused_leg_groups}')) diff --git a/fares_validator/read_gtfs_entities.py b/fares_validator/read_gtfs_entities.py new file mode 100644 index 0000000..b7a7809 --- /dev/null +++ b/fares_validator/read_gtfs_entities.py @@ -0,0 +1,61 @@ +""" +Reads files introduced as part of the original GTFS specification +""" + +from . 
import diagnostics, utils, schema +from .errors import * +from .warnings import * + + +def networks(gtfs_root_dir, messages): + networks = set() + + for line in utils.read_csv_file(gtfs_root_dir, schema.ROUTES, messages): + if line.network_id: + networks.add(line.network_id) + + return networks + + +def verify_stop_area_linkage(gtfs_root_dir, areas, messages, should_read_stop_times): + stops_path = gtfs_root_dir / 'stops.txt' + stop_times_path = gtfs_root_dir / 'stop_times.txt' + + unused_areas = areas.copy() + + if stops_path.exists(): + utils.check_areas_of_file(stops_path, 'stop', areas, unused_areas, messages) + else: + messages.add_warning(diagnostics.format(NO_STOPS, '')) + + if should_read_stop_times and stop_times_path.exists(): + utils.check_areas_of_file(stop_times_path, 'stop_time', areas, unused_areas, messages) + + if len(unused_areas): + messages.add_warning(diagnostics.format(UNUSED_AREAS_IN_STOPS, '', '', f'Unused areas: {unused_areas}')) + + +def service_ids(gtfs_root_dir, messages): + service_ids = set() + if not (gtfs_root_dir / 'calendar.txt').exists() and not (gtfs_root_dir / 'calendar_dates.txt').exists(): + messages.add_warning(diagnostics.format(NO_SERVICE_IDS, '')) + return service_ids + + for line in utils.read_csv_file(gtfs_root_dir, schema.CALENDAR, messages): + if not line.service_id: + line.add_error(EMPTY_SERVICE_ID_CALENDAR) + continue + + if line.service_id in service_ids: + line.add_error(DUPLICATE_SERVICE_ID, f'service_id: {line.service_id}') + + service_ids.add(line.service_id) + + for line in utils.read_csv_file(gtfs_root_dir, schema.CALENDAR_DATES, messages): + if not line.service_id: + line.add_error(EMPTY_SERVICE_ID_CALENDAR_DATES) + continue + + service_ids.add(line.service_id) + + return service_ids diff --git a/src/expected_fields.py b/fares_validator/schema.py similarity index 54% rename from src/expected_fields.py rename to fares_validator/schema.py index 7057e5e..6481336 100644 --- a/src/expected_fields.py +++ b/fares_validator/schema.py @@ -1,33 +1,46 @@ -EXPECTED_AREAS_FIELDS = [ - 'area_id', - 'area_name', - 'greater_area_id' -] +from . 
import warnings +from .utils import Schema -EXPECTED_TIMEFRAMES_FIELDS = [ +AREAS = Schema('areas.txt', + required_fields={'area_id'}, + defined_fields={ + 'area_id', + 'area_name', + 'greater_area_id' + }, message_if_missing=warnings.NO_AREAS) + +ROUTES = Schema('routes.txt', set(), {'network_id'}, + message_if_missing=warnings.NO_ROUTES, + suppress_undefined_field_warning=True) + +CALENDAR = Schema('calendar.txt', {'service_id'}, set()) + +CALENDAR_DATES = Schema('calendar_dates.txt', {'service_id'}, set()) + +TIMEFRAMES = Schema('timeframes.txt', {'timeframe_id', 'start_time', 'end_time'}, { 'timeframe_id', 'start_time', 'end_time' -] +}, message_if_missing=warnings.NO_TIMEFRAMES) -EXPECTED_RIDER_CATEGORIES_FIELDS = [ +RIDER_CATEGORIES = Schema('rider_categories.txt', {'rider_category_id'}, { 'rider_category_id', 'min_age', 'max_age', 'rider_category_name', 'eligibility_url' -] +}, message_if_missing=warnings.NO_RIDER_CATEGORIES) -EXPECTED_FARE_CONTAINERS_FIELDS = [ +FARE_CONTAINERS = Schema('fare_containers.txt', {'fare_container_id', 'fare_container_name'}, { 'fare_container_id', 'fare_container_name', 'minimum_initial_purchase', 'amount', 'currency', 'rider_category_id' -] +}, message_if_missing=warnings.NO_FARE_CONTAINERS) -EXPECTED_FARE_PRODUCTS_FIELDS = [ +FARE_PRODUCTS = Schema('fare_products.txt', {'fare_product_id', 'fare_product_name'}, { 'fare_product_id', 'fare_product_name', 'rider_category_id', @@ -48,9 +61,9 @@ 'min_amount', 'max_amount', 'currency' -] +}, message_if_missing=warnings.NO_FARE_PRODUCTS) -EXPECTED_FARE_LEG_RULES_FIELDS = [ +FARE_LEG_RULES = Schema('fare_leg_rules.txt', set(), { 'leg_group_id', 'fare_leg_name', 'network_id', @@ -72,9 +85,9 @@ 'fare_container_id', 'rider_category_id', 'eligible_cap_id' -] +}) -EXPECTED_FARE_TRANSFER_RULES_FIELDS = [ +FARE_TRANSFER_RULES = Schema('fare_transfer_rules.txt', set(), { 'from_leg_group_id', 'to_leg_group_id', 'is_symmetrical', @@ -92,4 +105,4 @@ 'fare_container_id', 'rider_category_id', 'eligible_cap_id' -] \ No newline at end of file +}) diff --git a/src/__init__.py b/fares_validator/tests/__init__.py similarity index 100% rename from src/__init__.py rename to fares_validator/tests/__init__.py diff --git a/tests/test_data/bad_fare_leg_rules/areas.txt b/fares_validator/tests/test_data/bad_fare_leg_rules/areas.txt similarity index 100% rename from tests/test_data/bad_fare_leg_rules/areas.txt rename to fares_validator/tests/test_data/bad_fare_leg_rules/areas.txt diff --git a/tests/test_data/bad_fare_leg_rules/fare_containers.txt b/fares_validator/tests/test_data/bad_fare_leg_rules/fare_containers.txt similarity index 100% rename from tests/test_data/bad_fare_leg_rules/fare_containers.txt rename to fares_validator/tests/test_data/bad_fare_leg_rules/fare_containers.txt diff --git a/tests/test_data/bad_fare_leg_rules/fare_leg_rules.txt b/fares_validator/tests/test_data/bad_fare_leg_rules/fare_leg_rules.txt similarity index 100% rename from tests/test_data/bad_fare_leg_rules/fare_leg_rules.txt rename to fares_validator/tests/test_data/bad_fare_leg_rules/fare_leg_rules.txt diff --git a/tests/test_data/bad_fare_leg_rules/fare_products.txt b/fares_validator/tests/test_data/bad_fare_leg_rules/fare_products.txt similarity index 100% rename from tests/test_data/bad_fare_leg_rules/fare_products.txt rename to fares_validator/tests/test_data/bad_fare_leg_rules/fare_products.txt diff --git a/tests/test_data/bad_fare_leg_rules/rider_categories.txt b/fares_validator/tests/test_data/bad_fare_leg_rules/rider_categories.txt similarity 
index 100% rename from tests/test_data/bad_fare_leg_rules/rider_categories.txt rename to fares_validator/tests/test_data/bad_fare_leg_rules/rider_categories.txt diff --git a/tests/test_data/bad_fare_products/fare_products.txt b/fares_validator/tests/test_data/bad_fare_products/fare_products.txt similarity index 100% rename from tests/test_data/bad_fare_products/fare_products.txt rename to fares_validator/tests/test_data/bad_fare_products/fare_products.txt diff --git a/tests/test_data/bad_fare_products/timeframes.txt b/fares_validator/tests/test_data/bad_fare_products/timeframes.txt similarity index 100% rename from tests/test_data/bad_fare_products/timeframes.txt rename to fares_validator/tests/test_data/bad_fare_products/timeframes.txt diff --git a/tests/test_data/bad_fare_transfer_rules/fare_containers.txt b/fares_validator/tests/test_data/bad_fare_transfer_rules/fare_containers.txt similarity index 100% rename from tests/test_data/bad_fare_transfer_rules/fare_containers.txt rename to fares_validator/tests/test_data/bad_fare_transfer_rules/fare_containers.txt diff --git a/tests/test_data/bad_fare_transfer_rules/fare_leg_rules.txt b/fares_validator/tests/test_data/bad_fare_transfer_rules/fare_leg_rules.txt similarity index 100% rename from tests/test_data/bad_fare_transfer_rules/fare_leg_rules.txt rename to fares_validator/tests/test_data/bad_fare_transfer_rules/fare_leg_rules.txt diff --git a/tests/test_data/bad_fare_transfer_rules/fare_products.txt b/fares_validator/tests/test_data/bad_fare_transfer_rules/fare_products.txt similarity index 100% rename from tests/test_data/bad_fare_transfer_rules/fare_products.txt rename to fares_validator/tests/test_data/bad_fare_transfer_rules/fare_products.txt diff --git a/tests/test_data/bad_fare_transfer_rules/fare_transfer_rules.txt b/fares_validator/tests/test_data/bad_fare_transfer_rules/fare_transfer_rules.txt similarity index 100% rename from tests/test_data/bad_fare_transfer_rules/fare_transfer_rules.txt rename to fares_validator/tests/test_data/bad_fare_transfer_rules/fare_transfer_rules.txt diff --git a/tests/test_data/bad_fare_transfer_rules/rider_categories.txt b/fares_validator/tests/test_data/bad_fare_transfer_rules/rider_categories.txt similarity index 100% rename from tests/test_data/bad_fare_transfer_rules/rider_categories.txt rename to fares_validator/tests/test_data/bad_fare_transfer_rules/rider_categories.txt diff --git a/tests/test_data/bad_gtfs_simple/areas.txt b/fares_validator/tests/test_data/bad_gtfs_simple/areas.txt similarity index 100% rename from tests/test_data/bad_gtfs_simple/areas.txt rename to fares_validator/tests/test_data/bad_gtfs_simple/areas.txt diff --git a/tests/test_data/bad_gtfs_simple/calendar.txt b/fares_validator/tests/test_data/bad_gtfs_simple/calendar.txt similarity index 100% rename from tests/test_data/bad_gtfs_simple/calendar.txt rename to fares_validator/tests/test_data/bad_gtfs_simple/calendar.txt diff --git a/tests/test_data/bad_gtfs_simple/calendar_dates.txt b/fares_validator/tests/test_data/bad_gtfs_simple/calendar_dates.txt similarity index 100% rename from tests/test_data/bad_gtfs_simple/calendar_dates.txt rename to fares_validator/tests/test_data/bad_gtfs_simple/calendar_dates.txt diff --git a/tests/test_data/bad_gtfs_simple/fare_containers.txt b/fares_validator/tests/test_data/bad_gtfs_simple/fare_containers.txt similarity index 100% rename from tests/test_data/bad_gtfs_simple/fare_containers.txt rename to fares_validator/tests/test_data/bad_gtfs_simple/fare_containers.txt diff --git 
a/tests/test_data/bad_gtfs_simple/rider_categories.txt b/fares_validator/tests/test_data/bad_gtfs_simple/rider_categories.txt similarity index 100% rename from tests/test_data/bad_gtfs_simple/rider_categories.txt rename to fares_validator/tests/test_data/bad_gtfs_simple/rider_categories.txt diff --git a/tests/test_data/bad_gtfs_simple/stop_times.txt b/fares_validator/tests/test_data/bad_gtfs_simple/stop_times.txt similarity index 100% rename from tests/test_data/bad_gtfs_simple/stop_times.txt rename to fares_validator/tests/test_data/bad_gtfs_simple/stop_times.txt diff --git a/tests/test_data/bad_gtfs_simple/stops.txt b/fares_validator/tests/test_data/bad_gtfs_simple/stops.txt similarity index 100% rename from tests/test_data/bad_gtfs_simple/stops.txt rename to fares_validator/tests/test_data/bad_gtfs_simple/stops.txt diff --git a/tests/test_data/bad_gtfs_simple/timeframes.txt b/fares_validator/tests/test_data/bad_gtfs_simple/timeframes.txt similarity index 100% rename from tests/test_data/bad_gtfs_simple/timeframes.txt rename to fares_validator/tests/test_data/bad_gtfs_simple/timeframes.txt diff --git a/tests/test_data/required_fields_test/areas.txt b/fares_validator/tests/test_data/required_fields_test/areas.txt similarity index 100% rename from tests/test_data/required_fields_test/areas.txt rename to fares_validator/tests/test_data/required_fields_test/areas.txt diff --git a/tests/test_data/required_fields_test/calendar.txt b/fares_validator/tests/test_data/required_fields_test/calendar.txt similarity index 100% rename from tests/test_data/required_fields_test/calendar.txt rename to fares_validator/tests/test_data/required_fields_test/calendar.txt diff --git a/tests/test_data/required_fields_test/calendar_dates.txt b/fares_validator/tests/test_data/required_fields_test/calendar_dates.txt similarity index 100% rename from tests/test_data/required_fields_test/calendar_dates.txt rename to fares_validator/tests/test_data/required_fields_test/calendar_dates.txt diff --git a/tests/test_data/required_fields_test/fare_containers.txt b/fares_validator/tests/test_data/required_fields_test/fare_containers.txt similarity index 100% rename from tests/test_data/required_fields_test/fare_containers.txt rename to fares_validator/tests/test_data/required_fields_test/fare_containers.txt diff --git a/tests/test_data/required_fields_test/fare_products.txt b/fares_validator/tests/test_data/required_fields_test/fare_products.txt similarity index 100% rename from tests/test_data/required_fields_test/fare_products.txt rename to fares_validator/tests/test_data/required_fields_test/fare_products.txt diff --git a/tests/test_data/required_fields_test/rider_categories.txt b/fares_validator/tests/test_data/required_fields_test/rider_categories.txt similarity index 100% rename from tests/test_data/required_fields_test/rider_categories.txt rename to fares_validator/tests/test_data/required_fields_test/rider_categories.txt diff --git a/tests/test_data/required_fields_test/timeframes.txt b/fares_validator/tests/test_data/required_fields_test/timeframes.txt similarity index 100% rename from tests/test_data/required_fields_test/timeframes.txt rename to fares_validator/tests/test_data/required_fields_test/timeframes.txt diff --git a/tests/test_data/warnings_test_gtfs/areas.txt b/fares_validator/tests/test_data/warnings_test_gtfs/areas.txt similarity index 100% rename from tests/test_data/warnings_test_gtfs/areas.txt rename to fares_validator/tests/test_data/warnings_test_gtfs/areas.txt diff --git 
a/tests/test_data/warnings_test_gtfs/calendar.txt b/fares_validator/tests/test_data/warnings_test_gtfs/calendar.txt similarity index 100% rename from tests/test_data/warnings_test_gtfs/calendar.txt rename to fares_validator/tests/test_data/warnings_test_gtfs/calendar.txt diff --git a/tests/test_data/warnings_test_gtfs/fare_containers.txt b/fares_validator/tests/test_data/warnings_test_gtfs/fare_containers.txt similarity index 100% rename from tests/test_data/warnings_test_gtfs/fare_containers.txt rename to fares_validator/tests/test_data/warnings_test_gtfs/fare_containers.txt diff --git a/tests/test_data/warnings_test_gtfs/fare_leg_rules.txt b/fares_validator/tests/test_data/warnings_test_gtfs/fare_leg_rules.txt similarity index 100% rename from tests/test_data/warnings_test_gtfs/fare_leg_rules.txt rename to fares_validator/tests/test_data/warnings_test_gtfs/fare_leg_rules.txt diff --git a/tests/test_data/warnings_test_gtfs/fare_products.txt b/fares_validator/tests/test_data/warnings_test_gtfs/fare_products.txt similarity index 100% rename from tests/test_data/warnings_test_gtfs/fare_products.txt rename to fares_validator/tests/test_data/warnings_test_gtfs/fare_products.txt diff --git a/tests/test_data/warnings_test_gtfs/fare_transfer_rules.txt b/fares_validator/tests/test_data/warnings_test_gtfs/fare_transfer_rules.txt similarity index 100% rename from tests/test_data/warnings_test_gtfs/fare_transfer_rules.txt rename to fares_validator/tests/test_data/warnings_test_gtfs/fare_transfer_rules.txt diff --git a/tests/test_data/warnings_test_gtfs/rider_categories.txt b/fares_validator/tests/test_data/warnings_test_gtfs/rider_categories.txt similarity index 100% rename from tests/test_data/warnings_test_gtfs/rider_categories.txt rename to fares_validator/tests/test_data/warnings_test_gtfs/rider_categories.txt diff --git a/tests/test_data/warnings_test_gtfs/routes.txt b/fares_validator/tests/test_data/warnings_test_gtfs/routes.txt similarity index 100% rename from tests/test_data/warnings_test_gtfs/routes.txt rename to fares_validator/tests/test_data/warnings_test_gtfs/routes.txt diff --git a/tests/test_data/warnings_test_gtfs/stops.txt b/fares_validator/tests/test_data/warnings_test_gtfs/stops.txt similarity index 100% rename from tests/test_data/warnings_test_gtfs/stops.txt rename to fares_validator/tests/test_data/warnings_test_gtfs/stops.txt diff --git a/tests/test_data/warnings_test_gtfs/timeframes.txt b/fares_validator/tests/test_data/warnings_test_gtfs/timeframes.txt similarity index 100% rename from tests/test_data/warnings_test_gtfs/timeframes.txt rename to fares_validator/tests/test_data/warnings_test_gtfs/timeframes.txt diff --git a/fares_validator/tests/test_errors.py b/fares_validator/tests/test_errors.py new file mode 100644 index 0000000..6d2bf6f --- /dev/null +++ b/fares_validator/tests/test_errors.py @@ -0,0 +1,188 @@ +from fares_validator.loader import run_validator +from fares_validator import errors +from pathlib import Path + +test_data_dir = Path(__file__).parent / 'test_data' + +def test_errors_simple_files(): + results = run_validator(test_data_dir / 'bad_gtfs_simple', True) + + # Areas errors + assert errors.DUPLICATE_AREA_ID in results.errors[0] + assert errors.EMPTY_AREA_ID in results.errors[1] + assert errors.GREATER_AREA_ID_LOOP in results.errors[2] + + # Stops errors + assert errors.NONEXISTENT_AREA_ID in results.errors[5] + + # Stop times errors + assert errors.NONEXISTENT_AREA_ID in results.errors[6] + + # Calendar errors + assert errors.EMPTY_SERVICE_ID_CALENDAR 
in results.errors[7] + assert errors.DUPLICATE_SERVICE_ID in results.errors[8] + + # Calendar dates errors + assert errors.EMPTY_SERVICE_ID_CALENDAR_DATES in results.errors[9] + + # Timeframes errors + assert errors.INVALID_TIME_FORMAT in results.errors[10] + assert errors.INVALID_TIME_FORMAT in results.errors[11] + assert errors.EMPTY_START_TIME in results.errors[12] + assert errors.EMPTY_END_TIME in results.errors[13] + assert errors.EMPTY_TIMEFRAME_ID in results.errors[14] + + # Rider categories errors + assert errors.EMPTY_RIDER_CATEGORY_ID in results.errors[15] + assert errors.NEGATIVE_MIN_AGE in results.errors[16] + assert errors.NEGATIVE_MAX_AGE in results.errors[17] + assert errors.NON_INT_MIN_AGE in results.errors[18] + assert errors.NON_INT_MAX_AGE in results.errors[19] + + # Fare containers errors + assert errors.EMPTY_FARE_CONTAINER_ID in results.errors[20] + assert errors.EMPTY_FARE_CONTAINER_NAME in results.errors[21] + assert errors.NONEXISTENT_RIDER_CATEGORY_ID in results.errors[22] + assert errors.AMOUNT_WITHOUT_CURRENCY in results.errors[23] + assert errors.INVALID_AMOUNT_FORMAT in results.errors[24] + assert errors.AMOUNT_WITHOUT_CURRENCY in results.errors[25] + assert errors.INVALID_AMOUNT_FORMAT in results.errors[26] + assert errors.CURRENCY_WITHOUT_AMOUNT in results.errors[27] + assert errors.DUPLICATE_FARE_CONTAINER_ID in results.errors[28] + + assert len(results.errors) == 29 + +def test_errors_fare_products(): + results = run_validator(test_data_dir / 'bad_fare_products', False) + + assert errors.EMPTY_FARE_PRODUCT_ID in results.errors[0] + assert errors.EMPTY_FARE_PRODUCT_NAME in results.errors[1] + assert errors.MISSING_MIN_OR_MAX_AMOUNT in results.errors[2] + assert errors.AMOUNT_WITH_MIN_OR_MAX_AMOUNT in results.errors[3] + assert errors.AMOUNT_WITHOUT_CURRENCY in results.errors[4] + assert errors.AMOUNT_WITHOUT_CURRENCY in results.errors[5] + assert errors.AMOUNT_WITHOUT_CURRENCY in results.errors[6] # this also is for line 7 of fare products + assert errors.NO_AMOUNT_DEFINED in results.errors[7] + assert errors.FOREIGN_ID_INVALID in results.errors[8] + assert errors.INVALID_TIMEFRAME_TYPE in results.errors[9] + assert errors.INVALID_TIMEFRAME_TYPE in results.errors[10] + assert errors.FOREIGN_ID_INVALID in results.errors[11] + assert errors.TIMEFRAME_TYPE_WITHOUT_TIMEFRAME in results.errors[12] + + assert len(results.errors) == 13 + +def test_errors_fare_leg_rules(): + results = run_validator(test_data_dir / 'bad_fare_leg_rules', False) + + # check areas + assert errors.AREA_WITHOUT_IS_SYMMETRICAL in results.errors[0] + assert errors.CONTAINS_AREA_WITHOUT_FROM_TO_AREA in results.errors[1] + assert errors.IS_SYMMETRICAL_WITHOUT_FROM_TO_AREA in results.errors[2] + assert errors.INVALID_IS_SYMMETRICAL_LEG_RULES in results.errors[3] + assert errors.FOREIGN_ID_INVALID in results.errors[4] + + # check networks + assert errors.FOREIGN_ID_INVALID in results.errors[5] + + # check timeframes + assert errors.FOREIGN_ID_INVALID in results.errors[6] + assert errors.FOREIGN_ID_INVALID in results.errors[7] + + # check service_id + assert errors.FOREIGN_ID_INVALID in results.errors[8] + + # check distances + assert errors.INVALID_MIN_DISTANCE in results.errors[9] + assert errors.INVALID_MAX_DISTANCE in results.errors[10] + assert errors.DISTANCE_WITHOUT_DISTANCE_TYPE in results.errors[11] + assert errors.INVALID_DISTANCE_TYPE in results.errors[12] + assert errors.NEGATIVE_MIN_DISTANCE in results.errors[13] + assert errors.NEGATIVE_MAX_DISTANCE in results.errors[14] + assert 
errors.DISTANCE_TYPE_WITHOUT_DISTANCE in results.errors[15] + + # check amounts/fare_product/fare_leg_name + assert errors.CURRENCY_WITHOUT_AMOUNT in results.errors[16] + assert errors.AMOUNT_WITH_FARE_PRODUCT in results.errors[17] + assert errors.AMOUNT_WITH_MIN_OR_MAX_AMOUNT in results.errors[18] + assert errors.MISSING_MIN_OR_MAX_AMOUNT in results.errors[19] + assert errors.FARE_LEG_NAME_WITH_FARE_PRODUCT in results.errors[20] + + # check linked entities + assert errors.NONEXISTENT_FARE_PRODUCT_ID in results.errors[21] + assert errors.NONEXISTENT_RIDER_CATEGORY_ID in results.errors[22] + assert errors.NONEXISTENT_FARE_CONTAINER_ID in results.errors[23] + assert errors.CONFLICTING_RIDER_CATEGORY_ON_FARE_PRODUCT in results.errors[24] + assert errors.CONFLICTING_FARE_CONTAINER_ON_FARE_PRODUCT in results.errors[25] + assert errors.CONFLICTING_RIDER_CATEGORY_ON_FARE_CONTAINER in results.errors[26] + + assert len(results.errors) == 27 + +def test_errors_fare_transfer_rules(): + results = run_validator(test_data_dir / 'bad_fare_transfer_rules', False) + + # check leg groups + assert errors.IS_SYMMETRICAL_WITHOUT_FROM_TO_LEG_GROUP in results.errors[0] + assert errors.LEG_GROUP_WITHOUT_IS_SYMMETRICAL in results.errors[1] + assert errors.INVALID_IS_SYMMETRICAL_TRANSFER_RULES in results.errors[2] + assert errors.INVALID_TO_LEG_GROUP in results.errors[3] + assert errors.INVALID_FROM_LEG_GROUP in results.errors[4] + + # check transfer_id and spans + assert errors.SPANNING_LIMIT_WITH_BAD_LEGS in results.errors[5] + assert errors.INVALID_SPANNING_LIMIT in results.errors[6] + assert errors.INVALID_SPANNING_LIMIT in results.errors[7] + assert errors.SPANNING_LIMIT_WITH_TRANSFER_ID in results.errors[8] + assert errors.TRANSFER_ID_WITHOUT_TRANSFER_SEQUENCE in results.errors[9] + assert errors.TRANSFER_SEQUENCE_WITHOUT_TRANSFER_ID in results.errors[10] + assert errors.INVALID_TRANSFER_SEQUENCE in results.errors[11] + assert errors.INVALID_TRANSFER_SEQUENCE in results.errors[12] + + # check durations + assert errors.INVALID_DURATION_LIMIT_TYPE in results.errors[13] + assert errors.DURATION_LIMIT_WITHOUT_LIMIT_TYPE in results.errors[14] + assert errors.INVALID_DURATION_LIMIT in results.errors[15] + assert errors.DURATION_LIMIT_TYPE_WITHOUT_DURATION in results.errors[16] + + # check amounts + assert errors.CURRENCY_WITHOUT_AMOUNT in results.errors[17] + assert errors.AMOUNT_WITHOUT_CURRENCY in results.errors[18] + assert errors.AMOUNT_WITHOUT_FARE_TRANSFER_TYPE in results.errors[19] + assert errors.INVALID_FARE_TRANSFER_TYPE in results.errors[20] + assert errors.UNRECOGNIZED_CURRENCY_CODE in results.errors[21] + assert errors.FARE_TRANSFER_TYPE_WITHOUT_AMOUNT in results.errors[22] + + # check linked entities + assert errors.NONEXISTENT_FARE_PRODUCT_ID in results.errors[23] + assert errors.NONEXISTENT_RIDER_CATEGORY_ID in results.errors[24] + assert errors.NONEXISTENT_FARE_CONTAINER_ID in results.errors[25] + assert errors.CONFLICTING_RIDER_CATEGORY_ON_FARE_PRODUCT in results.errors[26] + assert errors.CONFLICTING_FARE_CONTAINER_ON_FARE_PRODUCT in results.errors[27] + assert errors.CONFLICTING_RIDER_CATEGORY_ON_FARE_CONTAINER in results.errors[28] + + assert len(results.errors) == 29 + +def test_required_fields(): + results = run_validator(test_data_dir / 'required_fields_test', False) + + assert errors.REQUIRED_FIELD_MISSING in results.errors[0] + assert 'areas.txt' in results.errors[0] + + assert errors.REQUIRED_FIELD_MISSING in results.errors[1] + assert 'calendar.txt' in results.errors[1] + + assert 
errors.REQUIRED_FIELD_MISSING in results.errors[2] + assert 'calendar_dates.txt' in results.errors[2] + + assert errors.REQUIRED_FIELD_MISSING in results.errors[3] + assert 'timeframes.txt' in results.errors[3] + + assert errors.REQUIRED_FIELD_MISSING in results.errors[4] + assert 'rider_categories.txt' in results.errors[4] + + assert errors.REQUIRED_FIELD_MISSING in results.errors[5] + assert 'fare_containers.txt' in results.errors[5] + + assert errors.REQUIRED_FIELD_MISSING in results.errors[6] + assert 'fare_products.txt' in results.errors[6] + + assert len(results.errors) == 7 diff --git a/fares_validator/tests/test_warnings.py b/fares_validator/tests/test_warnings.py new file mode 100644 index 0000000..902ae90 --- /dev/null +++ b/fares_validator/tests/test_warnings.py @@ -0,0 +1,47 @@ +from fares_validator.loader import run_validator +from fares_validator import warnings +from pathlib import Path + +test_data_dir = Path(__file__).parent / 'test_data' + +def test_warnings(): + results = run_validator(test_data_dir / 'warnings_test_gtfs', True) + + # Stops / stop times warnings + assert warnings.UNUSED_AREAS_IN_STOPS in results.warnings[0] + + # Rider categories warnings + assert warnings.MAX_AGE_LESS_THAN_MIN_AGE in results.warnings[1] + assert warnings.VERY_LARGE_MIN_AGE in results.warnings[2] + assert warnings.VERY_LARGE_MAX_AGE in results.warnings[3] + + # Fare products warnings + assert warnings.OFFSET_AMOUNT_WITHOUT_OFFSET_UNIT in results.warnings[4] + + # Fare leg rule warnings + assert warnings.UNUSED_AREA_IDS in results.warnings[5] + assert warnings.UNUSED_NETWORK_IDS in results.warnings[6] + + # Fare transfer rule warnings + assert warnings.UNUSED_LEG_GROUPS in results.warnings[7] + + # generic warnings + assert warnings.UNUSED_TIMEFRAME_IDS in results.warnings[8] + + assert len(results.warnings) == 9 + +def test_warnings_nonexistent_files(): + results = run_validator(test_data_dir / 'no_files', True) + + assert warnings.NO_AREAS in results.warnings[0] + assert warnings.NO_ROUTES in results.warnings[1] + assert warnings.NO_STOPS in results.warnings[2] + assert warnings.NO_SERVICE_IDS in results.warnings[3] + assert warnings.NO_TIMEFRAMES in results.warnings[4] + assert warnings.NO_RIDER_CATEGORIES in results.warnings[5] + assert warnings.NO_FARE_CONTAINERS in results.warnings[6] + assert warnings.NO_FARE_PRODUCTS in results.warnings[7] + assert warnings.NO_FARE_LEG_RULES in results.warnings[8] + assert warnings.NO_FARE_TRANSFER_RULES in results.warnings[9] + + assert len(results.warnings) == 10 diff --git a/fares_validator/utils.py b/fares_validator/utils.py new file mode 100644 index 0000000..b195aeb --- /dev/null +++ b/fares_validator/utils.py @@ -0,0 +1,166 @@ +import csv +from pathlib import Path + +from . 
import diagnostics +from .decimals_by_currency import decimals_by_currency +from .errors import * +from .warnings import * + + +class Schema: + FAKE_FIELDS = {'line_num_error_msg'} + + def __init__(self, basename, required_fields, defined_fields, *, + message_if_missing=None, + suppress_undefined_field_warning=False): + self.basename = basename + self.required_fields = required_fields + self.defined_fields = defined_fields + self.valid_fields = self.defined_fields | self.required_fields | Schema.FAKE_FIELDS + self.message_if_missing = message_if_missing + self.suppress_undefined_field_warning = suppress_undefined_field_warning + + def has_field(self, field_name): + return field_name in self.valid_fields + + +class Entity: + def __init__(self, schema, messages, original_dict): + self._schema = schema + self._messages = messages + self._data = original_dict + + def __getattr__(self, item): + if self._schema.has_field(item): + return self._data.get(item) + else: + raise TypeError(f'Reference to undefined field {item} in code!') + + def add_error(self, code, extra_info=''): + self._messages.add_error(diagnostics.format(code, self.line_num_error_msg, self._schema.basename, extra_info)) + + def add_warning(self, code, extra_info=''): + self._messages.add_warning(diagnostics.format(code, self.line_num_error_msg, self._schema.basename, extra_info)) + + +def read_csv_file(gtfs_root_dir, schema, messages): + path = gtfs_root_dir / schema.basename + + if not path.exists(): + if schema.message_if_missing: + messages.add_warning(diagnostics.format(schema.message_if_missing)) + return [] + + with open(path, 'r', encoding='utf-8-sig') as csvfile: + reader = csv.DictReader(csvfile, skipinitialspace=True) + + for required_field in schema.required_fields: + if required_field not in reader.fieldnames: + messages.add_error( + diagnostics.format(REQUIRED_FIELD_MISSING, '', schema.basename, f'field: {required_field}')) + return [] + + if schema.defined_fields and not schema.suppress_undefined_field_warning: + unexpected_fields = [] + for field in reader.fieldnames: + if field not in schema.defined_fields: + unexpected_fields.append(field) + if len(unexpected_fields): + messages.add_warning(diagnostics.format(UNEXPECTED_FIELDS, '', schema.basename, + f'\nColumn(s): {unexpected_fields}')) + + for line in reader: + line['line_num_error_msg'] = f'\nLine: {reader.line_num}' + entity = Entity(schema, messages, line) + yield entity + + +def check_fare_amount(line, fare_field, currency_field): + fare = getattr(line, fare_field) + currency = getattr(line, currency_field) + + if fare: + if not currency: + line.add_error(AMOUNT_WITHOUT_CURRENCY) + return True + if currency not in decimals_by_currency: + line.add_error(UNRECOGNIZED_CURRENCY_CODE) + return True + try: + float(fare) + if '.' 
in fare: + num_decimal_places = len(fare.split('.')[1]) + if num_decimal_places > decimals_by_currency[currency]: + line.add_error(TOO_MANY_AMOUNT_DECIMALS) + except Exception: + line.add_error(INVALID_AMOUNT_FORMAT) + return True + else: + return False + + +def check_amts(path, line, min_amt_exists, max_amt_exists, amt_exists): + filename = Path(path).name + if (min_amt_exists or max_amt_exists) and amt_exists: + line.add_error(AMOUNT_WITH_MIN_OR_MAX_AMOUNT) + if (min_amt_exists and not max_amt_exists) or (max_amt_exists and not min_amt_exists): + line.add_error(MISSING_MIN_OR_MAX_AMOUNT) + if (not amt_exists and not min_amt_exists and not max_amt_exists) and filename == 'fare_products.txt': + line.add_error(NO_AMOUNT_DEFINED) + + +def check_areas_of_file(path, stop_or_stop_time, areas, unused_areas, messages): + with open(path, 'r', encoding='utf-8-sig') as csvfile: + reader = csv.DictReader(csvfile, skipinitialspace=True) + + # Avoid parsing huge file if areas are not in use + if 'area_id' not in reader.fieldnames: + return + + for line in reader: + area_id = line.get('area_id') + + if not area_id: + continue + + if area_id not in areas: + messages.add_error( + diagnostics.format(NONEXISTENT_AREA_ID, f'\nLine: {reader.line_num}', stop_or_stop_time)) + continue + + if area_id in unused_areas: + unused_areas.remove(area_id) + + +def check_linked_id(line, fieldname, defined_ids): + if not getattr(line, fieldname): + return False + + if getattr(line, fieldname) not in defined_ids: + line.add_error(FOREIGN_ID_INVALID, extra_info=f'{fieldname}: {getattr(line, fieldname)}') + + return True + + +def check_linked_flr_ftr_entities(line, rider_categories, rider_category_by_fare_container, + linked_entities_by_fare_product): + if line.fare_product_id and line.fare_product_id not in linked_entities_by_fare_product: + line.add_error(NONEXISTENT_FARE_PRODUCT_ID) + if line.rider_category_id and line.rider_category_id not in rider_categories: + line.add_error(NONEXISTENT_RIDER_CATEGORY_ID) + if line.fare_container_id and line.fare_container_id not in rider_category_by_fare_container: + line.add_error(NONEXISTENT_FARE_CONTAINER_ID) + + if line.fare_product_id: + if line.rider_category_id: + fp_rider_cats = linked_entities_by_fare_product[line.fare_product_id].rider_category_ids + if len(fp_rider_cats) and (line.rider_category_id not in fp_rider_cats): + line.add_error(CONFLICTING_RIDER_CATEGORY_ON_FARE_PRODUCT) + if line.fare_container_id: + fp_fare_containers = linked_entities_by_fare_product[line.fare_product_id].fare_container_ids + if len(fp_fare_containers) and (line.fare_container_id not in fp_fare_containers): + line.add_error(CONFLICTING_FARE_CONTAINER_ON_FARE_PRODUCT) + if line.rider_category_id and line.fare_container_id: + fc_rider_cat = rider_category_by_fare_container[line.fare_container_id] + if fc_rider_cat and (fc_rider_cat != line.rider_category_id): + line.add_error(CONFLICTING_RIDER_CATEGORY_ON_FARE_CONTAINER) diff --git a/src/warnings.py b/fares_validator/warnings.py similarity index 87% rename from src/warnings.py rename to fares_validator/warnings.py index 0be0a4c..ee20969 100644 --- a/src/warnings.py +++ b/fares_validator/warnings.py @@ -39,14 +39,3 @@ # fare_transfer_rules.txt NO_FARE_TRANSFER_RULES = 'No fare_transfer_rules.txt was found, will assume no fare_transfer_rules exist.' UNUSED_LEG_GROUPS = 'Leg groups defined in fare_leg_rules.txt are unused in fare_transfer_rules.txt.' 
- -def add_warning(warning, line_num_error_msg, warnings, path='', extra_info=''): - warning_msg = '' - if path: - warning_msg += path + ': ' - warning_msg += warning - if extra_info: - warning_msg += '\n' + extra_info - warning_msg += line_num_error_msg - - warnings.append(warning_msg) \ No newline at end of file diff --git a/run_validator.py b/run_validator.py deleted file mode 100644 index bb7cb28..0000000 --- a/run_validator.py +++ /dev/null @@ -1,38 +0,0 @@ -from src import read_gtfs_entities as read_gtfs, read_fares_entities as read_fares -from src import warnings as warn - -def run_validator(gtfs_root_dir, should_read_stop_times): - errors = [] - warnings = [] - - dependent_entities = {} - - dependent_entities['areas'] = read_fares.areas(gtfs_root_dir, errors, warnings) - - dependent_entities['networks'] = read_gtfs.networks(gtfs_root_dir, warnings) - - read_gtfs.stop_areas(gtfs_root_dir, dependent_entities['areas'], errors, warnings, should_read_stop_times) - - dependent_entities['service_ids'] = read_gtfs.service_ids(gtfs_root_dir, errors, warnings) - - dependent_entities['timeframe_ids'] = read_fares.timeframes(gtfs_root_dir, errors, warnings) - unused_timeframes = dependent_entities['timeframe_ids'].copy() - - dependent_entities['rider_category_ids'] = read_fares.rider_categories(gtfs_root_dir, errors, warnings) - - dependent_entities['rider_category_by_fare_container'] = read_fares.fare_containers(gtfs_root_dir, dependent_entities['rider_category_ids'], errors, warnings) - - dependent_entities['linked_entities_by_fare_product'] = read_fares.fare_products(gtfs_root_dir, dependent_entities, unused_timeframes, errors, warnings) - - dependent_entities['leg_group_ids'] = read_fares.fare_leg_rules(gtfs_root_dir, dependent_entities, unused_timeframes, errors, warnings) - - read_fares.fare_transfer_rules(gtfs_root_dir, dependent_entities, errors, warnings) - - if len(unused_timeframes) > 0: - warning_info = 'Unused timeframes: ' + str(unused_timeframes) - warn.add_warning(warn.UNUSED_TIMEFRAME_IDS, '', warnings, '', warning_info) - - return { - 'errors': errors, - 'warnings': warnings - } \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..508772d --- /dev/null +++ b/setup.py @@ -0,0 +1,13 @@ +from setuptools import setup + +setup(name='gtfs-fares-v2-validator', + version='0.1.0', + description='Validate transit feeds for conformance to the GTFS Fares v2 specification', + url='https://github.com/TransitApp/gtfs-fares-v2-validator', + author='Jeremy Steele', + packages=['fares_validator'], + classifiers=[ + 'License :: OSI Approved :: MIT License' + ], + zip_safe=False, + install_requires=[]) diff --git a/src/fare_leg_rule_checkers.py b/src/fare_leg_rule_checkers.py deleted file mode 100644 index 7a681c8..0000000 --- a/src/fare_leg_rule_checkers.py +++ /dev/null @@ -1,58 +0,0 @@ -from src.errors import add_error -from .utils import check_linked_id -from .errors import * - -def check_areas(path, line, line_num_error_msg, areas, unused_areas, errors): - is_symmetrical = line.get('is_symmetrical') - if is_symmetrical and (not is_symmetrical in ['0', '1']): - add_error(INVALID_IS_SYMMETRICAL_LEG_RULES, line_num_error_msg, errors) - - from_area = line.get('from_area_id') - to_area = line.get('to_area_id') - contains_area = line.get('contains_area_id') - - if contains_area and (not from_area and not to_area): - add_error(CONTAINS_AREA_WITHOUT_FROM_TO_AREA, line_num_error_msg, errors) - - if (from_area or to_area) and not is_symmetrical: - 
add_error(AREA_WITHOUT_IS_SYMMETRICAL, line_num_error_msg, errors) - - if (not from_area and not to_area) and is_symmetrical: - add_error(IS_SYMMETRICAL_WITHOUT_FROM_TO_AREA, line_num_error_msg, errors) - - if from_area and from_area in unused_areas: - unused_areas.remove(from_area) - if to_area and to_area in unused_areas: - unused_areas.remove(from_area) - - check_linked_id(path, line, 'from_area_id', areas, line_num_error_msg, errors) - check_linked_id(path, line, 'to_area_id', areas, line_num_error_msg, errors) - check_linked_id(path, line, 'contains_area_id', areas, line_num_error_msg, errors) - -def check_distances(line, line_num_error_msg, errors): - min_distance = line.get('min_distance') - max_distance = line.get('max_distance') - distance_type = line.get('distance_type') - - if distance_type and (not distance_type in ['0', '1']): - add_error(INVALID_DISTANCE_TYPE, line_num_error_msg, errors) - - if min_distance: - try: - dist = float(min_distance) - if (dist < 0): - add_error(NEGATIVE_MIN_DISTANCE, line_num_error_msg, errors) - except ValueError: - add_error(INVALID_MIN_DISTANCE, line_num_error_msg, errors) - if max_distance: - try: - dist = float(max_distance) - if (dist < 0): - add_error(NEGATIVE_MAX_DISTANCE, line_num_error_msg, errors) - except ValueError: - add_error(INVALID_MAX_DISTANCE, line_num_error_msg, errors) - - if (min_distance or max_distance) and not distance_type: - add_error(DISTANCE_WITHOUT_DISTANCE_TYPE, line_num_error_msg, errors) - if (not min_distance and not max_distance) and distance_type: - add_error(DISTANCE_TYPE_WITHOUT_DISTANCE, line_num_error_msg, errors) \ No newline at end of file diff --git a/src/fare_product_checkers.py b/src/fare_product_checkers.py deleted file mode 100644 index 5827ab9..0000000 --- a/src/fare_product_checkers.py +++ /dev/null @@ -1,94 +0,0 @@ -from .errors import * -from .warnings import * - -def check_linked_fp_entities(line, line_num_error_msg, rider_categories, rider_category_by_fare_container, linked_entities_by_fare_product, errors): - fare_product_id = line.get('fare_product_id') - rider_category_id = line.get('rider_category_id') - fare_container_id = line.get('fare_container_id') - linked_entities = linked_entities_by_fare_product.get(fare_product_id) - if not linked_entities: - linked_entities = { - 'rider_category_ids': [], - 'fare_container_ids': [], - } - - if rider_category_id: - linked_entities['rider_category_ids'].append(rider_category_id) - if (not rider_category_id in rider_categories): - add_error(NONEXISTENT_RIDER_CATEGORY_ID, line_num_error_msg, errors) - - if fare_container_id: - linked_entities['fare_container_ids'].append(fare_container_id) - if not fare_container_id in rider_category_by_fare_container: - add_error(NONEXISTENT_FARE_CONTAINER_ID, line_num_error_msg, errors, 'fare_products.txt') - - fare_container_rider_cat = rider_category_by_fare_container.get(fare_container_id) - if rider_category_id and fare_container_rider_cat and (rider_category_id != fare_container_rider_cat): - add_error(CONFLICTING_RIDER_CATEGORY_ON_FARE_CONTAINER, line_num_error_msg, errors, 'fare_products.txt') - - linked_entities_by_fare_product[fare_product_id] = linked_entities - -def check_bundle(line, line_num_error_msg, errors): - if line.get('bundle_amount'): - try: - bundle_amt = int(line.get('bundle_amount')) - if bundle_amt < 0: - add_error(INVALID_BUNDLE_AMOUNT, line_num_error_msg, errors) - except ValueError: - add_error(INVALID_BUNDLE_AMOUNT, line_num_error_msg, errors) - -def check_durations_and_offsets(line, 
line_num_error_msg, errors, warnings): - duration_start = line.get('duration_start') - if duration_start and (duration_start not in ['0', '1']): - add_error(INVALID_DURATION_START, line_num_error_msg, errors) - - duration_unit = line.get('duration_unit') - if duration_unit and (not duration_unit in ['0', '1', '2', '3', '4', '5', '6']): - add_error(INVALID_DURATION_UNIT, line_num_error_msg, errors) - - duration_type = line.get('duration_type') - if duration_type and (not duration_type in ['1', '2']): - add_error(INVALID_DURATION_TYPE, line_num_error_msg, errors) - - if duration_type == '1' and duration_start: - add_error(DURATION_START_WITH_DURATION_TYPE, line_num_error_msg, errors) - - duration_amount = line.get('duration_amount') - if duration_amount: - try: - amt = int(duration_amount) - if amt < 1: - add_error(NEGATIVE_OR_ZERO_DURATION, line_num_error_msg, errors) - except ValueError: - add_error(NON_INT_DURATION_AMOUNT, line_num_error_msg, errors) - - if not duration_unit: - add_error(DURATION_WITHOUT_UNIT, line_num_error_msg, errors) - - if not duration_type: - add_error(DURATION_WITHOUT_TYPE, line_num_error_msg, errors) - else: - if duration_type: - add_error(DURATION_TYPE_WITHOUT_AMOUNT, line_num_error_msg, errors) - if duration_unit: - add_error(DURATION_UNIT_WITHOUT_AMOUNT, line_num_error_msg, errors) - - offset_unit = line.get('offset_unit') - if offset_unit and (not offset_unit in ['0', '1', '2', '3', '4', '5', '6']): - add_error(INVALID_OFFSET_UNIT, line_num_error_msg, errors) - - offset_amt = line.get('offset_amount') - if offset_amt: - try: - amt = int(offset_amt) - except ValueError: - add_error(NON_INT_OFFSET_AMOUNT, line_num_error_msg, errors) - - if duration_type == '2': - add_error(OFFSET_AMOUNT_WITH_DURATION_TYPE, line_num_error_msg, errors) - - if not offset_unit: - add_warning(OFFSET_AMOUNT_WITHOUT_OFFSET_UNIT, line_num_error_msg, warnings) - else: - if offset_unit: - add_error(OFFSET_UNIT_WITHOUT_AMOUNT, line_num_error_msg, errors) diff --git a/src/fare_transfer_rule_checkers.py b/src/fare_transfer_rule_checkers.py deleted file mode 100644 index 51b3fc5..0000000 --- a/src/fare_transfer_rule_checkers.py +++ /dev/null @@ -1,73 +0,0 @@ -from .errors import * - -def check_leg_groups(line, line_num_error_msg, leg_group_ids, unused_leg_groups, errors): - from_leg_group = line.get('from_leg_group_id') - to_leg_group = line.get('to_leg_group_id') - is_symmetrical = line.get('is_symmetrical') - - if is_symmetrical and (not is_symmetrical in ['0', '1']): - add_error(INVALID_IS_SYMMETRICAL_TRANSFER_RULES, line_num_error_msg, errors) - if (from_leg_group or to_leg_group) and not is_symmetrical: - add_error(LEG_GROUP_WITHOUT_IS_SYMMETRICAL, line_num_error_msg, errors) - if (not from_leg_group and not to_leg_group) and is_symmetrical: - add_error(IS_SYMMETRICAL_WITHOUT_FROM_TO_LEG_GROUP, line_num_error_msg, errors) - if from_leg_group and not from_leg_group in leg_group_ids: - add_error(INVALID_FROM_LEG_GROUP, line_num_error_msg, errors) - if to_leg_group and not to_leg_group in leg_group_ids: - add_error(INVALID_TO_LEG_GROUP, line_num_error_msg, errors) - - if from_leg_group in unused_leg_groups: - unused_leg_groups.remove(from_leg_group) - if to_leg_group in unused_leg_groups: - unused_leg_groups.remove(to_leg_group) - -def check_spans_and_transfer_ids(line, line_num_error_msg, errors): - spanning_limit = line.get('spanning_limit') - transfer_id = line.get('transfer_id') - transfer_seq = line.get('transfer_sequence') - - if spanning_limit: - if not (line.get('from_leg_group_id') 
== line.get('to_leg_group_id')): - add_error(SPANNING_LIMIT_WITH_BAD_LEGS, line_num_error_msg, errors) - if transfer_id: - add_error(SPANNING_LIMIT_WITH_TRANSFER_ID, line_num_error_msg, errors) - try: - limit = int(spanning_limit) - if limit < 0 or limit == 1: - add_error(INVALID_SPANNING_LIMIT, line_num_error_msg, errors) - except ValueError: - add_error(INVALID_SPANNING_LIMIT, line_num_error_msg, errors) - - if transfer_id: - if not transfer_seq: - add_error(TRANSFER_ID_WITHOUT_TRANSFER_SEQUENCE, line_num_error_msg, errors) - - if transfer_seq: - if not transfer_id: - add_error(TRANSFER_SEQUENCE_WITHOUT_TRANSFER_ID, line_num_error_msg, errors) - try: - seq = int(transfer_seq) - if seq < 1: - add_error(INVALID_TRANSFER_SEQUENCE, line_num_error_msg, errors) - except ValueError: - add_error(INVALID_TRANSFER_SEQUENCE, line_num_error_msg, errors) - -def check_durations(line, line_num_error_msg, errors): - duration_limit = line.get('duration_limit') - limit_type = line.get('duration_limit_type') - - if limit_type and (not limit_type in ['0', '1', '2', '3']): - add_error(INVALID_DURATION_LIMIT_TYPE, line_num_error_msg, errors) - - if duration_limit: - if not limit_type: - add_error(DURATION_LIMIT_WITHOUT_LIMIT_TYPE, line_num_error_msg, errors) - try: - limit = int(duration_limit) - if limit < 1: - add_error(INVALID_DURATION_LIMIT, line_num_error_msg, errors) - except ValueError: - add_error(INVALID_DURATION_LIMIT, line_num_error_msg, errors) - else: - if limit_type: - add_error(DURATION_LIMIT_TYPE_WITHOUT_DURATION, line_num_error_msg, errors) \ No newline at end of file diff --git a/src/read_fares_entities.py b/src/read_fares_entities.py deleted file mode 100644 index 1d11e42..0000000 --- a/src/read_fares_entities.py +++ /dev/null @@ -1,344 +0,0 @@ -# Reads files introduced as part of the GTFS fares-v2 specification - -import re -from os import path - -from .utils import check_fare_amount, read_csv_file, check_linked_id, check_amts, check_linked_flr_ftr_entities -from .fare_product_checkers import check_linked_fp_entities, check_bundle, check_durations_and_offsets -from .fare_leg_rule_checkers import check_areas, check_distances -from .fare_transfer_rule_checkers import check_leg_groups, check_spans_and_transfer_ids, check_durations -from .errors import * -from .warnings import * -from .expected_fields import * - -def areas(gtfs_root_dir, errors, warnings): - greater_area_id_by_area_id = {} - def for_each_area(line, line_num_error_msg): - area_id = line.get('area_id') - greater_area_id = line.get('greater_area_id') - - if area_id in greater_area_id_by_area_id: - add_error(DUPLICATE_AREA_ID, line_num_error_msg, errors) - return - - if not area_id: - add_error(EMPTY_AREA_ID, line_num_error_msg, errors) - return - - greater_area_id_by_area_id[area_id] = greater_area_id - - areas_path = path.join(gtfs_root_dir, 'areas.txt') - - if not path.isfile(areas_path): - add_warning(NO_AREAS, '', warnings) - return [] - - read_csv_file(areas_path, ['area_id'], EXPECTED_AREAS_FIELDS, errors, warnings, for_each_area) - - for area_id in greater_area_id_by_area_id: - greater_area_id = greater_area_id_by_area_id[area_id] - - while greater_area_id: - if (greater_area_id == area_id): - error_info = 'area_id: ' + area_id - add_error(GREATER_AREA_ID_LOOP, '', errors, '', error_info) - break - - if not greater_area_id in greater_area_id_by_area_id: - error_info = 'greater_area_id: ' + greater_area_id - add_error(UNDEFINED_GREATER_AREA_ID, '', errors, '', error_info) - break - - greater_area_id = 
greater_area_id_by_area_id[greater_area_id] - - return list(greater_area_id_by_area_id.keys()) - -def timeframes(gtfs_root_dir, errors, warnings): - timeframes = [] - def for_each_timeframe(line, line_num_error_msg): - timeframe_id = line.get('timeframe_id') - start_time = line.get('start_time') - end_time = line.get('end_time') - - if not timeframe_id: - add_error(EMPTY_TIMEFRAME_ID, line_num_error_msg, errors) - return - if not start_time: - add_error(EMPTY_START_TIME, line_num_error_msg, errors) - return - if not end_time: - add_error(EMPTY_END_TIME, line_num_error_msg, errors) - return - - starttimematch = re.search(r'^\d?\d:\d\d:\d\d$', start_time) - endtimematch = re.search(r'^\d?\d:\d\d:\d\d$', end_time) - - if not starttimematch or not endtimematch: - add_error(INVALID_TIME_FORMAT, line_num_error_msg, errors) - timeframes.append(timeframe_id) - return - - starttime_split = start_time.split(':') - endtime_split = end_time.split(':') - - if int(starttime_split[0]) > 23 or int(endtime_split[0]) > 23: - add_error(INVALID_TIME_FORMAT, line_num_error_msg, errors) - - if int(starttime_split[1]) > 59 or int(endtime_split[1]) > 59: - add_error(INVALID_TIME_FORMAT, line_num_error_msg, errors) - - if int(starttime_split[2]) > 59 or int(endtime_split[2]) > 59: - add_error(INVALID_TIME_FORMAT, line_num_error_msg, errors) - - if timeframe_id in timeframes: - pass - else: - timeframes.append(timeframe_id) - - timeframes_path = path.join(gtfs_root_dir, 'timeframes.txt') - - if not path.isfile(timeframes_path): - add_warning(NO_TIMEFRAMES, '', warnings) - return timeframes - - read_csv_file(timeframes_path, ['timeframe_id', 'start_time', 'end_time'], EXPECTED_TIMEFRAMES_FIELDS, errors, warnings, for_each_timeframe) - - return timeframes - -def rider_categories(gtfs_root_dir, errors, warnings): - rider_categories = [] - def for_each_rider_category(line, line_num_error_msg): - rider_category = line.get('rider_category_id') - min_age = line.get('min_age') - min_age_int = 0 - max_age = line.get('max_age') - - if not rider_category: - add_error(EMPTY_RIDER_CATEGORY_ID, line_num_error_msg, errors) - return - - if not rider_category in rider_categories: - rider_categories.append(rider_category) - - if min_age: - try: - min_age_int = int(min_age) - if min_age_int < 0: - add_error(NEGATIVE_MIN_AGE, line_num_error_msg, errors) - if min_age_int > 100: - add_warning(VERY_LARGE_MIN_AGE, line_num_error_msg, warnings) - except ValueError: - add_error(NON_INT_MIN_AGE, line_num_error_msg, errors) - if max_age: - try: - max_age_int = int(max_age) - if max_age_int < 0: - add_error(NEGATIVE_MAX_AGE, line_num_error_msg, errors) - if max_age_int > 100: - add_warning(VERY_LARGE_MAX_AGE, line_num_error_msg, warnings) - if max_age_int <= min_age_int: - add_warning(MAX_AGE_LESS_THAN_MIN_AGE, line_num_error_msg, warnings) - except ValueError: - add_error(NON_INT_MAX_AGE, line_num_error_msg, errors) - - rider_categories_path = path.join(gtfs_root_dir, 'rider_categories.txt') - - if not path.isfile(rider_categories_path): - add_warning(NO_RIDER_CATEGORIES, '', warnings) - return rider_categories - - read_csv_file(rider_categories_path, ['rider_category_id'], EXPECTED_RIDER_CATEGORIES_FIELDS, errors, warnings, for_each_rider_category) - - return rider_categories - -def fare_containers(gtfs_root_dir, rider_categories, errors, warnings): - rider_category_by_fare_container = {} - fare_containers_path = path.join(gtfs_root_dir, 'fare_containers.txt') - - def for_each_fare_container(line, line_num_error_msg): - fare_container_id = 
line.get('fare_container_id') - fare_container_name = line.get('fare_container_name') - rider_category_id = line.get('rider_category_id') - - if not fare_container_id: - add_error(EMPTY_FARE_CONTAINER_ID, line_num_error_msg, errors) - return - - if not fare_container_name: - add_error(EMPTY_FARE_CONTAINER_NAME, line_num_error_msg, errors) - return - - amount_exists = check_fare_amount(fare_containers_path, line, line_num_error_msg, 'amount', 'currency', errors) - min_purchase_exists = check_fare_amount(fare_containers_path, line, line_num_error_msg, 'minimum_initial_purchase', 'currency', errors) - if (not amount_exists and not min_purchase_exists) and line.get('currency'): - add_error(CURRENCY_WITHOUT_AMOUNT, line_num_error_msg, errors, 'fare_containers.txt') - - if fare_container_id in rider_category_by_fare_container: - add_error(DUPLICATE_FARE_CONTAINER_ID, line_num_error_msg, errors) - return - - if rider_category_id: - if not rider_category_id in rider_categories: - add_error(NONEXISTENT_RIDER_CATEGORY_ID, line_num_error_msg, errors, 'fare_containers.txt') - - rider_category_by_fare_container[fare_container_id] = rider_category_id - - if not path.isfile(fare_containers_path): - add_warning(NO_FARE_CONTAINERS, '', warnings) - return rider_category_by_fare_container - - read_csv_file(fare_containers_path, ['fare_container_id', 'fare_container_name'], EXPECTED_FARE_CONTAINERS_FIELDS, errors, warnings, for_each_fare_container) - - return rider_category_by_fare_container - -def fare_products(gtfs_root_dir, dependent_entities, unused_timeframes, errors, warnings): - linked_entities_by_fare_product = {} - - service_ids = dependent_entities['service_ids'] - timeframe_ids = dependent_entities['timeframe_ids'] - rider_categories = dependent_entities['rider_category_ids'] - rider_category_by_fare_container = dependent_entities['rider_category_by_fare_container'] - - fare_products_path = path.join(gtfs_root_dir, 'fare_products.txt') - - def for_each_fare_product(line, line_num_error_msg): - if not line.get('fare_product_id'): - add_error(EMPTY_FARE_PRODUCT_ID, line_num_error_msg, errors) - return - if not line.get('fare_product_name'): - add_error(EMPTY_FARE_PRODUCT_NAME, line_num_error_msg, errors) - return - - check_linked_fp_entities(line, line_num_error_msg, rider_categories, rider_category_by_fare_container, linked_entities_by_fare_product, errors) - - min_amt_exists = check_fare_amount(fare_products_path, line, line_num_error_msg, 'min_amount', 'currency', errors) - max_amt_exists = check_fare_amount(fare_products_path, line, line_num_error_msg, 'max_amount', 'currency', errors) - amt_exists = check_fare_amount(fare_products_path, line, line_num_error_msg, 'amount', 'currency', errors) - if (not min_amt_exists and not max_amt_exists and not amt_exists) and line.get('currency'): - add_error(CURRENCY_WITHOUT_AMOUNT, line_num_error_msg, errors, 'fare_products.txt') - check_amts(fare_products_path, line_num_error_msg, min_amt_exists, max_amt_exists, amt_exists, errors) - - check_bundle(line, line_num_error_msg, errors) - check_linked_id(path, line, 'service_id', service_ids, line_num_error_msg, errors) - timeframe_exists = check_linked_id(path, line, 'timeframe_id', timeframe_ids, line_num_error_msg, errors) - if line.get('timeframe_id') in unused_timeframes: - unused_timeframes.remove(line.get('timeframe_id')) - if timeframe_exists: - if not line.get('timeframe_type') in ['0', '1']: - add_error(INVALID_TIMEFRAME_TYPE, line_num_error_msg, errors) - else: - if line.get('timeframe_type'): - 
add_error(TIMEFRAME_TYPE_WITHOUT_TIMEFRAME, line_num_error_msg, errors) - - check_durations_and_offsets(line, line_num_error_msg, errors, warnings) - - if not path.isfile(fare_products_path): - add_warning(NO_FARE_PRODUCTS, '', warnings) - return linked_entities_by_fare_product - - read_csv_file(fare_products_path, ['fare_product_id', 'fare_product_name'], EXPECTED_FARE_PRODUCTS_FIELDS, errors, warnings, for_each_fare_product) - - return linked_entities_by_fare_product - -def fare_leg_rules(gtfs_root_dir, dependent_entities, unused_timeframes, errors, warnings): - leg_group_ids = [] - - areas = dependent_entities['areas'] - unused_areas = areas.copy() - networks = dependent_entities['networks'] - unused_networks = networks.copy() - service_ids = dependent_entities['service_ids'] - timeframe_ids = dependent_entities['timeframe_ids'] - rider_categories = dependent_entities['rider_category_ids'] - rider_category_by_fare_container = dependent_entities['rider_category_by_fare_container'] - linked_entities_by_fare_product = dependent_entities['linked_entities_by_fare_product'] - - fare_leg_rules_path = path.join(gtfs_root_dir, 'fare_leg_rules.txt') - - def for_each_fare_leg_rule(line, line_num_error_msg): - if line.get('leg_group_id') and not line.get('leg_group_id') in leg_group_ids: - leg_group_ids.append(line.get('leg_group_id')) - - check_areas(fare_leg_rules_path, line, line_num_error_msg, areas, unused_areas, errors) - - check_linked_id(fare_leg_rules_path, line, 'network_id', networks, line_num_error_msg, errors) - if line.get('network_id') in unused_networks: - unused_networks.remove(line.get('network_id')) - - check_linked_id(fare_leg_rules_path, line, 'from_timeframe_id', timeframe_ids, line_num_error_msg, errors) - if line.get('from_timeframe_id') in unused_timeframes: - unused_timeframes.remove(line.get('from_timeframe_id')) - check_linked_id(fare_leg_rules_path, line, 'to_timeframe_id', timeframe_ids, line_num_error_msg, errors) - if line.get('to_timeframe_id') in unused_timeframes: - unused_timeframes.remove(line.get('to_timeframe_id')) - - check_linked_id(fare_leg_rules_path, line, 'service_id', service_ids, line_num_error_msg, errors) - - check_distances(line, line_num_error_msg, errors) - - min_amt_exists = check_fare_amount(fare_leg_rules_path, line, line_num_error_msg, 'min_amount', 'currency', errors) - max_amt_exists = check_fare_amount(fare_leg_rules_path, line, line_num_error_msg, 'max_amount', 'currency', errors) - amt_exists = check_fare_amount(fare_leg_rules_path, line, line_num_error_msg, 'amount', 'currency', errors) - if (not min_amt_exists and not max_amt_exists and not amt_exists) and line.get('currency'): - add_error(CURRENCY_WITHOUT_AMOUNT, line_num_error_msg, errors, 'fare_leg_rules.txt') - check_amts(fare_leg_rules_path, line_num_error_msg, min_amt_exists, max_amt_exists, amt_exists, errors) - if (min_amt_exists or max_amt_exists or amt_exists) and line.get('fare_product_id'): - add_error(AMOUNT_WITH_FARE_PRODUCT, line_num_error_msg, errors) - - if line.get('fare_leg_name') and line.get('fare_product_id'): - add_error(FARE_LEG_NAME_WITH_FARE_PRODUCT, line_num_error_msg, errors) - - check_linked_flr_ftr_entities(fare_leg_rules_path, line, line_num_error_msg, rider_categories, rider_category_by_fare_container, linked_entities_by_fare_product, errors) - - if path.isfile(fare_leg_rules_path): - read_csv_file(fare_leg_rules_path, [], EXPECTED_FARE_LEG_RULES_FIELDS, errors, warnings, for_each_fare_leg_rule) - else: - add_warning(NO_FARE_LEG_RULES, '', warnings) - - 
if len(unused_areas) > 0: - warning_info = 'Unused areas: ' + str(unused_areas) - add_warning(UNUSED_AREA_IDS, '', warnings, '', warning_info) - if len(unused_networks) > 0: - warning_info = 'Unused networks: ' + str(unused_networks) - add_warning(UNUSED_NETWORK_IDS, '', warnings, '', warning_info) - - return leg_group_ids - -def fare_transfer_rules(gtfs_root_dir, dependent_entities, errors, warnings): - leg_group_ids = dependent_entities['leg_group_ids'] - unused_leg_groups = leg_group_ids.copy() - rider_categories = dependent_entities['rider_category_ids'] - rider_category_by_fare_container = dependent_entities['rider_category_by_fare_container'] - linked_entities_by_fare_product = dependent_entities['linked_entities_by_fare_product'] - - fare_transfer_rules_path = path.join(gtfs_root_dir, 'fare_transfer_rules.txt') - - def for_each_fare_transfer_rule(line, line_num_error_msg): - check_leg_groups(line, line_num_error_msg, leg_group_ids, unused_leg_groups, errors) - check_spans_and_transfer_ids(line, line_num_error_msg, errors) - check_durations(line, line_num_error_msg, errors) - - min_amt_exists = check_fare_amount(fare_transfer_rules_path, line, line_num_error_msg, 'min_amount', 'currency', errors) - max_amt_exists = check_fare_amount(fare_transfer_rules_path, line, line_num_error_msg, 'max_amount', 'currency', errors) - amt_exists = check_fare_amount(fare_transfer_rules_path, line, line_num_error_msg, 'amount', 'currency', errors) - if (not min_amt_exists and not max_amt_exists and not amt_exists) and line.get('currency'): - add_error(CURRENCY_WITHOUT_AMOUNT, line_num_error_msg, errors, 'fare_transfer_rules.txt') - check_amts(fare_transfer_rules_path, line_num_error_msg, min_amt_exists, max_amt_exists, amt_exists, errors) - - if (min_amt_exists or max_amt_exists or amt_exists) and not line.get('fare_transfer_type'): - add_error(AMOUNT_WITHOUT_FARE_TRANSFER_TYPE, line_num_error_msg, errors) - if (not min_amt_exists and not max_amt_exists and not amt_exists) and line.get('fare_transfer_type'): - add_error(FARE_TRANSFER_TYPE_WITHOUT_AMOUNT, line_num_error_msg, errors) - if line.get('fare_transfer_type') and (line.get('fare_transfer_type') not in ['0', '1', '2', '3']): - add_error(INVALID_FARE_TRANSFER_TYPE, line_num_error_msg, errors) - - check_linked_flr_ftr_entities(fare_transfer_rules_path, line, line_num_error_msg, rider_categories, rider_category_by_fare_container, linked_entities_by_fare_product, errors) - - if path.isfile(fare_transfer_rules_path): - read_csv_file(fare_transfer_rules_path, [], EXPECTED_FARE_TRANSFER_RULES_FIELDS, errors, warnings, for_each_fare_transfer_rule) - else: - add_warning(NO_FARE_TRANSFER_RULES, '', warnings) - - if len(unused_leg_groups) > 0: - warning_info = 'Unused leg groups: ' + str(unused_leg_groups) - add_warning(UNUSED_LEG_GROUPS, '', warnings, '', warning_info) \ No newline at end of file diff --git a/src/read_gtfs_entities.py b/src/read_gtfs_entities.py deleted file mode 100644 index 39624d4..0000000 --- a/src/read_gtfs_entities.py +++ /dev/null @@ -1,96 +0,0 @@ -# Reads files introduced as part of the original GTFS specification - -import csv -from os import path, write -from .utils import read_csv_file, check_areas_of_file -from .errors import * -from .warnings import * - -def networks(gtfs_root_dir, warnings): - routes_path = path.join(gtfs_root_dir, 'routes.txt') - - if not path.isfile(routes_path): - add_warning(NO_ROUTES, '', warnings) - return [] - - networks = [] - - with open(routes_path, 'r') as csvfile: - reader = 
csv.DictReader(csvfile) - - if not 'network_id' in reader.fieldnames: - return networks - - for line in reader: - network_id = line.get('network_id') - - if network_id and (not network_id in networks): - networks.append(network_id) - - return networks - -def stop_areas(gtfs_root_dir, areas, errors, warnings, should_read_stop_times): - stops_path = path.join(gtfs_root_dir, 'stops.txt') - stop_times_path = path.join(gtfs_root_dir, 'stop_times.txt') - - stops_exists = path.isfile(stops_path) - stop_times_exists = False - if should_read_stop_times: - stop_times_exists = path.isfile(stop_times_path) - - if not stops_exists: - add_warning(NO_STOPS, '', warnings) - - unused_areas = areas.copy() - - if stops_exists: - check_areas_of_file(stops_path, 'stop', areas, unused_areas, errors) - if stop_times_exists: - check_areas_of_file(stop_times_path, 'stop_time', areas, unused_areas, errors) - - if len(unused_areas) > 0: - warning_info = 'Unused areas: ' + str(unused_areas) - add_warning(UNUSED_AREAS_IN_STOPS, '', warnings, '', warning_info) - -def service_ids(gtfs_root_dir, errors, warnings): - service_ids = [] - def for_each_calendar(line, line_num_error_msg): - service_id = line.get('service_id') - - if not service_id: - add_error(EMPTY_SERVICE_ID_CALENDAR, line_num_error_msg, errors) - return - - if service_id in service_ids: - error_info = 'service_id: ' + service_id - add_error(DUPLICATE_SERVICE_ID, line_num_error_msg, errors, '', error_info) - - service_ids.append(service_id) - - def for_each_calendar_date(line, line_num_error_msg): - service_id = line.get('service_id') - - if service_id == '': - add_error(EMPTY_SERVICE_ID_CALENDAR_DATES, line_num_error_msg, errors) - return - - if not service_id in service_ids: - service_ids.append(service_id) - - calendar_path = path.join(gtfs_root_dir, 'calendar.txt') - calendar_dates_path = path.join(gtfs_root_dir, 'calendar_dates.txt') - - calendar_exists = path.isfile(calendar_path) - calendar_dates_exists = path.isfile(calendar_dates_path) - - if not calendar_exists and not calendar_dates_exists: - add_warning(NO_SERVICE_IDS, '', warnings) - return service_ids - - if calendar_exists: - read_csv_file(calendar_path, ['service_id'], [], errors, warnings, for_each_calendar) - - if calendar_dates_exists: - read_csv_file(calendar_dates_path, ['service_id'], [], errors, warnings, for_each_calendar_date) - - return service_ids \ No newline at end of file diff --git a/src/utils.py b/src/utils.py deleted file mode 100644 index 8b107e7..0000000 --- a/src/utils.py +++ /dev/null @@ -1,126 +0,0 @@ -import csv -from src.errors import add_error -from .decimals_by_currency import decimals_by_currency -from .errors import * -from .warnings import * - -def get_filename_of_path(path): - path_split = path.split('/') - file = path_split[len(path_split) - 1] - return file - -def read_csv_file(path, required_fields, expected_fields, errors, warnings, func): - filename = get_filename_of_path(path) - with open(path, 'r') as csvfile: - reader = csv.DictReader(csvfile) - - for required_field in required_fields: - if not required_field in reader.fieldnames: - extra_info = 'field: ' + required_field - add_error(REQUIRED_FIELD_MISSING, '', errors, filename, extra_info) - return False - - if len(expected_fields) > 0: - unexpected_fields = [] - for field in reader.fieldnames: - if not field in expected_fields: - unexpected_fields.append(field) - if len(unexpected_fields) > 0: - extra_info = '\nColumn(s): ' + str(unexpected_fields) - add_warning(UNEXPECTED_FIELDS, '', warnings, 
filename, extra_info) - - for line in reader: - line_num_error_msg = '\nLine: ' + str(reader.line_num) - func(line, line_num_error_msg) - -def check_fare_amount(path, line, line_num_error_msg, fare_field, currency_field, errors): - filename = get_filename_of_path(path) - fare, currency = '', '' - - fare = line.get(fare_field) - currency = line.get(currency_field) - - if fare: - if not currency: - add_error(AMOUNT_WITHOUT_CURRENCY, line_num_error_msg, errors, filename) - return True - if not currency in decimals_by_currency: - add_error(UNRECOGNIZED_CURRENCY_CODE, line_num_error_msg, errors, filename) - return True - try: - float(fare) - if '.' in fare: - num_decimal_points = len(fare.split('.')[1]) - if num_decimal_points > decimals_by_currency[currency]: - add_error(TOO_MANY_AMOUNT_DECIMALS, line_num_error_msg, errors, filename) - except Exception: - add_error(INVALID_AMOUNT_FORMAT, line_num_error_msg, errors, filename) - return True - else: - return False - -def check_amts(path, line_num_error_msg, min_amt_exists, max_amt_exists, amt_exists, errors): - filename = get_filename_of_path(path) - if (min_amt_exists or max_amt_exists) and amt_exists: - add_error(AMOUNT_WITH_MIN_OR_MAX_AMOUNT, line_num_error_msg, errors, filename) - if (min_amt_exists and not max_amt_exists) or (max_amt_exists and not min_amt_exists): - add_error(MISSING_MIN_OR_MAX_AMOUNT, line_num_error_msg, errors, filename) - if (not amt_exists and not min_amt_exists and not max_amt_exists) and filename == 'fare_products.txt': - add_error(NO_AMOUNT_DEFINED, line_num_error_msg, errors) - -def check_areas_of_file(path, stop_or_stop_time, areas, unused_areas, errors): - with open(path, 'r') as csvfile: - reader = csv.DictReader(csvfile) - - if 'area_id' in reader.fieldnames: - for line in reader: - area_id = line.get('area_id') - - if not area_id: - continue - - if not area_id in areas: - line_num_error_msg = '\nLine: ' + str(reader.line_num) - add_error(NONEXISTENT_AREA_ID, line_num_error_msg, errors, stop_or_stop_time) - continue - - if area_id in unused_areas: - unused_areas.remove(area_id) - -def check_linked_id(path, line, fieldname, defined_ids, line_num_error_msg, errors): - filename = get_filename_of_path(path) - if not line.get(fieldname): - return False - - if not line.get(fieldname) in defined_ids: - error_info = fieldname + ': ' + line.get(fieldname) - add_error(FOREIGN_ID_INVALID, line_num_error_msg, errors, filename, error_info) - - return True - -def check_linked_flr_ftr_entities(path, line, line_num_error_msg, rider_categories, rider_category_by_fare_container, linked_entities_by_fare_product, errors): - filename = get_filename_of_path(path) - fare_product_id = line.get('fare_product_id') - rider_category_id = line.get('rider_category_id') - fare_container_id = line.get('fare_container_id') - - if fare_product_id and not fare_product_id in linked_entities_by_fare_product: - add_error(NONEXISTENT_FARE_PRODUCT_ID, line_num_error_msg, errors, filename) - if rider_category_id and not rider_category_id in rider_categories: - add_error(NONEXISTENT_RIDER_CATEGORY_ID, line_num_error_msg, errors, filename) - if fare_container_id and not fare_container_id in rider_category_by_fare_container: - add_error(NONEXISTENT_FARE_CONTAINER_ID, line_num_error_msg, errors, filename) - - if fare_product_id: - if rider_category_id: - fp_rider_cats = linked_entities_by_fare_product[fare_product_id].get('rider_category_ids') - if (len(fp_rider_cats) > 0) and (not rider_category_id in fp_rider_cats): - 
add_error(CONFLICTING_RIDER_CATEGORY_ON_FARE_PRODUCT, line_num_error_msg, errors, filename) - if fare_container_id: - fp_fare_containers = linked_entities_by_fare_product[fare_product_id].get('fare_container_ids') - if (len(fp_fare_containers) > 0) and (not fare_container_id in fp_fare_containers): - add_error(CONFLICTING_FARE_CONTAINER_ON_FARE_PRODUCT, line_num_error_msg, errors, filename) - if rider_category_id and fare_container_id: - fc_rider_cat = rider_category_by_fare_container[fare_container_id] - if fc_rider_cat and (not fc_rider_cat == rider_category_id): - add_error(CONFLICTING_RIDER_CATEGORY_ON_FARE_CONTAINER, line_num_error_msg, errors, filename) \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_errors.py b/tests/test_errors.py deleted file mode 100644 index fdb97d0..0000000 --- a/tests/test_errors.py +++ /dev/null @@ -1,191 +0,0 @@ -from run_validator import run_validator -from src import errors -from os import error, path - -def test_errors_simple_files(): - results = run_validator(path.join('tests', 'test_data', 'bad_gtfs_simple'), True) - errors_list = results['errors'] - - # Areas errors - assert errors.DUPLICATE_AREA_ID in errors_list[0] - assert errors.EMPTY_AREA_ID in errors_list[1] - assert errors.GREATER_AREA_ID_LOOP in errors_list[2] - - # Stops errors - assert errors.NONEXISTENT_AREA_ID in errors_list[5] - - # Stop times errors - assert errors.NONEXISTENT_AREA_ID in errors_list[6] - - # Calendar errors - assert errors.EMPTY_SERVICE_ID_CALENDAR in errors_list[7] - assert errors.DUPLICATE_SERVICE_ID in errors_list[8] - - # Calendar dates errors - assert errors.EMPTY_SERVICE_ID_CALENDAR_DATES in errors_list[9] - - # Timeframes errors - assert errors.INVALID_TIME_FORMAT in errors_list[10] - assert errors.INVALID_TIME_FORMAT in errors_list[11] - assert errors.EMPTY_START_TIME in errors_list[12] - assert errors.EMPTY_END_TIME in errors_list[13] - assert errors.EMPTY_TIMEFRAME_ID in errors_list[14] - - # Rider categories errors - assert errors.EMPTY_RIDER_CATEGORY_ID in errors_list[15] - assert errors.NEGATIVE_MIN_AGE in errors_list[16] - assert errors.NEGATIVE_MAX_AGE in errors_list[17] - assert errors.NON_INT_MIN_AGE in errors_list[18] - assert errors.NON_INT_MAX_AGE in errors_list[19] - - # Fare containers errors - assert errors.EMPTY_FARE_CONTAINER_ID in errors_list[20] - assert errors.EMPTY_FARE_CONTAINER_NAME in errors_list[21] - assert errors.NONEXISTENT_RIDER_CATEGORY_ID in errors_list[22] - assert errors.AMOUNT_WITHOUT_CURRENCY in errors_list[23] - assert errors.INVALID_AMOUNT_FORMAT in errors_list[24] - assert errors.AMOUNT_WITHOUT_CURRENCY in errors_list[25] - assert errors.INVALID_AMOUNT_FORMAT in errors_list[26] - assert errors.CURRENCY_WITHOUT_AMOUNT in errors_list[27] - assert errors.DUPLICATE_FARE_CONTAINER_ID in errors_list[28] - - assert len(errors_list) == 29 - -def test_errors_fare_products(): - results = run_validator(path.join('tests', 'test_data', 'bad_fare_products'), False) - errors_list = results['errors'] - - assert errors.EMPTY_FARE_PRODUCT_ID in errors_list[0] - assert errors.EMPTY_FARE_PRODUCT_NAME in errors_list[1] - assert errors.MISSING_MIN_OR_MAX_AMOUNT in errors_list[2] - assert errors.AMOUNT_WITH_MIN_OR_MAX_AMOUNT in errors_list[3] - assert errors.AMOUNT_WITHOUT_CURRENCY in errors_list[4] - assert errors.AMOUNT_WITHOUT_CURRENCY in errors_list[5] - assert errors.AMOUNT_WITHOUT_CURRENCY in errors_list[6] # this also is for line 7 of fare 
products - assert errors.NO_AMOUNT_DEFINED in errors_list[7] - assert errors.FOREIGN_ID_INVALID in errors_list[8] - assert errors.INVALID_TIMEFRAME_TYPE in errors_list[9] - assert errors.INVALID_TIMEFRAME_TYPE in errors_list[10] - assert errors.FOREIGN_ID_INVALID in errors_list[11] - assert errors.TIMEFRAME_TYPE_WITHOUT_TIMEFRAME in errors_list[12] - - assert len(errors_list) == 13 - -def test_errors_fare_leg_rules(): - results = run_validator(path.join('tests', 'test_data', 'bad_fare_leg_rules'), False) - errors_list = results['errors'] - - # check areas - assert errors.AREA_WITHOUT_IS_SYMMETRICAL in errors_list[0] - assert errors.CONTAINS_AREA_WITHOUT_FROM_TO_AREA in errors_list[1] - assert errors.IS_SYMMETRICAL_WITHOUT_FROM_TO_AREA in errors_list[2] - assert errors.INVALID_IS_SYMMETRICAL_LEG_RULES in errors_list[3] - assert errors.FOREIGN_ID_INVALID in errors_list[4] - - # check networks - assert errors.FOREIGN_ID_INVALID in errors_list[5] - - # check timeframes - assert errors.FOREIGN_ID_INVALID in errors_list[6] - assert errors.FOREIGN_ID_INVALID in errors_list[7] - - # check service_id - assert errors.FOREIGN_ID_INVALID in errors_list[8] - - # check distances - assert errors.INVALID_MIN_DISTANCE in errors_list[9] - assert errors.INVALID_MAX_DISTANCE in errors_list[10] - assert errors.DISTANCE_WITHOUT_DISTANCE_TYPE in errors_list[11] - assert errors.INVALID_DISTANCE_TYPE in errors_list[12] - assert errors.NEGATIVE_MIN_DISTANCE in errors_list[13] - assert errors.NEGATIVE_MAX_DISTANCE in errors_list[14] - assert errors.DISTANCE_TYPE_WITHOUT_DISTANCE in errors_list[15] - - # check amounts/fare_product/fare_leg_name - assert errors.CURRENCY_WITHOUT_AMOUNT in errors_list[16] - assert errors.AMOUNT_WITH_FARE_PRODUCT in errors_list[17] - assert errors.AMOUNT_WITH_MIN_OR_MAX_AMOUNT in errors_list[18] - assert errors.MISSING_MIN_OR_MAX_AMOUNT in errors_list[19] - assert errors.FARE_LEG_NAME_WITH_FARE_PRODUCT in errors_list[20] - - # check linked entities - assert errors.NONEXISTENT_FARE_PRODUCT_ID in errors_list[21] - assert errors.NONEXISTENT_RIDER_CATEGORY_ID in errors_list[22] - assert errors.NONEXISTENT_FARE_CONTAINER_ID in errors_list[23] - assert errors.CONFLICTING_RIDER_CATEGORY_ON_FARE_PRODUCT in errors_list[24] - assert errors.CONFLICTING_FARE_CONTAINER_ON_FARE_PRODUCT in errors_list[25] - assert errors.CONFLICTING_RIDER_CATEGORY_ON_FARE_CONTAINER in errors_list[26] - - assert len(errors_list) == 27 - -def test_errors_fare_transfer_rules(): - results = run_validator(path.join('tests', 'test_data', 'bad_fare_transfer_rules'), False) - errors_list = results['errors'] - - # check leg groups - assert errors.IS_SYMMETRICAL_WITHOUT_FROM_TO_LEG_GROUP in errors_list[0] - assert errors.LEG_GROUP_WITHOUT_IS_SYMMETRICAL in errors_list[1] - assert errors.INVALID_IS_SYMMETRICAL_TRANSFER_RULES in errors_list[2] - assert errors.INVALID_TO_LEG_GROUP in errors_list[3] - assert errors.INVALID_FROM_LEG_GROUP in errors_list[4] - - # check transfer_id and spans - assert errors.SPANNING_LIMIT_WITH_BAD_LEGS in errors_list[5] - assert errors.INVALID_SPANNING_LIMIT in errors_list[6] - assert errors.INVALID_SPANNING_LIMIT in errors_list[7] - assert errors.SPANNING_LIMIT_WITH_TRANSFER_ID in errors_list[8] - assert errors.TRANSFER_ID_WITHOUT_TRANSFER_SEQUENCE in errors_list[9] - assert errors.TRANSFER_SEQUENCE_WITHOUT_TRANSFER_ID in errors_list[10] - assert errors.INVALID_TRANSFER_SEQUENCE in errors_list[11] - assert errors.INVALID_TRANSFER_SEQUENCE in errors_list[12] - - # check durations - assert 
errors.INVALID_DURATION_LIMIT_TYPE in errors_list[13] - assert errors.DURATION_LIMIT_WITHOUT_LIMIT_TYPE in errors_list[14] - assert errors.INVALID_DURATION_LIMIT in errors_list[15] - assert errors.DURATION_LIMIT_TYPE_WITHOUT_DURATION in errors_list[16] - - # check amounts - assert errors.CURRENCY_WITHOUT_AMOUNT in errors_list[17] - assert errors.AMOUNT_WITHOUT_CURRENCY in errors_list[18] - assert errors.AMOUNT_WITHOUT_FARE_TRANSFER_TYPE in errors_list[19] - assert errors.INVALID_FARE_TRANSFER_TYPE in errors_list[20] - assert errors.UNRECOGNIZED_CURRENCY_CODE in errors_list[21] - assert errors.FARE_TRANSFER_TYPE_WITHOUT_AMOUNT in errors_list[22] - - # check linked entities - assert errors.NONEXISTENT_FARE_PRODUCT_ID in errors_list[23] - assert errors.NONEXISTENT_RIDER_CATEGORY_ID in errors_list[24] - assert errors.NONEXISTENT_FARE_CONTAINER_ID in errors_list[25] - assert errors.CONFLICTING_RIDER_CATEGORY_ON_FARE_PRODUCT in errors_list[26] - assert errors.CONFLICTING_FARE_CONTAINER_ON_FARE_PRODUCT in errors_list[27] - assert errors.CONFLICTING_RIDER_CATEGORY_ON_FARE_CONTAINER in errors_list[28] - - assert len(errors_list) == 29 - -def test_required_fields(): - results = run_validator(path.join('tests', 'test_data', 'required_fields_test'), False) - errors_list = results['errors'] - - assert errors.REQUIRED_FIELD_MISSING in errors_list[0] - assert 'areas.txt' in errors_list[0] - - assert errors.REQUIRED_FIELD_MISSING in errors_list[1] - assert 'calendar.txt' in errors_list[1] - - assert errors.REQUIRED_FIELD_MISSING in errors_list[2] - assert 'calendar_dates.txt' in errors_list[2] - - assert errors.REQUIRED_FIELD_MISSING in errors_list[3] - assert 'timeframes.txt' in errors_list[3] - - assert errors.REQUIRED_FIELD_MISSING in errors_list[4] - assert 'rider_categories.txt' in errors_list[4] - - assert errors.REQUIRED_FIELD_MISSING in errors_list[5] - assert 'fare_containers.txt' in errors_list[5] - - assert errors.REQUIRED_FIELD_MISSING in errors_list[6] - assert 'fare_products.txt' in errors_list[6] - - assert len(errors_list) == 7 \ No newline at end of file diff --git a/tests/test_warnings.py b/tests/test_warnings.py deleted file mode 100644 index e2b6649..0000000 --- a/tests/test_warnings.py +++ /dev/null @@ -1,47 +0,0 @@ -from run_validator import run_validator -from src import warnings -from os import path - -def test_warnings(): - results = run_validator(path.join('tests', 'test_data', 'warnings_test_gtfs'), True) - warnings_list = results['warnings'] - - # Stops / stop times warnings - assert warnings.UNUSED_AREAS_IN_STOPS in warnings_list[0] - - # Rider categories warnings - assert warnings.MAX_AGE_LESS_THAN_MIN_AGE in warnings_list[1] - assert warnings.VERY_LARGE_MIN_AGE in warnings_list[2] - assert warnings.VERY_LARGE_MAX_AGE in warnings_list[3] - - # Fare products warnings - assert warnings.OFFSET_AMOUNT_WITHOUT_OFFSET_UNIT in warnings_list[4] - - # Fare leg rule warnings - assert warnings.UNUSED_AREA_IDS in warnings_list[5] - assert warnings.UNUSED_NETWORK_IDS in warnings_list[6] - - # Fare transfer rule warnings - assert warnings.UNUSED_LEG_GROUPS in warnings_list[7] - - # generic warnings - assert warnings.UNUSED_TIMEFRAME_IDS in warnings_list[8] - - assert len(warnings_list) == 9 - -def test_warnings_nonexistent_files(): - results = run_validator(path.join('tests', 'test_data', 'no_files'), True) - warnings_list = results['warnings'] - - assert warnings.NO_AREAS in warnings_list[0] - assert warnings.NO_ROUTES in warnings_list[1] - assert warnings.NO_STOPS in warnings_list[2] - 
assert warnings.NO_SERVICE_IDS in warnings_list[3] - assert warnings.NO_TIMEFRAMES in warnings_list[4] - assert warnings.NO_RIDER_CATEGORIES in warnings_list[5] - assert warnings.NO_FARE_CONTAINERS in warnings_list[6] - assert warnings.NO_FARE_PRODUCTS in warnings_list[7] - assert warnings.NO_FARE_LEG_RULES in warnings_list[8] - assert warnings.NO_FARE_TRANSFER_RULES in warnings_list[9] - - assert len(warnings_list) == 10 \ No newline at end of file diff --git a/validate.py b/validate.py old mode 100644 new mode 100755 index 5a722a6..9d6effd --- a/validate.py +++ b/validate.py @@ -1,50 +1,5 @@ -import argparse -from run_validator import run_validator -from os import path +#!/usr/bin/env python3 +from fares_validator.__main__ import main -def print_results(errors, warnings): - output = '' - if len(errors) > 0: - output += 'ERRORS:\n' - - for error in errors: - output += '\n' + error + '\n' - else: - output += 'No errors detected.\n' - - if len(warnings) > 0: - output += '\n\nWARNINGS:\n' - - for warning in warnings: - output += '\n' + warning + '\n' - else: - output += '\n\nNo warnings to report.' - - return output - -parser = argparse.ArgumentParser(description='Validate GTFS fares-v2 data.') -parser.add_argument("-s", "--read-stop-times", help="scans stop_times for area_ids", action='store_true') -parser.add_argument("-o", "--output-file", type=str, help="export the errors and warnings to a file") -parser.add_argument("input_gtfs_folder", type=str, help="path to unzipped folder containing fares-v2 GTFS") - -args = parser.parse_args() - -gtfs_path = args.input_gtfs_folder -if not path.isdir(gtfs_path): - raise Exception('Input path is not a valid folder.') - -read_stop_times = False -if args.read_stop_times: - read_stop_times = True - -results = run_validator(gtfs_path, read_stop_times) -output = print_results(results['errors'], results['warnings']) - -if args.output_file: - try: - f = open(args.output_file, 'w') - f.write(output) - except Exception: - raise Exception('Writing to output file failed. Please ensure the output file path is valid.') -else: - print(output) +if __name__ == '__main__': + main()
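
For context, here is a quick usage sketch of how the validator is invoked after this reorganization. It is illustrative only and not part of the diff: the paths `report.txt` and `path/to/unzipped_gtfs_folder` are placeholders, and it assumes the packaged CLI keeps the same flags as the old `validate.py` deleted above (`-s`/`--read-stop-times`, `-o`/`--output-file`, plus a positional path to the unzipped GTFS folder).

    # from the repository root, or after installing the package (`pip install .`, per setup.py above)
    python -m fares_validator -s -o report.txt path/to/unzipped_gtfs_folder

    # or via the thin wrapper script, which is now executable and simply
    # delegates to fares_validator.__main__.main()
    ./validate.py -s -o report.txt path/to/unzipped_gtfs_folder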