Skip to content

Commit

Permalink
fix tests, format
Browse files Browse the repository at this point in the history
  • Loading branch information
jsteelz committed Jun 1, 2022
1 parent e1e2404 commit 9a6fff7
Show file tree
Hide file tree
Showing 29 changed files with 381 additions and 490 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
.DS_Store
__pycache__
.vscode
2 changes: 2 additions & 0 deletions .style.yapf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[style]
based_on_style = google
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,13 @@ This tool is based on the fares-v2 [draft specification](https://docs.google.com

The tool validates ONLY fares-v2-specific files and the files they depend on; it does NOT validate GTFS schedule data.

The tool does NOT read areas from stop_times.txt for performance reasons, but it can do so using the -s option defined below.

## Requirements

python 3

## Validate a fares dataset

`python3 validate.py PATH-TO-FOLDER-CONTAINING-FARES-V2-DATASET [-s, --read-stop-times] [-o, --output-file FILE-TO-EXPORT-VALIDATION-REPORT-TO]`
`python3 validate.py PATH-TO-FOLDER-CONTAINING-FARES-V2-DATASET [-o, --output-file FILE-TO-EXPORT-VALIDATION-REPORT-TO]`

For example:

Expand Down
19 changes: 15 additions & 4 deletions fares_validator/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,18 @@

def main():
parser = argparse.ArgumentParser(description='Validate GTFS fares-v2 data.')
parser.add_argument("-s", "--read-stop-times", help="Scan stop_times for area_ids", action='store_true')
parser.add_argument("-o", "--output-file", type=str, help="Export the errors and warnings to a file")
parser.add_argument("input_gtfs_folder", type=str, help="Path to unzipped folder containing the Fares-v2 GTFS")
parser.add_argument("-s",
"--read-stop-times",
help="Scan stop_times for area_ids",
action='store_true')
parser.add_argument("-o",
"--output-file",
type=str,
help="Export the errors and warnings to a file")
parser.add_argument(
"input_gtfs_folder",
type=str,
help="Path to unzipped folder containing the Fares-v2 GTFS")

args = parser.parse_args()

Expand All @@ -25,7 +34,9 @@ def main():
f = open(args.output_file, 'w')
f.write(output)
except Exception:
raise Exception('Writing to output file failed. Please ensure the output file path is valid.')
raise Exception(
'Writing to output file failed. Please ensure the output file path is valid.'
)
else:
print(output)

Expand Down
1 change: 1 addition & 0 deletions fares_validator/diagnostics.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def format(code, line_context='', path='', extra_info=''):


class Diagnostics:

def __init__(self):
self.errors = []
self.warnings = []
Expand Down
29 changes: 5 additions & 24 deletions fares_validator/errors.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
# generic errors (see utils.py)
AMOUNT_WITH_MIN_OR_MAX_AMOUNT = 'An amount is defined alongside at least one of min_ or max_amount.'
AMOUNT_WITHOUT_CURRENCY = 'An amount field is defined without a currency to accompany it.'
CONFLICTING_FARE_CONTAINER_ON_FARE_PRODUCT = 'A fare_container referenced conflicts with the fare_container on the fare_product.'
CONFLICTING_RIDER_CATEGORY_ON_FARE_CONTAINER = 'A rider_category referenced conflicts with the rider_category on the fare_container.'
CONFLICTING_RIDER_CATEGORY_ON_FARE_PRODUCT = 'A rider_category referenced conflicts with the rider_category on the fare_product.'
CURRENCY_WITHOUT_AMOUNT = 'A currency is defined without an amount field to accompany it.'
FOREIGN_ID_INVALID = 'An id defined in a dependent table is referenced, but does not exist in that table.'
INVALID_AMOUNT_FORMAT = 'An amount field is defined, but is not an integer or float.'
Expand All @@ -16,16 +13,13 @@
UNRECOGNIZED_CURRENCY_CODE = 'A currency code is unrecognized.'

# areas.txt
DUPLICATE_AREAS_TXT_ENTRY = 'There are two entries in areas.txt with the same area_id and greater_area_id.'
DUPLICATE_AREAS_TXT_ENTRY = 'There are two entries in areas.txt with the same area_id.'
EMPTY_AREA_ID_AREAS = 'An entry in areas.txt has empty area_id.'
GREATER_AREA_ID_LOOP = 'Some area_ids have themselves as greater_area_ids.'
UNDEFINED_GREATER_AREA_ID = 'A greater_area_id is not defined as an area_id in areas.txt.'

# stop_areas.txt
DUPLICATE_STOP_AREAS_TXT_ENTRY = 'There are two entries in stop_areas.txt with the same area_id and stop_id.'
EMPTY_AREA_ID_STOP_AREAS = 'An entry in stop_areas.txt has empty area_id.'
EMPTY_STOP_ID_STOP_AREAS = 'An entry in stop_areas.txt has empty stop_id.'
INVALID_AREA_ID = 'An entry in stop_areas.txt references a non-existent area_id.'
INVALID_STOP_ID = 'An entry in stop_areas.txt references a non-existent stop_id.'

# calendar.txt, calendar_dates.txt
DUPLICATE_SERVICE_ID = 'A service_id is defined twice in calendar.txt.'
Expand Down Expand Up @@ -73,36 +67,23 @@
TIMEFRAME_TYPE_WITHOUT_TIMEFRAME = 'A timeframe_type in fare_products.txt is defined without a timeframe_id.'

# fare_leg_rules.txt
AMOUNT_WITH_FARE_PRODUCT = 'An entry in fare_leg_rules.txt has both a fare_product and an amount field defined.'
AREA_WITHOUT_IS_SYMMETRICAL = 'A from_ and/or to_area in fare_leg_rules.txt is defined without is_symmetrical.'
CONTAINS_AREA_WITHOUT_FROM_TO_AREA = 'A contains_area in fare_leg_rules.txt is defined without a from and to area.'
DISTANCE_TYPE_WITHOUT_DISTANCE = 'A distance_type in fare_leg_rules.txt is defined without a min_ or max_distance.'
DISTANCE_WITHOUT_DISTANCE_TYPE = 'A min_ or max_distance in fare_leg_rules.txt is defined without a distance_type.'
FARE_LEG_NAME_WITH_FARE_PRODUCT = 'An entry in fare_leg_rules.txt has both a fare_product and a fare_leg_name field defined.'
INVALID_DISTANCE_TYPE = 'A distance_type in fare_leg_rules.txt has an invalid value.'
INVALID_IS_SYMMETRICAL_LEG_RULES = 'An is_symmetrical in fare_leg_rules.txt is not one of the accepted values.'
INVALID_MAX_DISTANCE = 'A max_distance in fare_leg_rules.txt is not a float.'
INVALID_MIN_DISTANCE = 'A min_distance in fare_leg_rules.txt is not a float.'
IS_SYMMETRICAL_WITHOUT_FROM_TO_AREA = 'An is_symmetrical in fare_leg_rules.txt is defined without a from_ and/or to_area.'
NEGATIVE_MAX_DISTANCE = 'A max_distance in fare_leg_rules.txt is negative.'
NEGATIVE_MIN_DISTANCE = 'A min_distance in fare_leg_rules.txt is negative.'

# fare_transfer_rules.txt
AMOUNT_WITHOUT_FARE_TRANSFER_TYPE = 'An entry in fare_transfer_rules.txt has an amount field defined without fare_transfer_type.'
DURATION_LIMIT_WITHOUT_LIMIT_TYPE = 'An entry in fare_transfer_rules.txt has duration_limit without duration_limit_type.'
DURATION_LIMIT_TYPE_WITHOUT_DURATION = 'An entry in fare_transfer_rules.txt has duration_limit_type without duration_limit.'
FARE_TRANSFER_TYPE_WITHOUT_AMOUNT = 'An entry in fare_transfer_rules.txt has fare_transfer_type defined without an amount field.'
INVALID_DURATION_LIMIT = 'An entry in fare_transfer_rules.txt has duration_limit with invalid value.'
INVALID_DURATION_LIMIT_TYPE = 'An entry in fare_transfer_rules.txt has duration_limit_type with invalid value.'
INVALID_FARE_TRANSFER_TYPE = 'An entry in fare_transfer_rules.txt has fare_transfer_type with invalid value.'
INVALID_FROM_LEG_GROUP = 'A from_leg_group_id in fare_transfer_rules.txt is not defined in fare_leg_rules.txt.'
INVALID_IS_SYMMETRICAL_TRANSFER_RULES = 'An is_symmetrical in fare_transfer_rules.txt is not one of the accepted values.'
INVALID_SPANNING_LIMIT = 'An entry in fare_transfer_rules.txt has spanning_limit with incorrect type or invalid integer value.'
INVALID_TRANSFER_COUNT = 'An entry in fare_transfer_rules.txt has transfer_count with incorrect type or invalid integer value.'
INVALID_TO_LEG_GROUP = 'A to_leg_group_id in fare_transfer_rules.txt is not defined in fare_leg_rules.txt.'
INVALID_TRANSFER_SEQUENCE = 'A transfer_sequence in fare_transfer_rules.txt has incorrect type or invalid integer value.'
IS_SYMMETRICAL_WITHOUT_FROM_TO_LEG_GROUP = 'An is_symmetrical in fare_transfer_rules.txt is defined without a from_ and/or to_leg_group_id.'
LEG_GROUP_WITHOUT_IS_SYMMETRICAL = 'A from_ and/or to_leg_group_id in fare_transfer_rules.txt is defined without is_symmetrical.'
SPANNING_LIMIT_WITH_BAD_LEGS = 'An entry in fare_transfer_rules.txt has spanning_limit with different from and to leg group ids.'
SPANNING_LIMIT_WITH_TRANSFER_ID = 'An entry in fare_transfer_rules.txt has spanning_limit with transfer_id defined.'
TRANSFER_ID_WITHOUT_TRANSFER_SEQUENCE = 'A transfer_id in fare_transfer_rules.txt is defined without a transfer_sequence.'
TRANSFER_SEQUENCE_WITHOUT_TRANSFER_ID = 'A transfer_sequence in fare_transfer_rules.txt is defined without a transfer_id.'
NONEXISTENT_FILTER_FARE_PRODUCT_ID = 'A filter_fare_product referenced is not defined in fare_products.txt.'
TRANSFER_COUNT_WITH_BAD_LEGS = 'An entry in fare_transfer_rules.txt has transfer_count with different from and to leg group ids.'
13 changes: 0 additions & 13 deletions fares_validator/fare_leg_rule_checkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,13 @@


def check_areas(line, areas, unused_areas):
if line.is_symmetrical and line.is_symmetrical not in {'0', '1'}:
line.add_error(INVALID_IS_SYMMETRICAL_LEG_RULES)

if line.contains_area_id and (not line.from_area_id and not line.to_area_id):
line.add_error(CONTAINS_AREA_WITHOUT_FROM_TO_AREA)

if (line.from_area_id or line.to_area_id) and not line.is_symmetrical:
line.add_error(AREA_WITHOUT_IS_SYMMETRICAL)

if (not line.from_area_id and not line.to_area_id) and line.is_symmetrical:
line.add_error(IS_SYMMETRICAL_WITHOUT_FROM_TO_AREA)

if line.from_area_id and line.from_area_id in unused_areas:
unused_areas.remove(line.from_area_id)
if line.to_area_id and line.to_area_id in unused_areas:
unused_areas.remove(line.to_area_id)

utils.check_linked_id(line, 'from_area_id', areas)
utils.check_linked_id(line, 'to_area_id', areas)
utils.check_linked_id(line, 'contains_area_id', areas)


def check_distances(line):
Expand Down
22 changes: 16 additions & 6 deletions fares_validator/fare_product_checkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,17 @@


class LinkedEntities:

def __init__(self):
self.rider_category_ids = set()
self.fare_container_ids = set()


def check_linked_fp_entities(line, rider_categories, rider_category_by_fare_container, linked_entities_by_fare_product):
linked_entities = linked_entities_by_fare_product.setdefault(line.fare_product_id, LinkedEntities())
def check_linked_fp_entities(line, rider_categories,
rider_category_by_fare_container,
linked_entities_by_fare_product):
linked_entities = linked_entities_by_fare_product.setdefault(
line.fare_product_id, LinkedEntities())

if line.rider_category_id:
linked_entities.rider_category_ids.add(line.rider_category_id)
Expand All @@ -23,8 +27,10 @@ def check_linked_fp_entities(line, rider_categories, rider_category_by_fare_cont
if line.fare_container_id not in rider_category_by_fare_container:
line.add_error(NONEXISTENT_FARE_CONTAINER_ID)

fare_container_rider_cat = rider_category_by_fare_container.get(line.fare_container_id)
if line.rider_category_id and fare_container_rider_cat and (line.rider_category_id != fare_container_rider_cat):
fare_container_rider_cat = rider_category_by_fare_container.get(
line.fare_container_id)
if line.rider_category_id and fare_container_rider_cat and (
line.rider_category_id != fare_container_rider_cat):
line.add_error(CONFLICTING_RIDER_CATEGORY_ON_FARE_CONTAINER)
else:
linked_entities.fare_container_ids.add('')
Expand All @@ -46,7 +52,9 @@ def check_durations_and_offsets(line):
if line.duration_start and line.duration_start not in {'0', '1'}:
line.add_error(INVALID_DURATION_START)

if line.duration_unit and line.duration_unit not in {'0', '1', '2', '3', '4', '5', '6'}:
if line.duration_unit and line.duration_unit not in {
'0', '1', '2', '3', '4', '5', '6'
}:
line.add_error(INVALID_DURATION_UNIT)

if line.duration_type and line.duration_type not in {'1', '2'}:
Expand Down Expand Up @@ -74,7 +82,9 @@ def check_durations_and_offsets(line):
if line.duration_unit:
line.add_error(DURATION_UNIT_WITHOUT_AMOUNT)

if line.offset_unit and line.offset_unit not in {'0', '1', '2', '3', '4', '5', '6'}:
if line.offset_unit and line.offset_unit not in {
'0', '1', '2', '3', '4', '5', '6'
}:
line.add_error(INVALID_OFFSET_UNIT)

if line.offset_amount:
Expand Down
40 changes: 10 additions & 30 deletions fares_validator/fare_transfer_rule_checkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,6 @@


def check_leg_groups(line, leg_group_ids, unused_leg_groups):
if line.is_symmetrical and line.is_symmetrical not in {'0', '1'}:
line.add_error(INVALID_IS_SYMMETRICAL_TRANSFER_RULES)
if (line.from_leg_group_id or line.to_leg_group_id) and not line.is_symmetrical:
line.add_error(LEG_GROUP_WITHOUT_IS_SYMMETRICAL)
if (not line.from_leg_group_id and not line.to_leg_group_id) and line.is_symmetrical:
line.add_error(IS_SYMMETRICAL_WITHOUT_FROM_TO_LEG_GROUP)
if line.from_leg_group_id and not line.from_leg_group_id in leg_group_ids:
line.add_error(INVALID_FROM_LEG_GROUP)
if line.to_leg_group_id and not line.to_leg_group_id in leg_group_ids:
Expand All @@ -19,36 +13,22 @@ def check_leg_groups(line, leg_group_ids, unused_leg_groups):
unused_leg_groups.remove(line.to_leg_group_id)


def check_spans_and_transfer_ids(line):
if line.spanning_limit:
def check_transfer_count(line):
if line.transfer_count:
if line.from_leg_group_id != line.to_leg_group_id:
line.add_error(SPANNING_LIMIT_WITH_BAD_LEGS)
if line.transfer_id:
line.add_error(SPANNING_LIMIT_WITH_TRANSFER_ID)
line.add_error(TRANSFER_COUNT_WITH_BAD_LEGS)
try:
limit = int(line.spanning_limit)
if limit <= 1:
line.add_error(INVALID_SPANNING_LIMIT)
limit = int(line.transfer_count)
if limit < 1 and limit != -1:
line.add_error(INVALID_TRANSFER_COUNT)
except ValueError:
line.add_error(INVALID_SPANNING_LIMIT)

if line.transfer_id:
if not line.transfer_sequence:
line.add_error(TRANSFER_ID_WITHOUT_TRANSFER_SEQUENCE)

if line.transfer_sequence:
if not line.transfer_id:
line.add_error(TRANSFER_SEQUENCE_WITHOUT_TRANSFER_ID)
try:
seq = int(line.transfer_sequence)
if seq < 1:
line.add_error(INVALID_TRANSFER_SEQUENCE)
except ValueError:
line.add_error(INVALID_TRANSFER_SEQUENCE)
line.add_error(INVALID_TRANSFER_COUNT)


def check_durations(line):
if line.duration_limit_type and line.duration_limit_type not in {'0', '1', '2', '3'}:
if line.duration_limit_type and line.duration_limit_type not in {
'0', '1', '2', '3'
}:
line.add_error(INVALID_DURATION_LIMIT_TYPE)

if line.duration_limit:
Expand Down
23 changes: 10 additions & 13 deletions fares_validator/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,31 +23,28 @@ def run_validator(gtfs_root_dir, should_read_stop_times):

gtfs.networks = read_gtfs_entities.networks(gtfs_root_dir, results)

read_gtfs_entities.read_areas_in_stop_files(gtfs_root_dir, gtfs.areas, results, should_read_stop_times)

gtfs.service_ids = read_gtfs_entities.service_ids(gtfs_root_dir, results)

gtfs.timeframe_ids = read_fares_entities.timeframes(gtfs_root_dir, results)
unused_timeframes = gtfs.timeframe_ids.copy()

gtfs.rider_category_ids = read_fares_entities.rider_categories(gtfs_root_dir, results)
gtfs.rider_category_ids = read_fares_entities.rider_categories(
gtfs_root_dir, results)

gtfs.rider_category_by_fare_container = read_fares_entities.fare_containers(gtfs_root_dir,
gtfs.rider_category_ids,
results)
gtfs.rider_category_by_fare_container = read_fares_entities.fare_containers(
gtfs_root_dir, gtfs.rider_category_ids, results)

gtfs.linked_entities_by_fare_product = read_fares_entities.fare_products(gtfs_root_dir,
gtfs,
unused_timeframes,
results)
gtfs.linked_entities_by_fare_product = read_fares_entities.fare_products(
gtfs_root_dir, gtfs, unused_timeframes, results)

gtfs.leg_group_ids = read_fares_entities.fare_leg_rules(gtfs_root_dir, gtfs,
unused_timeframes, results)
gtfs.leg_group_ids = read_fares_entities.fare_leg_rules(
gtfs_root_dir, gtfs, unused_timeframes, results)

read_fares_entities.fare_transfer_rules(gtfs_root_dir, gtfs, results)

if len(unused_timeframes):
warning_info = 'Unused timeframes: ' + str(unused_timeframes)
results.add_warning(diagnostics.format(warn.UNUSED_TIMEFRAME_IDS, '', '', warning_info))
results.add_warning(
diagnostics.format(warn.UNUSED_TIMEFRAME_IDS, '', '', warning_info))

return results
Loading

0 comments on commit 9a6fff7

Please sign in to comment.