From 122870825085c67b4a60227b0a4404f1a4199839 Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Wed, 21 Sep 2022 13:01:09 -0400 Subject: [PATCH] print stuff for testing help --- airflow/dags/check_feed_aggregators/check_aggregators.yml | 1 + .../gtfs_aggregator_checker/__init__.py | 3 +++ .../gtfs_aggregator_checker/transitfeeds.py | 5 +++-- .../gtfs_aggregator_checker/transitland.py | 5 +++-- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/airflow/dags/check_feed_aggregators/check_aggregators.yml b/airflow/dags/check_feed_aggregators/check_aggregators.yml index d0e5af4dab..b91b145237 100644 --- a/airflow/dags/check_feed_aggregators/check_aggregators.yml +++ b/airflow/dags/check_feed_aggregators/check_aggregators.yml @@ -11,6 +11,7 @@ arguments: - "/secrets/agencies-data/data_agencies.yaml" - "--output={{get_bucket()}}/feed_aggregator_checks/dt={{execution_date.to_date_string()}}/checks.jsonl" - "--output-format=JSONL" + - "--progress" is_delete_operator_pod: true get_logs: true diff --git a/jobs/gtfs-aggregator-checker/gtfs_aggregator_checker/__init__.py b/jobs/gtfs-aggregator-checker/gtfs_aggregator_checker/__init__.py index 768565ce45..85a2a2a440 100644 --- a/jobs/gtfs-aggregator-checker/gtfs_aggregator_checker/__init__.py +++ b/jobs/gtfs-aggregator-checker/gtfs_aggregator_checker/__init__.py @@ -3,6 +3,7 @@ import urllib.request from collections import OrderedDict +import typer import yaml from .transitfeeds import get_transitfeeds_urls @@ -35,6 +36,7 @@ def check_feeds(yml_file=None, csv_file=None, url=None, progress=False): "transitland": {"status": "missing"}, } elif csv_file: + typer.echo(f"reading urls from {csv_file}") with open(csv_file, "r") as f: urls = f.read().strip().splitlines() for url in urls: @@ -44,6 +46,7 @@ def check_feeds(yml_file=None, csv_file=None, url=None, progress=False): "transitland": {"status": "missing"}, } else: + typer.echo(f"reading urls from {yml_file}") with open(yml_file, "r") as f: agencies_obj = yaml.load(f, Loader=yaml.SafeLoader) for agency in agencies_obj.values(): diff --git a/jobs/gtfs-aggregator-checker/gtfs_aggregator_checker/transitfeeds.py b/jobs/gtfs-aggregator-checker/gtfs_aggregator_checker/transitfeeds.py index 596041c555..51affa0133 100644 --- a/jobs/gtfs-aggregator-checker/gtfs_aggregator_checker/transitfeeds.py +++ b/jobs/gtfs-aggregator-checker/gtfs_aggregator_checker/transitfeeds.py @@ -1,5 +1,6 @@ from urllib.error import HTTPError +import typer from bs4 import BeautifulSoup from tqdm import tqdm @@ -18,7 +19,7 @@ def resolve_url(url): def get_transitfeeds_urls(progress=False): - print("fetching transit feeds URLs") + typer.echo("fetching transit feeds URLs") page_urls = [] provider_urls = [] @@ -49,7 +50,7 @@ def get_transitfeeds_urls(progress=False): try: html = curl_cached(feed_url) except HTTPError: - print("failed to fetch:", feed_url) + typer.echo(f"failed to fetch: {feed_url}") continue soup = BeautifulSoup(html, "html.parser") diff --git a/jobs/gtfs-aggregator-checker/gtfs_aggregator_checker/transitland.py b/jobs/gtfs-aggregator-checker/gtfs_aggregator_checker/transitland.py index ad91e5b1bb..9329aca9ff 100644 --- a/jobs/gtfs-aggregator-checker/gtfs_aggregator_checker/transitland.py +++ b/jobs/gtfs-aggregator-checker/gtfs_aggregator_checker/transitland.py @@ -1,6 +1,7 @@ import json from typing import List, Tuple +import typer from tqdm import tqdm from .cache import curl_cached @@ -33,7 +34,7 @@ def get_feeds(after=None): def get_transitland_urls(progress=False) -> List[Tuple[str, str]]: - print("fetching transitland URLs") + typer.echo("fetching transitland URLs") if not API_KEY: raise RuntimeError("TRANSITLAND_API_KEY must be set") @@ -52,5 +53,5 @@ def get_transitland_urls(progress=False) -> List[Tuple[str, str]]: if not after: break else: - print("WARNING: hit loop limit for transitland") + typer.echo("WARNING: hit loop limit for transitland") return urls