start addressing dependabot-flagged issues and add missing image build workflow (#1824)

* start addressing dependabot-flagged issues

* create feed checker image CI and adjust other names to be consistent

* print stuff for testing help

* bump image and deploy to test

* also update reqs
atvaccaro authored Sep 21, 2022
1 parent af4c6a8 commit 0358519
Showing 21 changed files with 214 additions and 1,844 deletions.
28 changes: 28 additions & 0 deletions .github/workflows/build-gtfs-aggregator-checker-image.yml
@@ -0,0 +1,28 @@
name: Build and push gtfs-aggregator-checker image

on:
  push:
    branches:
      - 'main'
    paths:
      - '.github/workflows/build-gtfs-aggregator-checker.yml'
      - 'jobs/gtfs-aggregator-checker/**'

jobs:
  build_push:
    name: Package docker image
    runs-on: ubuntu-18.04
    steps:
      - uses: actions/checkout@v2
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v1
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Build and push
        uses: docker/build-push-action@v2
        with:
          context: jobs/gtfs-aggregator-checker
          push: true
          tags: ghcr.io/${{github.repository}}/gtfs-aggregator-checker:latest
File renamed without changes.
File renamed without changes.
3 changes: 2 additions & 1 deletion airflow/dags/check_feed_aggregators/check_aggregators.yml
@@ -1,6 +1,6 @@
operator: operators.PodOperator
name: 'check-aggregators'
-image: 'ghcr.io/cal-itp/data-infra/gtfs-aggregator-checker:latest'
+image: 'ghcr.io/cal-itp/data-infra/gtfs-aggregator-checker:{{ image_tag() }}'

cmds:
- python3
@@ -11,6 +11,7 @@ arguments:
- "/secrets/agencies-data/data_agencies.yaml"
- "--output={{get_bucket()}}/feed_aggregator_checks/dt={{execution_date.to_date_string()}}/checks.jsonl"
- "--output-format=JSONL"
- "--progress"

is_delete_operator_pod: true
get_logs: true
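The image pin above moves from a floating :latest tag to a templated {{ image_tag() }} value. As a rough illustration only (an assumption, not this repo's actual implementation), such a helper is commonly exposed to DAG templates as an Airflow user-defined macro that resolves to the tag baked in at deploy time:

    # Illustrative sketch; IMAGE_TAG and its fallback are assumptions.
    import os

    def image_tag() -> str:
        # e.g. the commit SHA injected by the deploy pipeline, else "latest" locally
        return os.environ.get("IMAGE_TAG", "latest")

    # Registered on the DAG so '{{ image_tag() }}' renders inside operator fields:
    # dag = DAG(..., user_defined_macros={"image_tag": image_tag})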
4 changes: 2 additions & 2 deletions jobs/gtfs-aggregator-checker/Dockerfile
@@ -2,8 +2,8 @@ FROM python:3.7-buster

LABEL org.opencontainers.image.source https://github.com/cal-itp/data-infra

-RUN curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python -
-ENV PATH="${PATH}:/root/.poetry/bin"
+RUN curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/install-poetry.py | python3 -
+ENV PATH="/root/.local/bin:${PATH}"

RUN mkdir /app
WORKDIR /app
@@ -3,6 +3,7 @@
import urllib.request
from collections import OrderedDict

+import typer
import yaml

from .transitfeeds import get_transitfeeds_urls
@@ -35,6 +36,7 @@ def check_feeds(yml_file=None, csv_file=None, url=None, progress=False):
            "transitland": {"status": "missing"},
        }
    elif csv_file:
+        typer.echo(f"reading urls from {csv_file}")
        with open(csv_file, "r") as f:
            urls = f.read().strip().splitlines()
        for url in urls:
@@ -44,6 +46,7 @@
                "transitland": {"status": "missing"},
            }
    else:
+        typer.echo(f"reading urls from {yml_file}")
        with open(yml_file, "r") as f:
            agencies_obj = yaml.load(f, Loader=yaml.SafeLoader)
        for agency in agencies_obj.values():
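For context on the print-to-typer.echo swap in this and the following files: typer.echo wraps click.echo, so it handles bytes and closed pipes more gracefully than print and can route messages to stderr. A minimal standalone sketch (the URL below is only a placeholder):

    import typer

    typer.echo("reading urls from agencies.yml")                       # stdout
    typer.echo("failed to fetch: https://example.com/feed", err=True)  # stderr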
@@ -1,5 +1,6 @@
from urllib.error import HTTPError

+import typer
from bs4 import BeautifulSoup
from tqdm import tqdm

@@ -18,7 +19,7 @@ def resolve_url(url):


def get_transitfeeds_urls(progress=False):
-    print("fetching transit feeds URLs")
+    typer.echo("fetching transit feeds URLs")

    page_urls = []
    provider_urls = []
@@ -49,7 +50,7 @@
        try:
            html = curl_cached(feed_url)
        except HTTPError:
-            print("failed to fetch:", feed_url)
+            typer.echo(f"failed to fetch: {feed_url}")
            continue

        soup = BeautifulSoup(html, "html.parser")
@@ -1,6 +1,7 @@
import json
from typing import List, Tuple

+import typer
from tqdm import tqdm

from .cache import curl_cached
@@ -33,7 +34,7 @@ def get_feeds(after=None):


def get_transitland_urls(progress=False) -> List[Tuple[str, str]]:
-    print("fetching transitland URLs")
+    typer.echo("fetching transitland URLs")
    if not API_KEY:
        raise RuntimeError("TRANSITLAND_API_KEY must be set")

@@ -52,5 +53,5 @@
        if not after:
            break
    else:
-        print("WARNING: hit loop limit for transitland")
+        typer.echo("WARNING: hit loop limit for transitland")
    return urls
668 changes: 54 additions & 614 deletions jobs/gtfs-aggregator-checker/poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion jobs/gtfs-rt-parser-v2/poetry.lock

Some generated files are not rendered by default.

4 changes: 2 additions & 2 deletions jobs/gtfs-rt-parser/Dockerfile
@@ -8,8 +8,8 @@ ENV GTFS_VALIDATOR_VERSION=v1.0.0
RUN apt-get update -y \
    && apt-get install -y python3 python3-pip

-RUN curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python3 -
-ENV PATH="${PATH}:/root/.poetry/bin"
+RUN curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/install-poetry.py | python3 -
+ENV PATH="/root/.local/bin:${PATH}"

# formerly the "1.0.0-SNAPSHOT" from S3
COPY ./rt-validator.jar ${GTFS_RT_VALIDATOR_JAR}
