diff --git a/.github/workflows/static_python_checks.yaml b/.github/workflows/ci.yaml similarity index 60% rename from .github/workflows/static_python_checks.yaml rename to .github/workflows/ci.yaml index db765bc3..52807ac9 100644 --- a/.github/workflows/static_python_checks.yaml +++ b/.github/workflows/ci.yaml @@ -1,7 +1,7 @@ # https://help.github.com/en/github/automating-your-workflow-with-github-actions # https://help.github.com/en/github/automating-your-workflow-with-github-actions/workflow-syntax-for-github-actions -name: Static Python checks +name: CI on: [push] # push: @@ -9,8 +9,8 @@ on: [push] # - "*.py" jobs: - flake8: - name: Flake8 + precommit: + name: Pre-commit runs-on: ubuntu-18.04 # https://help.github.com/en/github/automating-your-workflow-with-github-actions/workflow-syntax-for-github-actions#jobsjob_idruns-on steps: - uses: actions/checkout@v1 # https://help.github.com/en/github/automating-your-workflow-with-github-actions/configuring-a-workflow#using-the-checkout-action @@ -20,17 +20,7 @@ jobs: python-version: 3.7 architecture: x64 - - name: Install Flake8 - run: pip install flake8==3.7.9 - - name: Run Flake8 - run: flake8 - - # - name: Install Pylint - # run: pip install pylint - # - name: Run Pylint - # run: find . -name "*.py" | xargs pylint --output-format=colorized - - - name: Install Black - run: pip install black - - name: Run Black - run: find . 
-name "*.py" | xargs black --check + - name: Install pre-commit + run: pip install pre-commit==2.7.1 + - name: Run pre-commit + run: pre-commit run --all diff --git a/.gitignore b/.gitignore index ec42de47..0bb2f91c 100644 --- a/.gitignore +++ b/.gitignore @@ -144,4 +144,8 @@ dmypy.json # Data folders data +# Pyenv +.python-version + +# Mac .DS_Store diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..f305e910 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,14 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: 'v2.3.0' + hooks: + - id: end-of-file-fixer + - id: trailing-whitespace + - repo: https://github.com/psf/black + rev: '19.3b0' + hooks: + - id: black + - repo: https://gitlab.com/pycqa/flake8 + rev: '3.8.4' # keep in sync with the flake8 version pinned in requirements.txt + hooks: + - id: flake8 diff --git a/Makefile b/Makefile index bf694612..e78acf57 100644 --- a/Makefile +++ b/Makefile @@ -52,4 +52,3 @@ docker: ## Build the Airflow Docker image .PHONY: flake8 flake8: flake8 . 
- diff --git a/chapters/chapter10/dags/sla_misses.py b/chapters/chapter10/dags/sla_misses.py index 0e09af79..0fd77ec3 100644 --- a/chapters/chapter10/dags/sla_misses.py +++ b/chapters/chapter10/dags/sla_misses.py @@ -13,12 +13,10 @@ def send_slack_message(message): pass -default_args = { - "sla": timedelta(seconds=10), -} +default_args = {"sla": timedelta(seconds=10)} with DAG( - dag_id=f"chapter10_sla", + dag_id="chapter10_sla", start_date=date_utils.days_ago(2), schedule_interval="@daily", default_args=default_args, diff --git a/chapters/chapter13/aws/.env.template b/chapters/chapter13/aws/.env.template index da8419ee..da7bc6fe 100644 --- a/chapters/chapter13/aws/.env.template +++ b/chapters/chapter13/aws/.env.template @@ -3,4 +3,4 @@ AWS_ACCESS_SECRET= RATINGS_BUCKET= RANKINGS_BUCKET= -CRAWLER_NAME= \ No newline at end of file +CRAWLER_NAME= diff --git a/chapters/chapter13/aws/scripts/fetch_data.py b/chapters/chapter13/aws/scripts/fetch_data.py index a579dd4e..2b8b3259 100644 --- a/chapters/chapter13/aws/scripts/fetch_data.py +++ b/chapters/chapter13/aws/scripts/fetch_data.py @@ -30,7 +30,7 @@ def fetch_ratings(url): with zipfile.ZipFile(tmp_path) as zip_: logging.info(f"Downloaded zip file with contents: {zip_.namelist()}") - logging.info(f"Reading ml-20m/ratings.csv from zip file") + logging.info("Reading ml-20m/ratings.csv from zip file") with zip_.open("ml-20m/ratings.csv") as file_: ratings = pd.read_csv(file_) diff --git a/chapters/chapter13/azure/dags/azure_usecase.py b/chapters/chapter13/azure/dags/azure_usecase.py index b10509e1..4b45e0b1 100644 --- a/chapters/chapter13/azure/dags/azure_usecase.py +++ b/chapters/chapter13/azure/dags/azure_usecase.py @@ -56,7 +56,7 @@ def _upload_ratings(wasb_conn_id, container, **context): logging.info(f"Writing results to {container}/{year}/{month:02d}.csv") hook = WasbHook(wasb_conn_id) hook.load_file( - tmp_path, container_name=container, blob_name=f"{year}/{month:02d}.csv", + tmp_path, container_name=container, 
blob_name=f"{year}/{month:02d}.csv" ) diff --git a/chapters/chapter13/azure/scripts/fetch_data.py b/chapters/chapter13/azure/scripts/fetch_data.py index a579dd4e..2b8b3259 100644 --- a/chapters/chapter13/azure/scripts/fetch_data.py +++ b/chapters/chapter13/azure/scripts/fetch_data.py @@ -30,7 +30,7 @@ def fetch_ratings(url): with zipfile.ZipFile(tmp_path) as zip_: logging.info(f"Downloaded zip file with contents: {zip_.namelist()}") - logging.info(f"Reading ml-20m/ratings.csv from zip file") + logging.info("Reading ml-20m/ratings.csv from zip file") with zip_.open("ml-20m/ratings.csv") as file_: ratings = pd.read_csv(file_) diff --git a/chapters/chapter13/gcp/.env.template b/chapters/chapter13/gcp/.env.template index c494cf5d..e6cae80d 100644 --- a/chapters/chapter13/gcp/.env.template +++ b/chapters/chapter13/gcp/.env.template @@ -2,4 +2,4 @@ BIGQUERY_DATASET= GCP_KEY= GCP_PROJECT= RATINGS_BUCKET= -RESULT_BUCKET= \ No newline at end of file +RESULT_BUCKET= diff --git a/chapters/chapter13/gcp/scripts/fetch_data.py b/chapters/chapter13/gcp/scripts/fetch_data.py index a579dd4e..2b8b3259 100644 --- a/chapters/chapter13/gcp/scripts/fetch_data.py +++ b/chapters/chapter13/gcp/scripts/fetch_data.py @@ -30,7 +30,7 @@ def fetch_ratings(url): with zipfile.ZipFile(tmp_path) as zip_: logging.info(f"Downloaded zip file with contents: {zip_.namelist()}") - logging.info(f"Reading ml-20m/ratings.csv from zip file") + logging.info("Reading ml-20m/ratings.csv from zip file") with zip_.open("ml-20m/ratings.csv") as file_: ratings = pd.read_csv(file_) diff --git a/chapters/chapter14/rbac/README.md b/chapters/chapter14/rbac/README.md index 0cb8bc49..20d06530 100644 --- a/chapters/chapter14/rbac/README.md +++ b/chapters/chapter14/rbac/README.md @@ -10,7 +10,7 @@ docker-compose up -d Wait 5 seconds or so for the webserver to come up (Both the webserver & init containers must run a command, which must be done in the correct order. 
Normally this is done by a human, but in the scripts we used a -`sleep(5 seconds)` to ensure the correct ordering, which delays the webserver startup by 5 seconds). +`sleep(5 seconds)` to ensure the correct ordering, which delays the webserver startup by 5 seconds). Login in Airflow username/password `airflow`/`airflow`. diff --git a/chapters/chapter15/README.md b/chapters/chapter15/README.md index db64a4ba..eb725a87 100644 --- a/chapters/chapter15/README.md +++ b/chapters/chapter15/README.md @@ -64,4 +64,4 @@ A snippet of the data: 917,"2019-01-01 13:03:44.7760","2019-01-01 13:19:02.7690",3183,"Exchange Place",40.7162469,-74.0334588,3277,"Communipaw & Berry Lane",40.71435836870427,-74.06661093235016,29299,"Subscriber",1986,1 3248,"2019-01-01 13:12:03.1280","2019-01-01 14:06:12.0400",3183,"Exchange Place",40.7162469,-74.0334588,3196,"Riverview Park",40.7443187,-74.0439909,29495,"Subscriber",1992,1 3168,"2019-01-01 13:13:12.0450","2019-01-01 14:06:00.4110",3183,"Exchange Place",40.7162469,-74.0334588,3196,"Riverview Park",40.7443187,-74.0439909,26312,"Customer",1969,0 -``` \ No newline at end of file +``` diff --git a/chapters/chapter15/services/nyc_transportation_api/app.py b/chapters/chapter15/services/nyc_transportation_api/app.py index 7bcfc170..44407306 100644 --- a/chapters/chapter15/services/nyc_transportation_api/app.py +++ b/chapters/chapter15/services/nyc_transportation_api/app.py @@ -290,7 +290,7 @@ def index(): ) cursor = conn.cursor() cursor.execute( - f"""with citibike as ( + """with citibike as ( select start_location_id, end_location_id, diff --git a/chapters/chapter15/services/nyc_transportation_api/templates/index.html b/chapters/chapter15/services/nyc_transportation_api/templates/index.html index ec1c7d1e..70e1b3a1 100644 --- a/chapters/chapter15/services/nyc_transportation_api/templates/index.html +++ b/chapters/chapter15/services/nyc_transportation_api/templates/index.html @@ -44,4 +44,4 @@ - \ No newline at end of file + diff --git 
a/chapters/chapter4/create_table.sql b/chapters/chapter4/create_table.sql index 0c86677f..ead970f9 100644 --- a/chapters/chapter4/create_table.sql +++ b/chapters/chapter4/create_table.sql @@ -14,4 +14,4 @@ FROM ( FROM pageview_counts GROUP BY pagename, hr ) AS x -WHERE row_number=1; \ No newline at end of file +WHERE row_number=1; diff --git a/chapters/chapter5/dags/chapter5_02_branch_in_function.py b/chapters/chapter5/dags/chapter5_02_branch_in_function.py index 7f5cbfa8..1a683d53 100644 --- a/chapters/chapter5/dags/chapter5_02_branch_in_function.py +++ b/chapters/chapter5/dags/chapter5_02_branch_in_function.py @@ -48,7 +48,7 @@ def _clean_sales_new(**context): task_id="fetch_sales", python_callable=_fetch_sales, provide_context=True ) clean_sales = PythonOperator( - task_id="clean_sales", python_callable=_clean_sales, provide_context=True, + task_id="clean_sales", python_callable=_clean_sales, provide_context=True ) fetch_weather = DummyOperator(task_id="fetch_weather") diff --git a/chapters/chapter5/dags/chapter5_06_condition_in_dag.py b/chapters/chapter5/dags/chapter5_06_condition_in_dag.py index 2a56071a..fef270c0 100644 --- a/chapters/chapter5/dags/chapter5_06_condition_in_dag.py +++ b/chapters/chapter5/dags/chapter5_06_condition_in_dag.py @@ -53,7 +53,7 @@ def _latest_only(**context): train_model = DummyOperator(task_id="train_model") latest_only = PythonOperator( - task_id="latest_only", python_callable=_latest_only, provide_context=True, + task_id="latest_only", python_callable=_latest_only, provide_context=True ) deploy_model = DummyOperator(task_id="deploy_model") diff --git a/chapters/chapter5/dags/chapter5_08_xcoms.py b/chapters/chapter5/dags/chapter5_08_xcoms.py index 54645632..d624119c 100644 --- a/chapters/chapter5/dags/chapter5_08_xcoms.py +++ b/chapters/chapter5/dags/chapter5_08_xcoms.py @@ -35,11 +35,11 @@ def _deploy_model(**context): join_datasets = DummyOperator(task_id="join_datasets") train_model = PythonOperator( - task_id="train_model", 
python_callable=_train_model, provide_context=True, + task_id="train_model", python_callable=_train_model, provide_context=True ) deploy_model = PythonOperator( - task_id="deploy_model", python_callable=_deploy_model, provide_context=True, + task_id="deploy_model", python_callable=_deploy_model, provide_context=True ) start >> [fetch_sales, fetch_weather] diff --git a/chapters/chapter6/dags/couponing_app_split_ingest_metrics.py b/chapters/chapter6/dags/couponing_app_split_ingest_metrics.py index 372690a8..9ffbf585 100644 --- a/chapters/chapter6/dags/couponing_app_split_ingest_metrics.py +++ b/chapters/chapter6/dags/couponing_app_split_ingest_metrics.py @@ -41,7 +41,7 @@ def _wait_for_supermarket(supermarket_id_): ) wait >> copy >> process >> trigger_create_metrics_dag -compute_differences = DummyOperator(task_id=f"compute_differences", dag=dag2) -update_dashboard = DummyOperator(task_id=f"update_dashboard", dag=dag2) -notify_new_data = DummyOperator(task_id=f"notify_new_data", dag=dag2) +compute_differences = DummyOperator(task_id="compute_differences", dag=dag2) +update_dashboard = DummyOperator(task_id="update_dashboard", dag=dag2) +notify_new_data = DummyOperator(task_id="notify_new_data", dag=dag2) compute_differences >> update_dashboard diff --git a/chapters/chapter7/dags/movielens_python_operator.py b/chapters/chapter7/dags/movielens_python_operator.py index 40ce2b38..330629b1 100644 --- a/chapters/chapter7/dags/movielens_python_operator.py +++ b/chapters/chapter7/dags/movielens_python_operator.py @@ -87,7 +87,7 @@ def _fetch_ratings(templates_dict, batch_size=1000, **_): logger.info(f"Fetching ratings for {start_date} to {end_date}") ratings = list( _get_ratings( - start_date=start_date, end_date=end_date, batch_size=batch_size, + start_date=start_date, end_date=end_date, batch_size=batch_size ) ) logger.info(f"Fetched {len(ratings)} ratings") diff --git a/docker/Dockerfile b/docker/Dockerfile index 54c3eec0..7268e5d0 100644 --- a/docker/Dockerfile +++ 
b/docker/Dockerfile @@ -17,9 +17,9 @@ RUN apt update && \ curl https://packages.microsoft.com/config/debian/10/prod.list > /etc/apt/sources.list.d/mssql-release.list && \ apt-get update && \ ACCEPT_EULA=Y apt-get install -y msodbcsql17 mssql-tools - + # Install other dependencies. -RUN apt install -y postgresql-client +RUN apt install -y postgresql-client # Install Airflow + Python dependencies. RUN conda install -y pytest setproctitle psutil pyodbc && \ @@ -32,7 +32,7 @@ RUN conda install -y pytest setproctitle psutil pyodbc && \ apache-airflow-backport-providers-microsoft-azure \ apache-airflow-backport-providers-amazon -# Install kubectl. +# Install kubectl. RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl RUN chmod +x ./kubectl RUN mv ./kubectl /usr/local/bin diff --git a/environment.yml b/environment.yml deleted file mode 100644 index 9a683320..00000000 --- a/environment.yml +++ /dev/null @@ -1,5 +0,0 @@ -name: manning-airflow -dependencies: - - python==3.8.2 - - black - - flake8==3.7.9 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..5433ea41 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +black==19.3b0 +flake8==3.8.4 +pre-commit==2.7.1