4 changes: 4 additions & 0 deletions .gitignore

@@ -144,4 +144,8 @@ dmypy.json
 # Data folders
 data
+
+# Pyenv
+.python-version
+
 # Mac
 .DS_Store
14 changes: 14 additions & 0 deletions .pre-commit-config.yaml

@@ -0,0 +1,14 @@
+repos:
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: 'v2.3.0'
+    hooks:
+    -   id: end-of-file-fixer
+    -   id: trailing-whitespace
+-   repo: https://github.com/psf/black
+    rev: '19.3b0'
+    hooks:
+    -   id: black
+-   repo: https://gitlab.com/pycqa/flake8
+    rev: '3.8.4'  # pick a git hash / tag to point to
+    hooks:
+    -   id: flake8
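With this config in place, contributors would typically enable the hooks locally with `pip install pre-commit` followed by `pre-commit install`, and lint the whole tree with `pre-commit run --all-files`. These are standard pre-commit commands, not steps shown in this PR.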
1 change: 0 additions & 1 deletion Makefile

@@ -52,4 +52,3 @@ docker: ## Build the Airflow Docker image
 .PHONY: flake8
 flake8:
 	flake8 .
-
6 changes: 2 additions & 4 deletions chapters/chapter10/dags/sla_misses.py

@@ -13,12 +13,10 @@ def send_slack_message(message):
     pass
 
 
-default_args = {
-    "sla": timedelta(seconds=10),
-}
+default_args = {"sla": timedelta(seconds=10)}
 
 with DAG(
-    dag_id=f"chapter10_sla",
+    dag_id="chapter10_sla",
     start_date=date_utils.days_ago(2),
     schedule_interval="@daily",
    default_args=default_args,
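Since this file is the chapter's SLA example, it may help to see how the `sla` default arg typically pairs with a DAG-level `sla_miss_callback`. The sketch below is illustrative only: the callback signature is Airflow's, but the `_notify_sla_miss` wiring and the `send_slack_message` body are assumptions, not part of this diff.

```python
from datetime import timedelta

from airflow import DAG
from airflow.utils import dates as date_utils


def send_slack_message(message):
    pass  # stubbed, as in the chapter example


def _notify_sla_miss(dag, task_list, blocking_task_list, slas, blocking_tis):
    # Airflow invokes this hook when tasks in the DAG exceed their SLA,
    # passing the DAG plus the affected task and SlaMiss records.
    send_slack_message(f"SLA missed in DAG {dag.dag_id}: {task_list}")


default_args = {"sla": timedelta(seconds=10)}

with DAG(
    dag_id="chapter10_sla",
    start_date=date_utils.days_ago(2),
    schedule_interval="@daily",
    default_args=default_args,
    sla_miss_callback=_notify_sla_miss,  # assumed wiring, not shown in the diff
) as dag:
    ...  # the chapter's tasks go here
```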
2 changes: 1 addition & 1 deletion chapters/chapter13/aws/.env.template

@@ -3,4 +3,4 @@ AWS_ACCESS_SECRET=
 
 RATINGS_BUCKET=
 RANKINGS_BUCKET=
-CRAWLER_NAME=
\ No newline at end of file
+CRAWLER_NAME=
2 changes: 1 addition & 1 deletion chapters/chapter13/aws/scripts/fetch_data.py

@@ -30,7 +30,7 @@ def fetch_ratings(url):
         with zipfile.ZipFile(tmp_path) as zip_:
             logging.info(f"Downloaded zip file with contents: {zip_.namelist()}")
 
-            logging.info(f"Reading ml-20m/ratings.csv from zip file")
+            logging.info("Reading ml-20m/ratings.csv from zip file")
             with zip_.open("ml-20m/ratings.csv") as file_:
                 ratings = pd.read_csv(file_)
 
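This change (repeated below for the Azure and GCP variants of `fetch_data.py`) drops the `f` prefix from strings that contain no placeholders, presumably to satisfy the flake8 pin added above, whose bundled pyflakes flags f-strings without placeholders (F541).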
2 changes: 1 addition & 1 deletion chapters/chapter13/azure/dags/azure_usecase.py

@@ -56,7 +56,7 @@ def _upload_ratings(wasb_conn_id, container, **context):
     logging.info(f"Writing results to {container}/{year}/{month:02d}.csv")
     hook = WasbHook(wasb_conn_id)
     hook.load_file(
-        tmp_path, container_name=container, blob_name=f"{year}/{month:02d}.csv",
+        tmp_path, container_name=container, blob_name=f"{year}/{month:02d}.csv"
     )
 
 
2 changes: 1 addition & 1 deletion chapters/chapter13/azure/scripts/fetch_data.py

@@ -30,7 +30,7 @@ def fetch_ratings(url):
         with zipfile.ZipFile(tmp_path) as zip_:
             logging.info(f"Downloaded zip file with contents: {zip_.namelist()}")
 
-            logging.info(f"Reading ml-20m/ratings.csv from zip file")
+            logging.info("Reading ml-20m/ratings.csv from zip file")
             with zip_.open("ml-20m/ratings.csv") as file_:
                 ratings = pd.read_csv(file_)
 
2 changes: 1 addition & 1 deletion chapters/chapter13/gcp/.env.template

@@ -2,4 +2,4 @@ BIGQUERY_DATASET=
 GCP_KEY=
 GCP_PROJECT=
 RATINGS_BUCKET=
-RESULT_BUCKET=
\ No newline at end of file
+RESULT_BUCKET=
2 changes: 1 addition & 1 deletion chapters/chapter13/gcp/scripts/fetch_data.py

@@ -30,7 +30,7 @@ def fetch_ratings(url):
         with zipfile.ZipFile(tmp_path) as zip_:
             logging.info(f"Downloaded zip file with contents: {zip_.namelist()}")
 
-            logging.info(f"Reading ml-20m/ratings.csv from zip file")
+            logging.info("Reading ml-20m/ratings.csv from zip file")
             with zip_.open("ml-20m/ratings.csv") as file_:
                 ratings = pd.read_csv(file_)
 
2 changes: 1 addition & 1 deletion chapters/chapter14/rbac/README.md

@@ -10,7 +10,7 @@ docker-compose up -d
 
 Wait 5 seconds or so for the webserver to come up (Both the webserver & init containers must run a command,
 which must be done in the correct order. Normally this is done by a human, but in the scripts we used a
-`sleep(5 seconds)` to ensure the correct ordering, which delays the webserver startup by 5 seconds). 
+`sleep(5 seconds)` to ensure the correct ordering, which delays the webserver startup by 5 seconds).
 
 Login in Airflow username/password `airflow`/`airflow`.
 
2 changes: 1 addition & 1 deletion chapters/chapter15/README.md

@@ -64,4 +64,4 @@ A snippet of the data:
 917,"2019-01-01 13:03:44.7760","2019-01-01 13:19:02.7690",3183,"Exchange Place",40.7162469,-74.0334588,3277,"Communipaw & Berry Lane",40.71435836870427,-74.06661093235016,29299,"Subscriber",1986,1
 3248,"2019-01-01 13:12:03.1280","2019-01-01 14:06:12.0400",3183,"Exchange Place",40.7162469,-74.0334588,3196,"Riverview Park",40.7443187,-74.0439909,29495,"Subscriber",1992,1
 3168,"2019-01-01 13:13:12.0450","2019-01-01 14:06:00.4110",3183,"Exchange Place",40.7162469,-74.0334588,3196,"Riverview Park",40.7443187,-74.0439909,26312,"Customer",1969,0
-```
\ No newline at end of file
+```
2 changes: 1 addition & 1 deletion chapters/chapter15/services/nyc_transportation_api/app.py

@@ -290,7 +290,7 @@ def index():
     )
     cursor = conn.cursor()
     cursor.execute(
-        f"""with citibike as (
+        """with citibike as (
         select
             start_location_id,
            end_location_id,
2 changes: 1 addition & 1 deletion (file name not captured in this view)

@@ -44,4 +44,4 @@
 <script src="https://cdn.jsdelivr.net/npm/popper.js@1.16.0/dist/umd/popper.min.js" integrity="sha384-Q6E9RHvbIyZFJoft+2mJbHaEWldlvI9IOYy5n3zV9zzTtmI3UksdQRVvoxMfooAo" crossorigin="anonymous"></script>
 <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.5.0/js/bootstrap.min.js" integrity="sha384-OgVRvuATP1z7JjHLkuOU7Xw704+h835Lr+6QL9UvYjZE3Ipu6Tp75j7Bh/kR0JKI" crossorigin="anonymous"></script>
 </body>
-</html>
\ No newline at end of file
+</html>
2 changes: 1 addition & 1 deletion chapters/chapter4/create_table.sql

@@ -14,4 +14,4 @@ FROM (
     FROM pageview_counts
     GROUP BY pagename, hr
 ) AS x
-WHERE row_number=1;
\ No newline at end of file
+WHERE row_number=1;
2 changes: 1 addition & 1 deletion chapters/chapter5/dags/chapter5_02_branch_in_function.py

@@ -48,7 +48,7 @@ def _clean_sales_new(**context):
     task_id="fetch_sales", python_callable=_fetch_sales, provide_context=True
 )
 clean_sales = PythonOperator(
-    task_id="clean_sales", python_callable=_clean_sales, provide_context=True,
+    task_id="clean_sales", python_callable=_clean_sales, provide_context=True
 )
 
 fetch_weather = DummyOperator(task_id="fetch_weather")
2 changes: 1 addition & 1 deletion chapters/chapter5/dags/chapter5_06_condition_in_dag.py

@@ -53,7 +53,7 @@ def _latest_only(**context):
 train_model = DummyOperator(task_id="train_model")
 
 latest_only = PythonOperator(
-    task_id="latest_only", python_callable=_latest_only, provide_context=True,
+    task_id="latest_only", python_callable=_latest_only, provide_context=True
 )
 
 deploy_model = DummyOperator(task_id="deploy_model")
4 changes: 2 additions & 2 deletions chapters/chapter5/dags/chapter5_08_xcoms.py

@@ -35,11 +35,11 @@ def _deploy_model(**context):
 join_datasets = DummyOperator(task_id="join_datasets")
 
 train_model = PythonOperator(
-    task_id="train_model", python_callable=_train_model, provide_context=True,
+    task_id="train_model", python_callable=_train_model, provide_context=True
 )
 
 deploy_model = PythonOperator(
-    task_id="deploy_model", python_callable=_deploy_model, provide_context=True,
+    task_id="deploy_model", python_callable=_deploy_model, provide_context=True
 )
 
 start >> [fetch_sales, fetch_weather]
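For readers skimming the diff, `_train_model` and `_deploy_model` are the chapter's XCom example. A minimal sketch of the push/pull pattern they illustrate follows; the `model_id` key and placeholder value are assumptions for illustration, not code from this file.

```python
def _train_model(**context):
    model_id = "model-abc123"  # placeholder for a real model identifier
    # Push the identifier so downstream tasks can retrieve it.
    context["task_instance"].xcom_push(key="model_id", value=model_id)


def _deploy_model(**context):
    # Pull the identifier pushed by the train_model task.
    model_id = context["task_instance"].xcom_pull(
        task_ids="train_model", key="model_id"
    )
    print(f"Deploying model {model_id}")
```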
6 changes: 3 additions & 3 deletions chapters/chapter6/dags/couponing_app_split_ingest_metrics.py

@@ -41,7 +41,7 @@ def _wait_for_supermarket(supermarket_id_):
     )
     wait >> copy >> process >> trigger_create_metrics_dag
 
-compute_differences = DummyOperator(task_id=f"compute_differences", dag=dag2)
-update_dashboard = DummyOperator(task_id=f"update_dashboard", dag=dag2)
-notify_new_data = DummyOperator(task_id=f"notify_new_data", dag=dag2)
+compute_differences = DummyOperator(task_id="compute_differences", dag=dag2)
+update_dashboard = DummyOperator(task_id="update_dashboard", dag=dag2)
+notify_new_data = DummyOperator(task_id="notify_new_data", dag=dag2)
 compute_differences >> update_dashboard
2 changes: 1 addition & 1 deletion chapters/chapter7/dags/movielens_python_operator.py

@@ -87,7 +87,7 @@ def _fetch_ratings(templates_dict, batch_size=1000, **_):
     logger.info(f"Fetching ratings for {start_date} to {end_date}")
     ratings = list(
         _get_ratings(
-            start_date=start_date, end_date=end_date, batch_size=batch_size,
+            start_date=start_date, end_date=end_date, batch_size=batch_size
         )
     )
     logger.info(f"Fetched {len(ratings)} ratings")
6 changes: 3 additions & 3 deletions docker/Dockerfile

@@ -17,9 +17,9 @@ RUN apt update && \
     curl https://packages.microsoft.com/config/debian/10/prod.list > /etc/apt/sources.list.d/mssql-release.list && \
     apt-get update && \
     ACCEPT_EULA=Y apt-get install -y msodbcsql17 mssql-tools
 
 # Install other dependencies.
-RUN apt install -y postgresql-client 
+RUN apt install -y postgresql-client
 
 # Install Airflow + Python dependencies.
 RUN conda install -y pytest setproctitle psutil pyodbc && \
@@ -32,7 +32,7 @@ RUN conda install -y pytest setproctitle psutil pyodbc && \
     apache-airflow-backport-providers-microsoft-azure \
     apache-airflow-backport-providers-amazon
 
-# Install kubectl. 
+# Install kubectl.
 RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl
 RUN chmod +x ./kubectl
 RUN mv ./kubectl /usr/local/bin