From b9ca19f6dcc6c57cf01820c1d84325bbe6b9fcbf Mon Sep 17 00:00:00 2001 From: Sean Rose Date: Tue, 25 Mar 2025 17:06:30 -0700 Subject: [PATCH 1/8] feat: Upgrade Airflow from 2.9.3 to 2.10.5. --- Dockerfile | 2 +- constraints.txt | 973 +++++++++++++++++++------------------- docker-compose.yml | 2 +- requirements-dev.in | 4 +- requirements-dev.txt | 8 +- requirements-override.txt | 1 - requirements.in | 4 +- requirements.txt | 396 ++++++++-------- 8 files changed, 706 insertions(+), 684 deletions(-) diff --git a/Dockerfile b/Dockerfile index b7fb1b503..5b1bbc389 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM apache/airflow:slim-2.9.3-python3.11 +FROM apache/airflow:slim-2.10.5-python3.11 ARG PROJECT_DIR="/opt/airflow" diff --git a/constraints.txt b/constraints.txt index 62c811284..f406c3052 100644 --- a/constraints.txt +++ b/constraints.txt @@ -1,6 +1,6 @@ # -# This constraints file was automatically generated on 2024-07-12T13:56:52.653035 -# via "eager-upgrade" mechanism of PIP. For the "v2-9-test" branch of Airflow. +# This constraints file was automatically generated on 2025-02-03T06:50:28.451790 +# via "eager-upgrade" mechanism of PIP. For the "v2-10-test" branch of Airflow. # This variant of constraints install uses the HEAD of the branch version for 'apache-airflow' but installs # the providers from PIP-released packages at the moment of the constraint generation. # @@ -30,555 +30,570 @@ # pip install "apache-airflow==X.Y.Z" "snowflake-connector-python[pandas]=N.M.O" # Authlib==1.3.1 -Babel==2.15.0 ConfigUpdater==3.2 -Deprecated==1.2.14 +Deprecated==1.2.18 Events==0.5 -Flask-AppBuilder==4.5.0 +Flask-AppBuilder==4.5.2 Flask-Babel==2.0.0 Flask-Bcrypt==1.0.1 Flask-Caching==2.3.0 -Flask-JWT-Extended==4.6.0 -Flask-Limiter==3.7.0 +Flask-JWT-Extended==4.7.1 +Flask-Limiter==3.10.1 Flask-Login==0.6.3 Flask-SQLAlchemy==2.5.1 Flask-Session==0.5.0 -Flask-WTF==1.2.1 +Flask-WTF==1.2.2 Flask==2.2.5 -GitPython==3.1.43 -JPype1==1.5.0 +GitPython==3.1.44 JayDeBeApi==1.2.3 -Jinja2==3.1.4 +Jinja2==3.1.5 Js2Py==0.74 -Mako==1.3.5 -Markdown==3.6 -MarkupSafe==2.1.5 -PyAthena==3.8.3 -PyGithub==2.3.0 +Mako==1.3.8 +Markdown==3.7 +MarkupSafe==3.0.2 +PyAthena==3.12.2 +PyGithub==2.5.0 PyHive==0.7.0 -PyJWT==2.8.0 +PyJWT==2.10.1 +PyMySQL==1.1.1 PyNaCl==1.5.0 -PyYAML==6.0.1 -Pygments==2.18.0 +PyYAML==6.0.2 +Pygments==2.19.1 SQLAlchemy-JSONField==1.0.2 SQLAlchemy-Utils==0.41.2 -SQLAlchemy==1.4.52 +SQLAlchemy==1.4.54 SecretStorage==3.3.3 -Sphinx==5.3.0 -WTForms==3.1.2 +Sphinx==8.1.3 +WTForms==3.2.1 Werkzeug==2.2.3 adal==1.2.7 -adlfs==2024.4.1 -aiobotocore==2.13.1 +adlfs==2024.12.0 +aiobotocore==2.19.0 aiofiles==23.2.1 -aiohttp==3.9.5 -aioitertools==0.11.0 -aioresponses==0.7.6 -aiosignal==1.3.1 -alabaster==0.7.16 -alembic==1.13.2 +aiohappyeyeballs==2.4.4 +aiohttp==3.11.11 +aioitertools==0.12.0 +aiomysql==0.2.0 +aioresponses==0.7.8 +aiosignal==1.3.2 +aiosqlite==0.20.0 +airbyte-api==0.52.2 +alabaster==1.0.0 +alembic==1.14.1 alibabacloud-adb20211201==2.0.0 -alibabacloud-tea==0.3.9 -alibabacloud_credentials==0.3.4 +alibabacloud-tea==0.4.0 +alibabacloud_credentials==0.3.6 alibabacloud_endpoint_util==0.0.3 -alibabacloud_gateway_spi==0.0.1 +alibabacloud_gateway_spi==0.0.2 alibabacloud_openapi_util==0.2.2 -alibabacloud_tea_openapi==0.3.10 -alibabacloud_tea_util==0.3.12 +alibabacloud_tea_openapi==0.3.12 +alibabacloud_tea_util==0.3.13 alibabacloud_tea_xml==0.0.2 -aliyun-python-sdk-core==2.15.1 -aliyun-python-sdk-kms==2.16.3 -amqp==5.2.0 +aliyun-python-sdk-core==2.16.0 +aliyun-python-sdk-kms==2.16.5 +amqp==5.3.1 analytics-python==1.2.9 annotated-types==0.7.0 ansicolors==1.1.8 -anyascii==0.3.2 -anyio==4.4.0 -apache-airflow-providers-airbyte==3.8.1 -apache-airflow-providers-alibaba==2.8.1 -apache-airflow-providers-amazon==8.25.0 -apache-airflow-providers-apache-beam==5.7.1 -apache-airflow-providers-apache-cassandra==3.5.1 -apache-airflow-providers-apache-drill==2.7.2 -apache-airflow-providers-apache-druid==3.10.1 -apache-airflow-providers-apache-flink==1.4.2 -apache-airflow-providers-apache-hdfs==4.4.2 -apache-airflow-providers-apache-hive==8.1.2 -apache-airflow-providers-apache-iceberg==1.0.0 -apache-airflow-providers-apache-impala==1.4.1 -apache-airflow-providers-apache-kafka==1.5.0 -apache-airflow-providers-apache-kylin==3.6.2 -apache-airflow-providers-apache-livy==3.8.1 -apache-airflow-providers-apache-pig==4.4.1 -apache-airflow-providers-apache-pinot==4.4.1 -apache-airflow-providers-apache-spark==4.8.2 -apache-airflow-providers-apprise==1.3.1 -apache-airflow-providers-arangodb==2.5.1 -apache-airflow-providers-asana==2.5.1 -apache-airflow-providers-atlassian-jira==2.6.1 -apache-airflow-providers-celery==3.7.2 -apache-airflow-providers-cloudant==3.5.2 -apache-airflow-providers-cncf-kubernetes==8.3.3 -apache-airflow-providers-cohere==1.2.1 -apache-airflow-providers-common-io==1.3.2 -apache-airflow-providers-common-sql==1.14.2 -apache-airflow-providers-databricks==6.7.0 -apache-airflow-providers-datadog==3.6.1 -apache-airflow-providers-dbt-cloud==3.9.0 -apache-airflow-providers-dingding==3.5.1 -apache-airflow-providers-discord==3.7.1 -apache-airflow-providers-docker==3.12.2 -apache-airflow-providers-elasticsearch==5.4.1 -apache-airflow-providers-exasol==4.5.2 -apache-airflow-providers-fab==1.2.2 -apache-airflow-providers-facebook==3.5.2 -apache-airflow-providers-ftp==3.10.0 -apache-airflow-providers-github==2.6.2 -apache-airflow-providers-google==10.21.0 -apache-airflow-providers-grpc==3.5.2 -apache-airflow-providers-hashicorp==3.7.1 -apache-airflow-providers-http==4.12.0 -apache-airflow-providers-imap==3.6.1 -apache-airflow-providers-influxdb==2.6.0 -apache-airflow-providers-jdbc==4.3.1 -apache-airflow-providers-jenkins==3.6.1 -apache-airflow-providers-microsoft-azure==10.2.0 -apache-airflow-providers-microsoft-mssql==3.7.2 -apache-airflow-providers-microsoft-psrp==2.7.1 -apache-airflow-providers-microsoft-winrm==3.5.1 -apache-airflow-providers-mongo==4.1.2 -apache-airflow-providers-mysql==5.6.2 -apache-airflow-providers-neo4j==3.6.1 -apache-airflow-providers-odbc==4.6.2 -apache-airflow-providers-openai==1.2.2 -apache-airflow-providers-openfaas==3.5.1 -apache-airflow-providers-openlineage==1.9.1 -apache-airflow-providers-opensearch==1.3.0 -apache-airflow-providers-opsgenie==5.6.1 -apache-airflow-providers-oracle==3.10.3 -apache-airflow-providers-pagerduty==3.7.2 -apache-airflow-providers-papermill==3.7.2 -apache-airflow-providers-pgvector==1.2.1 -apache-airflow-providers-pinecone==2.0.1 -apache-airflow-providers-postgres==5.11.2 -apache-airflow-providers-presto==5.5.2 -apache-airflow-providers-qdrant==1.1.1 -apache-airflow-providers-redis==3.7.1 -apache-airflow-providers-salesforce==5.7.2 -apache-airflow-providers-samba==4.7.1 -apache-airflow-providers-segment==3.5.1 -apache-airflow-providers-sendgrid==3.5.1 -apache-airflow-providers-sftp==4.10.2 -apache-airflow-providers-singularity==3.5.1 -apache-airflow-providers-slack==8.7.1 -apache-airflow-providers-smtp==1.7.1 -apache-airflow-providers-snowflake==5.6.0 -apache-airflow-providers-sqlite==3.8.1 -apache-airflow-providers-ssh==3.11.2 -apache-airflow-providers-tableau==4.5.2 -apache-airflow-providers-tabular==1.5.1 -apache-airflow-providers-telegram==4.5.2 -apache-airflow-providers-teradata==2.4.0 -apache-airflow-providers-trino==5.7.2 -apache-airflow-providers-vertica==3.8.2 -apache-airflow-providers-weaviate==1.4.2 -apache-airflow-providers-yandex==3.11.2 -apache-airflow-providers-zendesk==4.7.1 -apache-beam==2.57.0 -apispec==6.6.1 -apprise==1.8.0 -argcomplete==3.4.0 +anyio==4.8.0 +apache-airflow-providers-airbyte==5.0.0 +apache-airflow-providers-alibaba==3.0.0 +apache-airflow-providers-amazon==9.2.0 +apache-airflow-providers-apache-beam==6.0.0 +apache-airflow-providers-apache-cassandra==3.7.0 +apache-airflow-providers-apache-drill==3.0.0 +apache-airflow-providers-apache-druid==4.0.0 +apache-airflow-providers-apache-flink==1.6.0 +apache-airflow-providers-apache-hdfs==4.7.0 +apache-airflow-providers-apache-hive==9.0.0 +apache-airflow-providers-apache-iceberg==1.2.0 +apache-airflow-providers-apache-impala==1.6.0 +apache-airflow-providers-apache-kafka==1.7.0 +apache-airflow-providers-apache-kylin==3.8.0 +apache-airflow-providers-apache-livy==4.0.0 +apache-airflow-providers-apache-pig==4.6.0 +apache-airflow-providers-apache-pinot==4.6.0 +apache-airflow-providers-apache-spark==5.0.0 +apache-airflow-providers-apprise==2.0.0 +apache-airflow-providers-arangodb==2.7.0 +apache-airflow-providers-asana==2.7.0 +apache-airflow-providers-atlassian-jira==3.0.0 +apache-airflow-providers-celery==3.10.0 +apache-airflow-providers-cloudant==4.1.0 +apache-airflow-providers-cncf-kubernetes==10.1.0 +apache-airflow-providers-cohere==1.4.0 +apache-airflow-providers-common-compat==1.3.0 +apache-airflow-providers-common-io==1.5.0 +apache-airflow-providers-common-sql==1.21.0 +apache-airflow-providers-databricks==7.0.0 +apache-airflow-providers-datadog==3.8.0 +apache-airflow-providers-dbt-cloud==4.0.0 +apache-airflow-providers-dingding==3.7.0 +apache-airflow-providers-discord==3.9.0 +apache-airflow-providers-docker==4.0.0 +apache-airflow-providers-elasticsearch==6.0.0 +apache-airflow-providers-exasol==4.7.0 +apache-airflow-providers-fab==1.5.2 +apache-airflow-providers-facebook==3.7.0 +apache-airflow-providers-ftp==3.12.0 +apache-airflow-providers-github==2.8.0 +apache-airflow-providers-google==12.0.0 +apache-airflow-providers-grpc==3.7.0 +apache-airflow-providers-hashicorp==4.0.0 +apache-airflow-providers-http==5.0.0 +apache-airflow-providers-imap==3.8.0 +apache-airflow-providers-influxdb==2.8.0 +apache-airflow-providers-jdbc==5.0.0 +apache-airflow-providers-jenkins==4.0.0 +apache-airflow-providers-microsoft-azure==12.0.0 +apache-airflow-providers-microsoft-mssql==4.0.0 +apache-airflow-providers-microsoft-psrp==3.0.0 +apache-airflow-providers-microsoft-winrm==3.7.0 +apache-airflow-providers-mongo==5.0.0 +apache-airflow-providers-mysql==6.0.0 +apache-airflow-providers-neo4j==3.8.0 +apache-airflow-providers-odbc==4.9.0 +apache-airflow-providers-openai==1.5.0 +apache-airflow-providers-openfaas==3.7.0 +apache-airflow-providers-openlineage==2.0.0 +apache-airflow-providers-opensearch==1.6.0 +apache-airflow-providers-opsgenie==5.8.0 +apache-airflow-providers-oracle==4.0.0 +apache-airflow-providers-pagerduty==4.0.0 +apache-airflow-providers-papermill==3.9.0 +apache-airflow-providers-pgvector==1.4.0 +apache-airflow-providers-pinecone==2.2.0 +apache-airflow-providers-postgres==6.0.0 +apache-airflow-providers-presto==5.8.0 +apache-airflow-providers-qdrant==1.3.0 +apache-airflow-providers-redis==4.0.0 +apache-airflow-providers-salesforce==5.9.0 +apache-airflow-providers-samba==4.9.0 +apache-airflow-providers-segment==3.7.0 +apache-airflow-providers-sendgrid==4.0.0 +apache-airflow-providers-sftp==5.0.0 +apache-airflow-providers-singularity==3.7.0 +apache-airflow-providers-slack==9.0.0 +apache-airflow-providers-smtp==1.9.0 +apache-airflow-providers-snowflake==6.0.0 +apache-airflow-providers-sqlite==4.0.0 +apache-airflow-providers-ssh==4.0.0 +apache-airflow-providers-tableau==5.0.0 +apache-airflow-providers-tabular==1.6.1 +apache-airflow-providers-telegram==4.7.0 +apache-airflow-providers-teradata==3.0.0 +apache-airflow-providers-trino==6.0.0 +apache-airflow-providers-vertica==4.0.0 +apache-airflow-providers-weaviate==3.0.0 +apache-airflow-providers-yandex==4.0.0 +apache-airflow-providers-ydb==2.1.0 +apache-airflow-providers-zendesk==4.9.0 +apache-beam==2.59.0 +apispec==6.8.1 +apprise==1.9.2 +argcomplete==3.5.3 asana==3.2.3 asgiref==3.8.1 asn1crypto==1.5.1 -astroid==2.15.8 -asttokens==2.4.1 -asyncssh==2.15.0 +astroid==3.3.8 +asttokens==3.0.0 +asyncpg==0.30.0 +asyncssh==2.19.0 atlasclient==1.0.0 -atlassian-python-api==3.41.14 -attrs==23.2.0 -aws-sam-translator==1.89.0 +atlassian-python-api==3.41.19 +attrs==25.1.0 +aws-sam-translator==1.94.0 aws-xray-sdk==2.14.0 azure-batch==14.2.0 azure-common==1.1.28 -azure-core==1.30.2 -azure-cosmos==4.7.0 +azure-core==1.32.0 +azure-cosmos==4.9.0 azure-datalake-store==0.0.53 -azure-identity==1.17.1 -azure-keyvault-secrets==4.8.0 -azure-kusto-data==4.5.1 +azure-identity==1.19.0 +azure-keyvault-secrets==4.9.0 +azure-kusto-data==4.6.3 azure-mgmt-containerinstance==10.1.0 azure-mgmt-containerregistry==10.3.0 -azure-mgmt-core==1.4.0 -azure-mgmt-cosmosdb==9.5.1 -azure-mgmt-datafactory==8.0.0 +azure-mgmt-core==1.5.0 +azure-mgmt-cosmosdb==9.7.0 +azure-mgmt-datafactory==9.1.0 azure-mgmt-datalake-nspkg==3.0.1 azure-mgmt-datalake-store==0.5.0 azure-mgmt-nspkg==3.0.2 -azure-mgmt-resource==23.1.1 -azure-mgmt-storage==21.2.1 +azure-mgmt-resource==23.2.0 +azure-mgmt-storage==22.0.0 azure-nspkg==3.0.2 -azure-servicebus==7.12.2 -azure-storage-blob==12.20.0 -azure-storage-file-datalake==12.15.0 -azure-storage-file-share==12.16.0 +azure-servicebus==7.13.0 +azure-storage-blob==12.24.1 +azure-storage-file-datalake==12.18.1 +azure-storage-file-share==12.20.1 azure-synapse-artifacts==0.19.0 azure-synapse-spark==0.7.0 +babel==2.17.0 backoff==2.2.1 backports.tarfile==1.2.0 -bcrypt==4.1.3 -beautifulsoup4==4.12.3 -billiard==4.2.0 -bitarray==2.9.2 -black==24.4.2 -blinker==1.8.2 -boto3==1.34.131 -botocore==1.34.131 +bcrypt==4.2.1 +beautifulsoup4==4.13.0 +billiard==4.2.1 +bitarray==2.9.3 +black==25.1.0 +blinker==1.9.0 +boto3==1.36.3 +botocore==1.36.3 cachelib==0.9.0 -cachetools==5.3.3 -cassandra-driver==3.29.1 -cattrs==23.2.3 +cachetools==5.5.1 +cassandra-driver==3.29.2 +cattrs==24.1.2 celery==5.4.0 -certifi==2024.7.4 -cffi==1.16.0 +certifi==2025.1.31 +cffi==1.17.1 cfgv==3.4.0 -cfn-lint==1.6.0 +cfn-lint==1.23.1 cgroupspy==0.2.3 chardet==5.2.0 -charset-normalizer==3.3.2 +charset-normalizer==3.4.1 checksumdir==1.2.0 -ciso8601==2.3.1 +ciso8601==2.3.2 click-didyoumean==0.3.1 click-plugins==1.1.1 click-repl==0.3.0 -click==8.1.7 +click==8.1.8 clickclick==20.10.2 cloudant==2.15.0 cloudpickle==2.2.1 cohere==4.57 colorama==0.4.6 -colorlog==4.8.0 +colorlog==6.9.0 comm==0.2.2 -confluent-kafka==2.5.0 +confluent-kafka==2.8.0 connexion==2.14.2 -coverage==7.6.0 +coverage==7.6.10 crcmod==1.7 -cron-descriptor==1.4.3 -croniter==2.0.5 -cryptography==41.0.7 +cron-descriptor==1.4.5 +croniter==6.0.0 +cryptography==42.0.8 curlify==2.2.1 -databricks-sql-connector==2.9.6 -datadog==0.49.1 -db-dtypes==1.2.0 -debugpy==1.8.2 +databricks-sql-connector==4.0.0 +dataclasses-json==0.6.7 +datadog==0.51.0 +db-dtypes==1.4.0 +debugpy==1.8.12 decorator==5.1.1 defusedxml==0.7.1 -deltalake==0.18.2 -diagrams==0.23.4 +deltalake==0.24.0 +diagrams==0.24.1 dill==0.3.1.1 -distlib==0.3.8 +distlib==0.3.9 distro==1.9.0 -dnspython==2.6.1 +dnspython==2.7.0 docker==7.1.0 docopt==0.6.2 docstring_parser==0.16 -docutils==0.16 -duckdb==1.0.0 -elastic-transport==8.13.1 -elasticsearch==8.14.0 +docutils==0.21.2 +duckdb==1.1.3 +elastic-transport==8.17.0 +elasticsearch==8.17.1 email_validator==2.2.0 entrypoints==0.4 eralchemy2==1.4.1 -et-xmlfile==1.1.0 -eventlet==0.36.1 +et_xmlfile==2.0.0 +eventlet==0.39.0 execnet==2.1.1 -executing==2.0.1 -facebook_business==20.0.0 -fastavro==1.9.5 +executing==2.2.0 +facebook_business==21.0.5 +fastavro==1.10.0 fasteners==0.19 -fastjsonschema==2.20.0 -filelock==3.15.4 +fastjsonschema==2.21.1 +filelock==3.17.0 flower==2.0.1 -frozenlist==1.4.1 -fsspec==2023.12.2 +frozenlist==1.5.0 +fsspec==2025.2.0 future==1.0.0 -gcloud-aio-auth==4.2.3 +gcloud-aio-auth==5.3.2 gcloud-aio-bigquery==7.1.0 -gcloud-aio-storage==9.2.0 -gcsfs==2023.12.2.post1 +gcloud-aio-storage==9.3.0 +gcsfs==2025.2.0 geomet==0.2.1.post1 -gevent==24.2.1 -gitdb==4.0.11 -google-ads==24.1.0 -google-analytics-admin==0.22.8 -google-api-core==2.19.1 -google-api-python-client==2.137.0 +gevent==24.11.1 +gitdb==4.0.12 +google-ads==25.1.0 +google-analytics-admin==0.23.3 +google-api-core==2.24.1 +google-api-python-client==2.160.0 google-auth-httplib2==0.2.0 google-auth-oauthlib==1.2.1 -google-auth==2.32.0 -google-cloud-aiplatform==1.59.0 -google-cloud-appengine-logging==1.4.4 -google-cloud-audit-log==0.2.5 -google-cloud-automl==2.13.4 -google-cloud-batch==0.17.22 -google-cloud-bigquery-datatransfer==3.15.4 +google-auth==2.38.0 +google-cloud-aiplatform==1.79.0 +google-cloud-alloydb==0.4.1 +google-cloud-appengine-logging==1.5.0 +google-cloud-audit-log==0.3.0 +google-cloud-automl==2.15.0 +google-cloud-batch==0.17.33 +google-cloud-bigquery-datatransfer==3.18.0 google-cloud-bigquery==3.20.1 -google-cloud-bigtable==2.24.0 -google-cloud-build==3.24.1 -google-cloud-compute==1.19.1 -google-cloud-container==2.49.0 +google-cloud-bigtable==2.28.1 +google-cloud-build==3.29.0 +google-cloud-compute==1.24.0 +google-cloud-container==2.55.1 google-cloud-core==2.4.1 -google-cloud-datacatalog==3.19.1 -google-cloud-dataflow-client==0.8.11 -google-cloud-dataform==0.5.10 -google-cloud-dataplex==2.2.1 -google-cloud-dataproc-metastore==1.15.4 -google-cloud-dataproc==5.10.1 -google-cloud-dlp==3.18.1 -google-cloud-kms==2.24.1 -google-cloud-language==2.13.4 -google-cloud-logging==3.10.0 -google-cloud-memcache==1.9.4 -google-cloud-monitoring==2.22.1 -google-cloud-orchestration-airflow==1.13.0 -google-cloud-os-login==2.14.5 -google-cloud-pubsub==2.22.0 -google-cloud-redis==2.15.4 -google-cloud-resource-manager==1.12.4 -google-cloud-run==0.10.7 -google-cloud-secret-manager==2.20.1 -google-cloud-spanner==3.47.0 -google-cloud-speech==2.26.1 -google-cloud-storage-transfer==1.11.4 -google-cloud-storage==2.17.0 -google-cloud-tasks==2.16.4 -google-cloud-texttospeech==2.16.4 -google-cloud-translate==3.15.4 -google-cloud-videointelligence==2.13.4 -google-cloud-vision==3.7.3 -google-cloud-workflows==1.14.4 -google-crc32c==1.5.0 +google-cloud-datacatalog==3.24.1 +google-cloud-dataflow-client==0.8.15 +google-cloud-dataform==0.5.14 +google-cloud-dataplex==2.6.0 +google-cloud-dataproc-metastore==1.17.0 +google-cloud-dataproc==5.16.0 +google-cloud-dlp==3.26.0 +google-cloud-kms==3.2.2 +google-cloud-language==2.16.0 +google-cloud-logging==3.11.4 +google-cloud-memcache==1.11.0 +google-cloud-monitoring==2.26.0 +google-cloud-orchestration-airflow==1.16.1 +google-cloud-os-login==2.16.0 +google-cloud-pubsub==2.28.0 +google-cloud-redis==2.17.0 +google-cloud-resource-manager==1.14.0 +google-cloud-run==0.10.14 +google-cloud-secret-manager==2.22.1 +google-cloud-spanner==3.51.0 +google-cloud-speech==2.30.0 +google-cloud-storage-transfer==1.15.0 +google-cloud-storage==2.19.0 +google-cloud-tasks==2.18.0 +google-cloud-texttospeech==2.24.0 +google-cloud-translate==3.19.0 +google-cloud-videointelligence==2.15.0 +google-cloud-vision==3.9.0 +google-cloud-workflows==1.16.0 +google-crc32c==1.6.0 google-re2==1.1.20240702 -google-resumable-media==2.7.1 -googleapis-common-protos==1.63.2 -graphql-core==3.2.3 +google-resumable-media==2.7.2 +googleapis-common-protos==1.66.0 +graphql-core==3.2.6 graphviz==0.20.3 -greenlet==3.0.3 -grpc-google-iam-v1==0.13.1 +greenlet==3.1.1 +grpc-google-iam-v1==0.14.0 grpc-interceptor==0.15.4 grpcio-gcp==0.2.2 -grpcio-health-checking==1.62.2 -grpcio-status==1.62.2 -grpcio-tools==1.62.2 -grpcio==1.64.1 -gssapi==1.8.3 -gunicorn==22.0.0 +grpcio-health-checking==1.62.3 +grpcio-status==1.62.3 +grpcio-tools==1.62.3 +grpcio==1.70.0 +gssapi==1.9.0 +gunicorn==23.0.0 h11==0.14.0 -h2==4.1.0 -hatch==1.12.0 -hatchling==1.25.0 +h2==4.2.0 +hatch==1.14.0 +hatchling==1.27.0 hdfs==2.7.3 hmsclient==0.1.1 -hpack==4.0.0 -httpcore==1.0.5 +hpack==4.1.0 +httpcore==1.0.7 httplib2==0.22.0 httpx==0.27.0 -humanize==4.10.0 +humanize==4.11.0 hvac==2.3.0 -hyperframe==6.0.1 +hyperframe==6.1.0 hyperlink==21.0.0 +ibm-cloud-sdk-core==3.20.3 +ibmcloudant==0.9.1 icdiff==2.0.7 -identify==2.6.0 -idna==3.7 +id==1.5.0 +identify==2.6.6 +idna==3.10 ijson==3.3.0 imagesize==1.4.1 +immutabledict==4.2.1 importlib-metadata==6.11.0 -importlib_resources==6.4.0 -impyla==0.19.0 -incremental==22.10.0 +impyla==0.20.0 +incremental==24.7.2 inflection==0.5.1 -influxdb-client==1.44.0 +influxdb-client==1.48.0 iniconfig==2.0.0 ipdb==0.13.13 ipykernel==6.29.5 -ipython==8.26.0 -isodate==0.6.1 +ipython==8.32.0 +isodate==0.7.2 itsdangerous==2.2.0 jaraco.classes==3.4.0 -jaraco.context==5.3.0 -jaraco.functools==4.0.1 -jedi==0.19.1 +jaraco.context==6.0.1 +jaraco.functools==4.1.0 +jedi==0.19.2 jeepney==0.8.0 +jiter==0.8.2 jmespath==0.10.0 -joserfc==0.12.0 +joserfc==1.0.2 +jpype1==1.5.2 json-merge-patch==0.2 -jsondiff==2.1.1 +jsondiff==2.2.1 jsonpatch==1.33 -jsonpath-ng==1.6.1 -jsonpickle==3.2.2 +jsonpath-ng==1.7.0 +jsonpath-python==1.0.6 +jsonpickle==3.4.2 jsonpointer==3.0.0 -jsonschema-path==0.3.3 -jsonschema-specifications==2023.12.1 +jsonschema-path==0.3.4 +jsonschema-specifications==2024.10.1 jsonschema==4.23.0 -jupyter_client==8.6.2 +jupyter_client==8.6.3 jupyter_core==5.7.2 -keyring==25.2.1 -kombu==5.3.7 -krb5==0.5.1 -kubernetes==29.0.0 -kubernetes_asyncio==29.0.0 +keyring==25.6.0 +kombu==5.4.2 +krb5==0.7.0 +kubernetes==30.1.0 +kubernetes_asyncio==30.1.0 kylinpy==2.8.4 lazy-object-proxy==1.10.0 ldap3==2.9.1 -limits==3.13.0 +limits==4.0.1 linkify-it-py==2.0.3 lockfile==0.12.2 -loguru==0.7.2 -looker-sdk==24.10.0 -lxml==5.2.2 -lz4==4.3.3 +looker-sdk==25.0.0 +lxml==5.3.0 +lz4==4.4.3 markdown-it-py==3.0.0 marshmallow-oneofschema==3.1.1 marshmallow-sqlalchemy==0.28.2 -marshmallow==3.21.3 +marshmallow==3.26.0 matplotlib-inline==0.1.7 -mdit-py-plugins==0.4.1 +mdit-py-plugins==0.4.2 mdurl==0.1.2 mergedeep==1.3.4 methodtools==0.4.7 microsoft-kiota-abstractions==1.3.3 -microsoft-kiota-authentication-azure==1.0.0 -microsoft-kiota-http==1.3.2 -mmhash3==3.0.1 -mongomock==4.1.2 -more-itertools==10.3.0 +microsoft-kiota-authentication-azure==1.1.0 +microsoft-kiota-http==1.3.3 +microsoft-kiota-serialization-json==1.0.0 +microsoft-kiota-serialization-text==1.0.0 +mmh3==5.1.0 +mongomock==4.3.0 +more-itertools==10.6.0 moto==5.0.11 mpmath==1.3.0 msal-extensions==1.2.0 -msal==1.29.0 -msgraph-core==1.1.1 +msal==1.31.1 +msgraph-core==1.2.1 msrest==0.7.1 msrestazure==0.6.4.post1 multi_key_dict==2.0.3 -multidict==6.0.5 -mypy-boto3-appflow==1.34.0 -mypy-boto3-rds==1.34.135 -mypy-boto3-redshift-data==1.34.0 -mypy-boto3-s3==1.34.138 +multidict==6.1.0 +mypy-boto3-appflow==1.36.0 +mypy-boto3-rds==1.36.11 +mypy-boto3-redshift-data==1.36.0 +mypy-boto3-s3==1.36.9 mypy-extensions==1.0.0 mypy==1.9.0 -mysql-connector-python==9.0.0 -mysqlclient==2.2.4 -nbclient==0.10.0 +mysql-connector-python==9.2.0 +mysqlclient==2.2.7 +nbclient==0.10.2 nbformat==5.10.4 -neo4j==5.22.0 +neo4j==5.27.0 nest-asyncio==1.6.0 -networkx==3.3 -nh3==0.2.18 +networkx==3.4.2 +nh3==0.2.20 nodeenv==1.9.1 numpy==1.26.4 oauthlib==3.2.2 -objsize==0.7.0 -openai==1.35.13 -openapi-schema-validator==0.6.2 +objsize==0.7.1 +openai==1.61.0 +openapi-schema-validator==0.6.3 openapi-spec-validator==0.7.1 -openlineage-integration-common==1.18.0 -openlineage-python==1.18.0 -openlineage_sql==1.18.0 +openlineage-integration-common==1.27.0 +openlineage-python==1.27.0 +openlineage_sql==1.27.0 openpyxl==3.1.5 -opensearch-py==2.6.0 -opentelemetry-api==1.25.0 -opentelemetry-exporter-otlp-proto-common==1.25.0 -opentelemetry-exporter-otlp-proto-grpc==1.25.0 -opentelemetry-exporter-otlp-proto-http==1.25.0 -opentelemetry-exporter-otlp==1.25.0 -opentelemetry-exporter-prometheus==0.46b0 -opentelemetry-proto==1.25.0 -opentelemetry-sdk==1.25.0 -opentelemetry-semantic-conventions==0.46b0 +opensearch-py==2.8.0 +opentelemetry-api==1.27.0 +opentelemetry-exporter-otlp-proto-common==1.27.0 +opentelemetry-exporter-otlp-proto-grpc==1.27.0 +opentelemetry-exporter-otlp-proto-http==1.27.0 +opentelemetry-exporter-otlp==1.27.0 +opentelemetry-exporter-prometheus==0.48b0 +opentelemetry-proto==1.27.0 +opentelemetry-sdk==1.27.0 +opentelemetry-semantic-conventions==0.48b0 opsgenie-sdk==2.1.5 -oracledb==2.2.1 +oracledb==2.5.1 ordered-set==4.1.0 -orjson==3.10.6 -oss2==2.18.6 -packaging==24.1 -pandas-gbq==0.23.1 -pandas-stubs==2.2.2.240603 +orjson==3.10.15 +oss2==2.19.1 +packaging==24.2 +pandas-gbq==0.26.1 +pandas-stubs==2.2.3.241126 pandas==2.1.4 papermill==2.6.0 -paramiko==3.4.0 +paramiko==3.5.0 parso==0.8.4 -pathable==0.4.3 +pathable==0.4.4 pathspec==0.12.1 -pbr==6.0.0 -pdpyras==5.2.0 +pbr==6.1.0 +pdpyras==5.4.0 pendulum==3.0.0 pexpect==4.9.0 -pgvector==0.3.1 -pinecone-client==4.1.2 +pgvector==0.3.6 +pinecone-client==5.0.1 +pinecone-plugin-inference==1.1.0 pinecone-plugin-interface==0.0.7 -pinotdb==5.3.0 -pipdeptree==2.23.1 -pipx==1.6.0 -pkginfo==1.10.0 -platformdirs==4.2.2 +pinotdb==5.6.0 +pipdeptree==2.25.0 +platformdirs==4.3.6 pluggy==1.5.0 ply==3.11 plyvel==1.5.1 -portalocker==2.10.0 +portalocker==2.10.1 pprintpp==0.4.0 -pre-commit==3.7.1 +pre_commit==4.1.0 presto-python-client==0.8.4 prison==0.2.1 -prometheus_client==0.20.0 -prompt_toolkit==3.0.47 -proto-plus==1.24.0 -protobuf==4.25.3 -psutil==6.0.0 -psycopg2-binary==2.9.9 +prometheus_client==0.21.1 +prompt_toolkit==3.0.50 +propcache==0.2.1 +proto-plus==1.26.0 +protobuf==4.25.6 +psutil==6.1.1 +psycopg2-binary==2.9.10 ptyprocess==0.7.0 -pure-eval==0.2.2 pure-sasl==0.6.2 +pure_eval==0.2.3 py-partiql-parser==0.5.5 py4j==0.10.9.7 -pyOpenSSL==24.1.0 +pyOpenSSL==24.3.0 pyarrow-hotfix==0.6 pyarrow==16.1.0 -pyasn1-modules==0.3.0 -pyasn1==0.5.1 +pyasn1==0.6.1 +pyasn1_modules==0.4.0 pycountry==24.6.1 pycparser==2.22 -pycryptodome==3.20.0 -pydantic==2.8.2 -pydantic_core==2.20.1 -pydata-google-auth==1.8.2 +pycryptodome==3.21.0 +pydantic==2.10.6 +pydantic_core==2.27.2 +pydata-google-auth==1.9.1 pydot==1.4.2 pydruid==0.6.9 pyenchant==3.2.2 -pyexasol==0.26.0 -pygraphviz==1.13 -pyiceberg==0.6.1 +pyexasol==0.27.0 +pygraphviz==1.14 +pyiceberg==0.8.1 pyjsparser==2.7.1 pykerberos==1.2.4 -pymongo==4.8.0 -pymssql==2.3.0 -pyodbc==5.1.0 -pyparsing==3.1.2 +pymongo==4.11 +pymssql==2.3.2 +pyodbc==5.2.0 +pyparsing==3.2.1 pypsrp==0.8.1 -pyspark==3.5.1 -pyspnego==0.11.0 -pytest-asyncio==0.23.7 -pytest-cov==5.0.0 +pyspark==3.5.4 +pyspnego==0.11.2 +pytest-asyncio==0.25.3 +pytest-cov==6.0.0 pytest-custom-exit-code==0.3.0 pytest-icdiff==0.9 pytest-instafail==0.5.0 pytest-mock==3.14.0 -pytest-rerunfailures==14.0 +pytest-rerunfailures==15.0 pytest-timeouts==1.2.1 pytest-xdist==3.6.1 -pytest==7.4.4 -python-arango==8.0.0 -python-daemon==3.0.1 +pytest==8.3.4 +python-arango==8.1.4 +python-daemon==3.1.2 python-dateutil==2.9.0.post0 python-dotenv==1.0.1 python-http-client==3.3.7 @@ -586,157 +601,159 @@ python-jenkins==1.8.2 python-ldap==3.4.4 python-nvd3==0.16.0 python-slugify==8.0.4 -python-telegram-bot==21.3 +python-telegram-bot==21.10 python3-saml==1.16.0 -pytz==2024.1 -pywinrm==0.4.3 -pyzmq==26.0.3 -qdrant-client==1.10.1 +pytz==2025.1 +pywinrm==0.5.0 +pyzmq==26.2.1 +qdrant-client==1.13.2 reactivex==4.0.4 -readme_renderer==43.0 -redis==5.0.7 -redshift-connector==2.1.2 -referencing==0.35.1 -regex==2024.5.15 +readme_renderer==44.0 +redis==5.2.1 +redshift-connector==2.1.5 +referencing==0.36.2 +regex==2024.11.6 requests-file==2.1.0 requests-kerberos==0.15.0 requests-mock==1.12.1 requests-oauthlib==1.3.1 requests-toolbelt==1.0.0 -requests==2.31.0 +requests==2.32.3 requests_ntlm==1.3.0 -responses==0.25.3 +responses==0.25.6 restructuredtext_lint==1.4.0 rfc3339-validator==0.1.4 rfc3986==2.0.0 -rich-argparse==1.5.2 -rich-click==1.8.3 -rich==13.7.1 -rpds-py==0.19.0 +rich-argparse==1.6.0 +rich-click==1.8.5 +rich==13.9.4 +rpds-py==0.22.3 rsa==4.9 -ruff==0.3.3 -s3fs==2023.12.2 -s3transfer==0.10.2 +ruff==0.5.5 +s3fs==2025.2.0 +s3transfer==0.11.2 scramp==1.4.5 scrapbook==0.5.0 -semver==3.0.2 +semver==3.0.4 sendgrid==6.11.0 sentinels==1.0.0 -sentry-sdk==2.9.0 -setproctitle==1.3.3 -shapely==2.0.4 +sentry-sdk==2.20.0 +setproctitle==1.3.4 +shapely==2.0.7 shellingham==1.5.4 simple-salesforce==1.12.6 -six==1.16.0 -slack_sdk==3.31.0 -smbprotocol==1.13.0 -smmap==5.0.1 +six==1.17.0 +slack_sdk==3.34.0 +smbprotocol==1.15.0 +smmap==5.0.2 sniffio==1.3.1 snowballstemmer==2.2.0 -snowflake-connector-python==3.11.0 -snowflake-sqlalchemy==1.6.1 +snowflake-connector-python==3.13.2 +snowflake-snowpark-python==1.26.0 +snowflake-sqlalchemy==1.7.3 sortedcontainers==2.4.0 -soupsieve==2.5 -sphinx-airflow-theme==0.0.12 -sphinx-argparse==0.4.0 -sphinx-autoapi==2.1.1 +soupsieve==2.6 +sphinx-airflow-theme==0.2.1 +sphinx-argparse==0.5.2 +sphinx-autoapi==3.4.0 sphinx-copybutton==0.5.2 sphinx-jinja==2.0.2 -sphinx-rtd-theme==2.0.0 -sphinx_design==0.6.0 -sphinxcontrib-applehelp==1.0.8 -sphinxcontrib-devhelp==1.0.6 -sphinxcontrib-htmlhelp==2.0.5 +sphinx-rtd-theme==3.0.2 +sphinx_design==0.6.1 +sphinxcontrib-applehelp==2.0.0 +sphinxcontrib-devhelp==2.0.0 +sphinxcontrib-htmlhelp==2.1.0 sphinxcontrib-httpdomain==1.8.1 sphinxcontrib-jquery==4.1 sphinxcontrib-jsmath==1.0.1 -sphinxcontrib-qthelp==1.0.7 +sphinxcontrib-qthelp==2.0.0 sphinxcontrib-redoc==1.6.0 -sphinxcontrib-serializinghtml==1.1.5 -sphinxcontrib-spelling==8.0.0 -spython==0.3.13 -sqlalchemy-bigquery==1.11.0 +sphinxcontrib-serializinghtml==2.0.0 +sphinxcontrib-spelling==8.0.1 +spython==0.3.14 +sqlalchemy-bigquery==1.12.1 sqlalchemy-redshift==0.8.14 -sqlalchemy-spanner==1.7.0 +sqlalchemy-spanner==1.8.0 sqlalchemy_drill==1.1.5 -sqlparse==0.5.0 +sqlparse==0.5.3 sshtunnel==0.4.0 stack-data==0.6.3 starkbank-ecdsa==2.2.0 statsd==4.0.1 -std-uritemplate==1.0.3 +std-uritemplate==2.0.1 strictyaml==1.7.3 -sympy==1.13.0 -tableauserverclient==0.31 +sympy==1.13.3 +tableauserverclient==0.36 tabulate==0.9.0 -tenacity==8.5.0 -teradatasql==20.0.0.13 -teradatasqlalchemy==20.0.0.1 -termcolor==2.4.0 +tenacity==9.0.0 +teradatasql==20.0.0.23 +teradatasqlalchemy==20.0.0.3 +termcolor==2.5.0 text-unidecode==1.3 thrift-sasl==0.4.3 thrift==0.16.0 -time-machine==2.14.2 -tomli_w==1.0.0 -tomlkit==0.13.0 -tornado==6.4.1 -towncrier==23.11.0 -tqdm==4.66.4 +time-machine==2.16.0 +tomli_w==1.2.0 +tomlkit==0.13.2 +tornado==6.4.2 +towncrier==24.8.0 +tqdm==4.67.1 traitlets==5.14.3 -trino==0.329.0 -trove-classifiers==2024.7.2 -twine==5.1.1 -typed-ast==1.5.5 -types-Deprecated==1.2.9.20240311 -types-Markdown==3.6.0.20240316 -types-PyMySQL==1.1.0.20240524 -types-PyYAML==6.0.12.20240311 -types-aiofiles==24.1.0.20240626 +trino==0.332.0 +trove-classifiers==2025.1.15.22 +twine==6.1.0 +types-Deprecated==1.2.15.20241117 +types-Markdown==3.7.0.20241204 +types-PyMySQL==1.1.0.20241103 +types-PyYAML==6.0.12.20241230 +types-aiofiles==24.1.0.20241221 types-certifi==2021.10.8.3 -types-cffi==1.16.0.20240331 -types-croniter==2.0.0.20240423 -types-docutils==0.21.0.20240711 -types-paramiko==3.4.0.20240423 -types-protobuf==5.27.0.20240626 -types-pyOpenSSL==24.1.0.20240425 -types-python-dateutil==2.9.0.20240316 +types-cffi==1.16.0.20241221 +types-croniter==5.0.1.20241205 +types-docutils==0.21.0.20241128 +types-paramiko==3.5.0.20240928 +types-protobuf==5.29.1.20241207 +types-pyOpenSSL==24.1.0.20240722 +types-python-dateutil==2.9.0.20241206 types-python-slugify==8.0.2.20240310 -types-pytz==2024.1.0.20240417 -types-redis==4.6.0.20240425 -types-requests==2.32.0.20240712 -types-setuptools==70.3.0.20240710 -types-tabulate==0.9.0.20240106 +types-pytz==2024.2.0.20241221 +types-redis==4.6.0.20241004 +types-requests==2.32.0.20241016 +types-setuptools==75.8.0.20250110 +types-tabulate==0.9.0.20241207 types-termcolor==1.1.6.2 types-toml==0.10.8.20240310 +typing-inspect==0.9.0 typing_extensions==4.12.2 -tzdata==2024.1 +tzdata==2025.1 tzlocal==5.2 uc-micro-py==1.0.3 -unicodecsv==0.14.1 -universal_pathlib==0.2.2 +universal_pathlib==0.2.6 uritemplate==4.1.1 -urllib3==2.0.7 +urllib3==2.3.0 userpath==1.9.2 -uv==0.2.22 -validators==0.28.3 -vertica-python==1.3.8 +uv==0.5.24 +validators==0.34.0 +vertica-python==1.4.0 vine==5.1.0 -virtualenv==20.26.3 -watchtower==3.2.0 +virtualenv==20.29.1 +watchtower==3.3.1 wcwidth==0.2.13 -weaviate-client==4.6.5 +weaviate-client==4.9.6 websocket-client==1.8.0 -wirerope==0.4.7 -wrapt==1.16.0 -xmlsec==1.3.13 -xmltodict==0.13.0 +wirerope==1.0.0 +wrapt==1.17.2 +xmlsec==1.3.14 +xmltodict==0.14.2 yamllint==1.35.1 yandex-query-client==0.1.4 -yandexcloud==0.291.0 -yarl==1.9.4 -zeep==4.2.1 -zenpy==2.0.49 -zipp==3.19.2 +yandexcloud==0.328.0 +yarl==1.18.3 +ydb-dbapi==0.1.7 +ydb==3.18.15 +zeep==4.3.1 +zenpy==2.0.56 +zipp==3.21.0 zope.event==5.0 -zope.interface==6.4.post2 -zstandard==0.22.0 +zope.interface==7.2 +zstandard==0.23.0 diff --git a/docker-compose.yml b/docker-compose.yml index f57d923aa..cf78b7c9d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,5 @@ # Adapted from official docker-compose.yaml -# https://airflow.apache.org/docs/apache-airflow/2.9.3/docker-compose.yaml +# https://airflow.apache.org/docs/apache-airflow/2.10.5/docker-compose.yaml # # WARNING: This configuration is for local development. Do not use it in a production deployment. # diff --git a/requirements-dev.in b/requirements-dev.in index 8e368101f..8f35861da 100644 --- a/requirements-dev.in +++ b/requirements-dev.in @@ -6,6 +6,6 @@ pip-tools==7.4.1 # Code quality -pytest==7.4.4 +pytest==8.3.4 pytest-mock==3.14.0 -ruff==0.3.3 +ruff==0.5.5 diff --git a/requirements-dev.txt b/requirements-dev.txt index f5ed080cc..b321fff07 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,15 +5,15 @@ # pip-compile --no-annotate --strip-extras requirements-dev.in # build==1.1.1 -click==8.1.7 +click==8.1.8 iniconfig==2.0.0 -packaging==24.1 +packaging==24.2 pip-tools==7.4.1 pluggy==1.5.0 pyproject-hooks==1.0.0 -pytest==7.4.4 +pytest==8.3.4 pytest-mock==3.14.0 -ruff==0.3.3 +ruff==0.5.5 wheel==0.43.0 # The following packages are considered to be unsafe in a requirements file: diff --git a/requirements-override.txt b/requirements-override.txt index 9250a4dbc..e69de29bb 100644 --- a/requirements-override.txt +++ b/requirements-override.txt @@ -1 +0,0 @@ -apache-airflow-providers-cncf-kubernetes==8.3.4 \ No newline at end of file diff --git a/requirements.in b/requirements.in index f1d59b66b..c9fd174b1 100644 --- a/requirements.in +++ b/requirements.in @@ -1,10 +1,10 @@ # Official Airflow constraints file # Doc: https://airflow.apache.org/docs/apache-airflow/stable/installation/installing-from-pypi.html#constraints-files -# File: https://raw.githubusercontent.com/apache/airflow/constraints-2.9.3/constraints-3.11.txt +# File: https://raw.githubusercontent.com/apache/airflow/constraints-2.10.5/constraints-3.11.txt --constraint ./constraints.txt # Airflow dependencies -apache-airflow[async,google-auth,password,statsd]==2.9.3 +apache-airflow[async,google-auth,password,statsd]==2.10.5 apache-airflow-providers-amazon apache-airflow-providers-celery apache-airflow-providers-cncf-kubernetes diff --git a/requirements.txt b/requirements.txt index 944a7ecca..d918171f7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,58 +7,62 @@ acryl-datahub==0.13.2.4 acryl-datahub-airflow-plugin==0.13.2.4 aiofiles==23.2.1 -aiohttp==3.9.5 -aiosignal==1.3.1 +aiohappyeyeballs==2.4.4 +aiohttp==3.11.11 +aiosignal==1.3.2 +aiosqlite==0.20.0 airflow-provider-fivetran-async==2.0.2 -alembic==1.13.2 -amqp==5.2.0 +alembic==1.14.1 +amqp==5.3.1 annotated-types==0.7.0 -anyio==4.4.0 -apache-airflow==2.9.3 -apache-airflow-providers-amazon==8.25.0 -apache-airflow-providers-atlassian-jira==2.6.1 -apache-airflow-providers-celery==3.7.2 -apache-airflow-providers-cncf-kubernetes==8.3.3 -apache-airflow-providers-common-io==1.3.2 -apache-airflow-providers-common-sql==1.14.2 -apache-airflow-providers-dbt-cloud==3.9.0 -apache-airflow-providers-fab==1.2.2 -apache-airflow-providers-ftp==3.10.0 -apache-airflow-providers-google==10.21.0 -apache-airflow-providers-http==4.12.0 -apache-airflow-providers-imap==3.6.1 -apache-airflow-providers-postgres==5.11.2 -apache-airflow-providers-redis==3.7.1 -apache-airflow-providers-slack==8.7.1 -apache-airflow-providers-smtp==1.7.1 -apache-airflow-providers-sqlite==3.8.1 -apispec==6.6.1 -argcomplete==3.4.0 +anyio==4.8.0 +apache-airflow==2.10.5 +apache-airflow-providers-amazon==9.2.0 +apache-airflow-providers-atlassian-jira==3.0.0 +apache-airflow-providers-celery==3.10.0 +apache-airflow-providers-cncf-kubernetes==10.1.0 +apache-airflow-providers-common-compat==1.3.0 +apache-airflow-providers-common-io==1.5.0 +apache-airflow-providers-common-sql==1.21.0 +apache-airflow-providers-dbt-cloud==4.0.0 +apache-airflow-providers-fab==1.5.2 +apache-airflow-providers-ftp==3.12.0 +apache-airflow-providers-google==12.0.0 +apache-airflow-providers-http==5.0.0 +apache-airflow-providers-imap==3.8.0 +apache-airflow-providers-postgres==6.0.0 +apache-airflow-providers-redis==4.0.0 +apache-airflow-providers-slack==9.0.0 +apache-airflow-providers-smtp==1.9.0 +apache-airflow-providers-sqlite==4.0.0 +apispec==6.8.1 +argcomplete==3.5.3 asgiref==3.8.1 asn1crypto==1.5.1 -atlassian-python-api==3.41.14 -attrs==23.2.0 +asyncpg==0.30.0 +atlassian-python-api==3.41.19 +attrs==25.1.0 authlib==1.3.1 avro==1.11.3 avro-gen3==0.7.12 -babel==2.15.0 +babel==2.17.0 backoff==2.2.1 -bcrypt==4.1.3 -beautifulsoup4==4.12.3 -billiard==4.2.0 -blinker==1.8.2 -boto3==1.34.131 -botocore==1.34.131 +bcrypt==4.2.1 +beautifulsoup4==4.13.0 +billiard==4.2.1 +blinker==1.9.0 +boto3==1.36.3 +botocore==1.36.3 cached-property==1.5.2 cachelib==0.9.0 -cachetools==5.3.3 -cattrs==23.2.3 +cachetools==5.5.1 +cattrs==24.1.2 celery==5.4.0 -certifi==2024.7.4 -cffi==1.16.0 +certifi==2025.1.31 +cffi==1.17.1 chardet==5.2.0 -charset-normalizer==3.3.2 -click==8.1.7 +charset-normalizer==3.4.1 +click==8.1.8 click-default-group==1.2.4 click-didyoumean==0.3.1 click-plugins==1.1.1 @@ -66,248 +70,250 @@ click-repl==0.3.0 click-spinner==0.1.10 clickclick==20.10.2 colorama==0.4.6 -colorlog==4.8.0 +colorlog==6.9.0 configupdater==3.2 connexion==2.14.2 -cron-descriptor==1.4.3 -croniter==2.0.5 -cryptography==41.0.7 -db-dtypes==1.2.0 +cron-descriptor==1.4.5 +croniter==6.0.0 +cryptography==42.0.8 +db-dtypes==1.4.0 decorator==5.1.1 -deprecated==1.2.14 +deprecated==1.2.18 dill==0.3.1.1 -dnspython==2.6.1 +dnspython==2.7.0 docker==7.1.0 docstring-parser==0.16 -docutils==0.16 email-validator==2.2.0 -eventlet==0.36.1 +eventlet==0.39.0 expandvars==0.12.0 flask==2.2.5 -flask-appbuilder==4.5.0 +flask-appbuilder==4.5.2 flask-babel==2.0.0 flask-bcrypt==1.0.1 flask-caching==2.3.0 -flask-jwt-extended==4.6.0 -flask-limiter==3.7.0 +flask-jwt-extended==4.7.1 +flask-limiter==3.10.1 flask-login==0.6.3 flask-session==0.5.0 flask-sqlalchemy==2.5.1 -flask-wtf==1.2.1 +flask-wtf==1.2.2 flower==2.0.1 -frozenlist==1.4.1 -fsspec==2023.12.2 -gcloud-aio-auth==4.2.3 +frozenlist==1.5.0 +fsspec==2025.2.0 +gcloud-aio-auth==5.3.2 gcloud-aio-bigquery==7.1.0 -gcloud-aio-storage==9.2.0 -gcsfs==2023.12.2.post1 -gevent==24.2.1 -google-ads==24.1.0 -google-analytics-admin==0.22.8 -google-api-core==2.19.1 -google-api-python-client==2.137.0 -google-auth==2.32.0 +gcloud-aio-storage==9.3.0 +gcsfs==2025.2.0 +gevent==24.11.1 +google-ads==25.1.0 +google-analytics-admin==0.23.3 +google-api-core==2.24.1 +google-api-python-client==2.160.0 +google-auth==2.38.0 google-auth-httplib2==0.2.0 google-auth-oauthlib==1.2.1 -google-cloud-aiplatform==1.59.0 -google-cloud-appengine-logging==1.4.4 -google-cloud-audit-log==0.2.5 -google-cloud-automl==2.13.4 -google-cloud-batch==0.17.22 +google-cloud-aiplatform==1.79.0 +google-cloud-alloydb==0.4.1 +google-cloud-appengine-logging==1.5.0 +google-cloud-audit-log==0.3.0 +google-cloud-automl==2.15.0 +google-cloud-batch==0.17.33 google-cloud-bigquery==3.20.1 -google-cloud-bigquery-datatransfer==3.15.4 -google-cloud-bigtable==2.24.0 -google-cloud-build==3.24.1 -google-cloud-compute==1.19.1 -google-cloud-container==2.49.0 +google-cloud-bigquery-datatransfer==3.18.0 +google-cloud-bigtable==2.28.1 +google-cloud-build==3.29.0 +google-cloud-compute==1.24.0 +google-cloud-container==2.55.1 google-cloud-core==2.4.1 -google-cloud-datacatalog==3.19.1 -google-cloud-dataflow-client==0.8.11 -google-cloud-dataform==0.5.10 -google-cloud-dataplex==2.2.1 -google-cloud-dataproc==5.10.1 -google-cloud-dataproc-metastore==1.15.4 -google-cloud-dlp==3.18.1 -google-cloud-kms==2.24.1 -google-cloud-language==2.13.4 -google-cloud-logging==3.10.0 -google-cloud-memcache==1.9.4 -google-cloud-monitoring==2.22.1 -google-cloud-orchestration-airflow==1.13.0 -google-cloud-os-login==2.14.5 -google-cloud-pubsub==2.22.0 -google-cloud-redis==2.15.4 -google-cloud-resource-manager==1.12.4 -google-cloud-run==0.10.7 -google-cloud-secret-manager==2.20.1 -google-cloud-spanner==3.47.0 -google-cloud-speech==2.26.1 -google-cloud-storage==2.17.0 -google-cloud-storage-transfer==1.11.4 -google-cloud-tasks==2.16.4 -google-cloud-texttospeech==2.16.4 -google-cloud-translate==3.15.4 -google-cloud-videointelligence==2.13.4 -google-cloud-vision==3.7.3 -google-cloud-workflows==1.14.4 -google-crc32c==1.5.0 +google-cloud-datacatalog==3.24.1 +google-cloud-dataflow-client==0.8.15 +google-cloud-dataform==0.5.14 +google-cloud-dataplex==2.6.0 +google-cloud-dataproc==5.16.0 +google-cloud-dataproc-metastore==1.17.0 +google-cloud-dlp==3.26.0 +google-cloud-kms==3.2.2 +google-cloud-language==2.16.0 +google-cloud-logging==3.11.4 +google-cloud-memcache==1.11.0 +google-cloud-monitoring==2.26.0 +google-cloud-orchestration-airflow==1.16.1 +google-cloud-os-login==2.16.0 +google-cloud-pubsub==2.28.0 +google-cloud-redis==2.17.0 +google-cloud-resource-manager==1.14.0 +google-cloud-run==0.10.14 +google-cloud-secret-manager==2.22.1 +google-cloud-spanner==3.51.0 +google-cloud-speech==2.30.0 +google-cloud-storage==2.19.0 +google-cloud-storage-transfer==1.15.0 +google-cloud-tasks==2.18.0 +google-cloud-texttospeech==2.24.0 +google-cloud-translate==3.19.0 +google-cloud-videointelligence==2.15.0 +google-cloud-vision==3.9.0 +google-cloud-workflows==1.16.0 +google-crc32c==1.6.0 google-re2==1.1.20240702 -google-resumable-media==2.7.1 -googleapis-common-protos==1.63.2 +google-resumable-media==2.7.2 +googleapis-common-protos==1.66.0 gql==3.5.0 -graphql-core==3.2.3 -greenlet==3.0.3 -grpc-google-iam-v1==0.13.1 +graphql-core==3.2.6 +greenlet==3.1.1 +grpc-google-iam-v1==0.14.0 grpc-interceptor==0.15.4 -grpcio==1.64.1 +grpcio==1.70.0 grpcio-gcp==0.2.2 -grpcio-status==1.62.2 -gunicorn==22.0.0 +grpcio-status==1.62.3 +gunicorn==23.0.0 h11==0.14.0 -httpcore==1.0.5 +httpcore==1.0.7 httplib2==0.22.0 httpx==0.27.0 humanfriendly==10.0 -humanize==4.10.0 -idna==3.7 +humanize==4.11.0 +idna==3.10 ijson==3.3.0 +immutabledict==4.2.1 importlib-metadata==6.11.0 -importlib-resources==6.4.0 inflection==0.5.1 +isodate==0.7.2 itsdangerous==2.2.0 -jinja2==3.1.4 +jinja2==3.1.5 jmespath==0.10.0 json-merge-patch==0.2 -jsonpath-ng==1.6.1 +jsonpath-ng==1.7.0 jsonref==1.1.0 jsonschema==4.23.0 -jsonschema-specifications==2023.12.1 -kombu==5.3.7 -kubernetes==29.0.0 -kubernetes-asyncio==29.0.0 +jsonschema-specifications==2024.10.1 +kombu==5.4.2 +kubernetes==30.1.0 +kubernetes-asyncio==30.1.0 lazy-object-proxy==1.10.0 -limits==3.13.0 +limits==4.0.1 linkify-it-py==2.0.3 lockfile==0.12.2 -looker-sdk==24.10.0 -lxml==5.2.2 -mako==1.3.5 +looker-sdk==25.0.0 +lxml==5.3.0 +mako==1.3.8 markdown-it-py==3.0.0 -markupsafe==2.1.5 -marshmallow==3.21.3 +markupsafe==3.0.2 +marshmallow==3.26.0 marshmallow-oneofschema==3.1.1 marshmallow-sqlalchemy==0.28.2 -mdit-py-plugins==0.4.1 +mdit-py-plugins==0.4.2 mdurl==0.1.2 methodtools==0.4.7 mixpanel==4.10.1 -more-itertools==10.3.0 -multidict==6.0.5 +more-itertools==10.6.0 +multidict==6.1.0 mypy-extensions==1.0.0 numpy==1.26.4 oauthlib==3.2.2 -opentelemetry-api==1.25.0 -opentelemetry-exporter-otlp==1.25.0 -opentelemetry-exporter-otlp-proto-common==1.25.0 -opentelemetry-exporter-otlp-proto-grpc==1.25.0 -opentelemetry-exporter-otlp-proto-http==1.25.0 -opentelemetry-proto==1.25.0 -opentelemetry-sdk==1.25.0 -opentelemetry-semantic-conventions==0.46b0 +opentelemetry-api==1.27.0 +opentelemetry-exporter-otlp==1.27.0 +opentelemetry-exporter-otlp-proto-common==1.27.0 +opentelemetry-exporter-otlp-proto-grpc==1.27.0 +opentelemetry-exporter-otlp-proto-http==1.27.0 +opentelemetry-proto==1.27.0 +opentelemetry-sdk==1.27.0 +opentelemetry-semantic-conventions==0.48b0 ordered-set==4.1.0 -packaging==24.1 +packaging==24.2 pandas==2.1.4 -pandas-gbq==0.23.1 +pandas-gbq==0.26.1 pathspec==0.12.1 pendulum==3.0.0 pluggy==1.5.0 ply==3.11 prison==0.2.1 progressbar2==4.4.2 -prometheus-client==0.20.0 -prompt-toolkit==3.0.47 -proto-plus==1.24.0 -protobuf==4.25.3 -psutil==6.0.0 -psycopg2-binary==2.9.9 +prometheus-client==0.21.1 +prompt-toolkit==3.0.50 +propcache==0.2.1 +proto-plus==1.26.0 +protobuf==4.25.6 +psutil==6.1.1 +psycopg2-binary==2.9.10 pyarrow==16.1.0 -pyasn1==0.5.1 -pyasn1-modules==0.3.0 -pyathena==3.8.3 +pyasn1==0.6.1 +pyasn1-modules==0.4.0 +pyathena==3.12.2 pycparser==2.22 -pydantic==2.8.2 -pydantic-core==2.20.1 -pydata-google-auth==1.8.2 -pygments==2.18.0 -pyjwt==2.8.0 -pyopenssl==24.1.0 -pyparsing==3.1.2 -python-daemon==3.0.1 +pydantic==2.10.6 +pydantic-core==2.27.2 +pydata-google-auth==1.9.1 +pygments==2.19.1 +pyjwt==2.10.1 +pyopenssl==24.3.0 +pyparsing==3.2.1 +python-daemon==3.1.2 python-dateutil==2.9.0.post0 python-nvd3==0.16.0 python-slugify==8.0.4 python-utils==3.8.2 -pytz==2024.1 -pyyaml==6.0.1 -redis==5.0.7 -redshift-connector==2.1.2 -referencing==0.35.1 -requests==2.31.0 +python3-saml==1.16.0 +pytz==2025.1 +pyyaml==6.0.2 +redis==5.2.1 +redshift-connector==2.1.5 +referencing==0.36.2 +requests==2.32.3 requests-file==2.1.0 requests-oauthlib==1.3.1 requests-toolbelt==1.0.0 rfc3339-validator==0.1.4 -rich==13.7.1 -rich-argparse==1.5.2 -rpds-py==0.19.0 +rich==13.9.4 +rich-argparse==1.6.0 +rpds-py==0.22.3 rsa==4.9 ruamel-yaml==0.18.6 ruamel-yaml-clib==0.2.8 -s3transfer==0.10.2 +s3transfer==0.11.2 scramp==1.4.5 -sentry-sdk==2.9.0 -setproctitle==1.3.3 -shapely==2.0.4 -six==1.16.0 -slack-sdk==3.31.0 +sentry-sdk==2.20.0 +setproctitle==1.3.4 +shapely==2.0.7 +six==1.17.0 +slack-sdk==3.34.0 sniffio==1.3.1 -soupsieve==2.5 -sqlalchemy==1.4.52 -sqlalchemy-bigquery==1.11.0 +soupsieve==2.6 +sqlalchemy==1.4.54 +sqlalchemy-bigquery==1.12.1 sqlalchemy-jsonfield==1.0.2 -sqlalchemy-redshift==0.8.14 -sqlalchemy-spanner==1.7.0 +sqlalchemy-spanner==1.8.0 sqlalchemy-utils==0.41.2 -sqlparse==0.5.0 +sqlparse==0.5.3 statsd==4.0.1 tabulate==0.9.0 -tenacity==8.5.0 -termcolor==2.4.0 +tenacity==9.0.0 +termcolor==2.5.0 text-unidecode==1.3 -time-machine==2.14.2 +time-machine==2.16.0 toml==0.10.2 -tornado==6.4.1 +tornado==6.4.2 typing-extensions==4.12.2 typing-inspect==0.9.0 -tzdata==2024.1 +tzdata==2025.1 uc-micro-py==1.0.3 -unicodecsv==0.14.1 -universal-pathlib==0.2.2 +universal-pathlib==0.2.6 uritemplate==4.1.1 -urllib3==2.0.7 +urllib3==2.3.0 vine==5.1.0 -watchtower==3.2.0 +watchtower==3.3.1 wcwidth==0.2.13 websocket-client==1.8.0 werkzeug==2.2.3 -wirerope==0.4.7 -wrapt==1.16.0 -wtforms==3.1.2 -xmltodict==0.13.0 -yarl==1.9.4 -zipp==3.19.2 +wirerope==1.0.0 +wrapt==1.17.2 +wtforms==3.2.1 +xmlsec==1.3.14 +xmltodict==0.14.2 +yarl==1.18.3 +zipp==3.21.0 zope-event==5.0 -zope-interface==6.4.post2 +zope-interface==7.2 # The following packages are considered to be unsafe in a requirements file: # setuptools From 837ab2f5b5ad86bb26dda65e2db4e3a6ae25d92e Mon Sep 17 00:00:00 2001 From: Sean Rose Date: Tue, 25 Mar 2025 21:42:02 -0700 Subject: [PATCH 2/8] fix: Replace use of removed `SimpleHttpOperator` with `HttpOperator`. --- dags/probe_scraper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dags/probe_scraper.py b/dags/probe_scraper.py index 3e86b3a87..f58ea4406 100644 --- a/dags/probe_scraper.py +++ b/dags/probe_scraper.py @@ -9,7 +9,7 @@ from airflow.operators.python import PythonOperator from airflow.operators.trigger_dagrun import TriggerDagRunOperator from airflow.providers.cncf.kubernetes.secret import Secret -from airflow.providers.http.operators.http import SimpleHttpOperator +from airflow.providers.http.operators.http import HttpOperator from airflow.sensors.external_task import ExternalTaskSensor from airflow.utils.weekday import WeekDay from kubernetes.client import models as k8s @@ -384,7 +384,7 @@ def choose_branch(self, context): # build of the glean dictionary. We do this after the schema generator has # finished running as the dictionary uses the new schema files as part of # said build. - glean_dictionary_netlify_build = SimpleHttpOperator( + glean_dictionary_netlify_build = HttpOperator( http_conn_id="http_netlify_build_webhook", endpoint=Variable.get("glean_dictionary_netlify_build_webhook_id"), method="POST", From b0bc472f071648c10fb4f2b5dc039f0d41647f89 Mon Sep 17 00:00:00 2001 From: Sean Rose Date: Tue, 25 Mar 2025 21:45:39 -0700 Subject: [PATCH 3/8] fix: Upgrade `apache-airflow-providers-google` to v14.0.0 to fix a bug present in v12.0.0. The bug was Dataproc operators failing to import without OpenLineage, fixed in apache/airflow#46561. --- requirements-override.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements-override.txt b/requirements-override.txt index e69de29bb..a8a2b4db1 100644 --- a/requirements-override.txt +++ b/requirements-override.txt @@ -0,0 +1 @@ +apache-airflow-providers-google==14.0.0 From 78507a02fb98632675d76edd0d73d3e06f2d64dd Mon Sep 17 00:00:00 2001 From: Sean Rose Date: Tue, 25 Mar 2025 21:52:13 -0700 Subject: [PATCH 4/8] Fix: Replace uses of removed Dataproc job operators with `DataprocSubmitJobOperator`. --- utils/dataproc.py | 66 +++++++++++++++++++++++++++++++++-------------- utils/gcp.py | 30 +++++++++++++++------ 2 files changed, 68 insertions(+), 28 deletions(-) diff --git a/utils/dataproc.py b/utils/dataproc.py index 7ed8a7169..3e588e61d 100644 --- a/utils/dataproc.py +++ b/utils/dataproc.py @@ -5,6 +5,7 @@ from airflow.exceptions import AirflowException from airflow.operators.bash import BashOperator from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook +from airflow.providers.google.cloud.hooks.dataproc import DataProcJobBuilder # When google deprecates dataproc_v1beta2 in DataprocHook/Operator classes # We can import these from our patched code, rather than upgrading/deploying @@ -14,8 +15,7 @@ ClusterGenerator, DataprocCreateClusterOperator, DataprocDeleteClusterOperator, - DataprocSubmitPySparkJobOperator, - DataprocSubmitSparkJobOperator, + DataprocSubmitJobOperator, ) @@ -251,7 +251,7 @@ def moz_dataproc_pyspark_runner( """ Create a GCP Dataproc cluster with Anaconda/Jupyter/Component gateway. - Then we call DataprocSubmitPySparkJobOperator to execute the pyspark script defined by the argument + Then we call DataprocSubmitJobOperator to execute the pyspark script defined by the argument python_driver_code. Once that succeeds, we teardown the cluster. **Example**: :: @@ -396,13 +396,21 @@ def moz_dataproc_pyspark_runner( with models.DAG(_dag_name, default_args=default_args) as dag: create_dataproc_cluster = dataproc_helper.create_cluster() - run_pyspark_on_dataproc = DataprocSubmitPySparkJobOperator( + dataproc_job_builder = DataProcJobBuilder( + job_type="pyspark_job", task_id="run_dataproc_pyspark", - job_name=job_name, cluster_name=cluster_name, + project_id=project_id, + ) + dataproc_job_builder.set_job_name(job_name) + dataproc_job_builder.set_python_main(python_driver_code) + dataproc_job_builder.add_args(py_args) + dataproc_job = dataproc_job_builder.build() + + run_pyspark_on_dataproc = DataprocSubmitJobOperator( + task_id="run_dataproc_pyspark", + job=dataproc_job, region=region, - main=python_driver_code, - arguments=py_args, gcp_conn_id=gcp_conn_id, project_id=project_id, ) @@ -452,7 +460,7 @@ def moz_dataproc_jar_runner( """ Create a GCP Dataproc cluster with Anaconda/Jupyter/Component gateway. - Then we call DataprocSubmitSparkJobOperator to execute the jar defined by the arguments + Then we call DataprocSubmitJobOperator to execute the jar defined by the arguments jar_urls and main_class. Once that succeeds, we teardown the cluster. **Example**: :: @@ -535,14 +543,22 @@ def moz_dataproc_jar_runner( with models.DAG(_dag_name, default_args=default_args) as dag: create_dataproc_cluster = dataproc_helper.create_cluster() - run_jar_on_dataproc = DataprocSubmitSparkJobOperator( + dataproc_job_builder = DataProcJobBuilder( + job_type="spark_job", + task_id="run_jar_on_dataproc", cluster_name=cluster_name, + project_id=project_id, + ) + dataproc_job_builder.set_job_name(job_name) + dataproc_job_builder.add_jar_file_uris(jar_urls) + dataproc_job_builder.set_main(main_class=main_class) + dataproc_job_builder.add_args(jar_args) + dataproc_job = dataproc_job_builder.build() + + run_jar_on_dataproc = DataprocSubmitJobOperator( region=region, task_id="run_jar_on_dataproc", - job_name=job_name, - dataproc_jars=jar_urls, - main_class=main_class, - arguments=jar_args, + job=dataproc_job, gcp_conn_id=gcp_conn_id, project_id=project_id, ) @@ -598,7 +614,7 @@ def moz_dataproc_scriptrunner( Create a GCP Dataproc cluster with Anaconda/Jupyter/Component gateway. Then we execute a script uri (either https or gcs) similar to how we use our custom AWS - EmrSparkOperator. This will call DataprocSubmitSparkJobOperator using EMR's script-runner.jar, which + EmrSparkOperator. This will call DataprocSubmitJobOperator using EMR's script-runner.jar, which then executes the airflow_gcp.sh entrypoint script. The entrypoint script expects another script uri, along with it's arguments, as parameters. Once that succeeds, we teardown the cluster. @@ -703,16 +719,26 @@ def moz_dataproc_scriptrunner( with models.DAG(_dag_name, default_args=default_args) as dag: create_dataproc_cluster = dataproc_helper.create_cluster() - # Run DataprocSubmitSparkJobOperator with script-runner.jar pointing to airflow_gcp.sh. + # Run DataprocSubmitJobOperator with script-runner.jar pointing to airflow_gcp.sh. - run_script_on_dataproc = DataprocSubmitSparkJobOperator( + dataproc_job_builder = DataProcJobBuilder( + job_type="spark_job", + task_id="run_script_on_dataproc", cluster_name=cluster_name, + project_id=project_id, + ) + dataproc_job_builder.set_job_name(job_name) + dataproc_job_builder.add_jar_file_uris([jar_url]) + dataproc_job_builder.set_main( + main_class="com.amazon.elasticmapreduce.scriptrunner.ScriptRunner" + ) + dataproc_job_builder.add_args(args) + dataproc_job = dataproc_job_builder.build() + + run_script_on_dataproc = DataprocSubmitJobOperator( region=region, task_id="run_script_on_dataproc", - job_name=job_name, - dataproc_jars=[jar_url], - main_class="com.amazon.elasticmapreduce.scriptrunner.ScriptRunner", - arguments=args, + job=dataproc_job, gcp_conn_id=gcp_conn_id, project_id=project_id, ) diff --git a/utils/gcp.py b/utils/gcp.py index 06fd74681..b85383000 100644 --- a/utils/gcp.py +++ b/utils/gcp.py @@ -2,11 +2,12 @@ from airflow import models from airflow.providers.cncf.kubernetes.secret import Secret +from airflow.providers.google.cloud.hooks.dataproc import DataProcJobBuilder from airflow.providers.google.cloud.operators.dataproc import ( ClusterGenerator, DataprocCreateClusterOperator, DataprocDeleteClusterOperator, - DataprocSubmitPySparkJobOperator, + DataprocSubmitJobOperator, ) from airflow.providers.google.cloud.operators.gcs import GCSDeleteObjectsOperator from airflow.providers.google.cloud.transfers.bigquery_to_gcs import ( @@ -117,16 +118,23 @@ def export_to_parquet( ).make(), ) - run_dataproc_pyspark = DataprocSubmitPySparkJobOperator( + dataproc_job_builder = DataProcJobBuilder( + job_type="pyspark_job", task_id="run_dataproc_pyspark", cluster_name=cluster_name, - dataproc_jars=["gs://spark-lib/bigquery/spark-2.4-bigquery-latest.jar"], - dataproc_properties={ + project_id=project_id, + properties={ "spark.jars.packages": "org.apache.spark:spark-avro_2.11:2.4.4", }, - main="https://raw.githubusercontent.com/mozilla/bigquery-etl/main" - "/script/legacy/export_to_parquet.py", - arguments=[table] + ) + dataproc_job_builder.add_jar_file_uris( + ["gs://spark-lib/bigquery/spark-2.4-bigquery-latest.jar"] + ) + dataproc_job_builder.set_python_main( + "https://raw.githubusercontent.com/mozilla/bigquery-etl/main/script/legacy/export_to_parquet.py" + ) + dataproc_job_builder.add_args( + [table] + [ "--" + key + "=" + value for key, value in { @@ -138,7 +146,13 @@ def export_to_parquet( ] + (["--static-partitions"] if static_partitions else []) + (static_partitions if static_partitions else []) - + arguments, + + arguments + ) + dataproc_job = dataproc_job_builder.build() + + run_dataproc_pyspark = DataprocSubmitJobOperator( + task_id="run_dataproc_pyspark", + job=dataproc_job, gcp_conn_id=gcp_conn_id, project_id=project_id, region=region, From 4fc89621afc1343d1309cd8b217159757e4dee18 Mon Sep 17 00:00:00 2001 From: Sean Rose Date: Tue, 25 Mar 2025 21:54:34 -0700 Subject: [PATCH 5/8] fix: ruff format modified Python files. --- dags/probe_scraper.py | 5 ++++- utils/dataproc.py | 12 ++++++------ utils/gcp.py | 15 +++++++++++++-- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/dags/probe_scraper.py b/dags/probe_scraper.py index f58ea4406..045b25a49 100644 --- a/dags/probe_scraper.py +++ b/dags/probe_scraper.py @@ -203,7 +203,10 @@ for name, url in ( ("gecko-dev", "https://github.com/mozilla/gecko-dev"), ("phabricator", "https://github.com/mozilla-conduit/review"), - ("releases-comm-central", "https://github.com/mozilla/releases-comm-central"), + ( + "releases-comm-central", + "https://github.com/mozilla/releases-comm-central", + ), ) ] diff --git a/utils/dataproc.py b/utils/dataproc.py index 3e588e61d..123d03f59 100644 --- a/utils/dataproc.py +++ b/utils/dataproc.py @@ -170,16 +170,16 @@ def create_cluster(self): if self.master_num_local_ssds > 0: master_instance_group_config = cluster_config["master_config"] - master_instance_group_config["disk_config"][ - "num_local_ssds" - ] = self.master_num_local_ssds + master_instance_group_config["disk_config"]["num_local_ssds"] = ( + self.master_num_local_ssds + ) cluster_config.update({"master_config": master_instance_group_config}) if self.worker_num_local_ssds > 0: worker_instance_group_config = cluster_config["worker_config"] - worker_instance_group_config["disk_config"][ - "num_local_ssds" - ] = self.worker_num_local_ssds + worker_instance_group_config["disk_config"]["num_local_ssds"] = ( + self.worker_num_local_ssds + ) cluster_config.update({"worker_config": worker_instance_group_config}) return DataprocCreateClusterOperator( diff --git a/utils/gcp.py b/utils/gcp.py index b85383000..b6cd2435a 100644 --- a/utils/gcp.py +++ b/utils/gcp.py @@ -21,6 +21,7 @@ DATAPROC_PROJECT_ID = "airflow-dataproc" BIGQUERY_ETL_DOCKER_IMAGE = "gcr.io/moz-fx-data-airflow-prod-88e0/bigquery-etl:latest" + def export_to_parquet( table, destination_table=None, @@ -419,6 +420,7 @@ def bigquery_dq_check( **kwargs, ) + def bigquery_bigeye_check( task_id, table_id, @@ -452,13 +454,22 @@ def bigquery_bigeye_check( kwargs["task_id"] = kwargs.get("task_id", task_id) kwargs["name"] = kwargs.get("name", task_id.replace("_", "-")) - args = ["script/bqetl", "monitoring", "run", table_id, "--warehouse_id", warehouse_id, "--project_id", project_id] + args = [ + "script/bqetl", + "monitoring", + "run", + table_id, + "--warehouse_id", + warehouse_id, + "--project_id", + project_id, + ] bigeye_api_key = Secret( deploy_type="env", deploy_target="BIGEYE_API_KEY", secret="airflow-gke-secrets", - key="bqetl_artifact_deployment__bigeye_api_key" + key="bqetl_artifact_deployment__bigeye_api_key", ) return GKEPodOperator( From 4a562ae844908ea42278a97f98abfb449e8a6fbc Mon Sep 17 00:00:00 2001 From: Sean Rose Date: Wed, 26 Mar 2025 17:26:34 -0700 Subject: [PATCH 6/8] chore: Opt out of Airflow's usage telemetry in the local Docker Compose setup. --- docker-compose.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index cf78b7c9d..66f00bef7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -34,6 +34,8 @@ x-airflow-common: # yamllint enable rule:line-length AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true' DEPLOY_ENVIRONMENT: dev + # Opt out of Airflow's usage telemetry. + SCARF_ANALYTICS: 'false' volumes: - ./dags:/opt/airflow/dags - ./logs:/opt/airflow/logs From 0b8f09f42f9e9b7994565ba2d13fc4f571c5f76f Mon Sep 17 00:00:00 2001 From: Sean Rose Date: Thu, 27 Mar 2025 12:03:48 -0700 Subject: [PATCH 7/8] chore: Add comment explaining `apache-airflow-providers-google` override. --- requirements-override.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements-override.txt b/requirements-override.txt index a8a2b4db1..09e722699 100644 --- a/requirements-override.txt +++ b/requirements-override.txt @@ -1 +1,3 @@ +# There's a bug in apache-airflow-providers-google 12.0.0 where Dataproc operators fail to import +# without OpenLineage installed, which was fixed in 14.0.0 (https://github.com/apache/airflow/pull/46561). apache-airflow-providers-google==14.0.0 From 2c0b38a2b5cc1deaa9ef44bdfb7d7058cc450ba4 Mon Sep 17 00:00:00 2001 From: Sean Rose Date: Thu, 27 Mar 2025 21:15:25 -0700 Subject: [PATCH 8/8] fix: Unnest Dataproc job dictionaries. --- utils/dataproc.py | 6 +++--- utils/gcp.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/dataproc.py b/utils/dataproc.py index 123d03f59..904c17ce8 100644 --- a/utils/dataproc.py +++ b/utils/dataproc.py @@ -409,7 +409,7 @@ def moz_dataproc_pyspark_runner( run_pyspark_on_dataproc = DataprocSubmitJobOperator( task_id="run_dataproc_pyspark", - job=dataproc_job, + job=dataproc_job["job"], region=region, gcp_conn_id=gcp_conn_id, project_id=project_id, @@ -558,7 +558,7 @@ def moz_dataproc_jar_runner( run_jar_on_dataproc = DataprocSubmitJobOperator( region=region, task_id="run_jar_on_dataproc", - job=dataproc_job, + job=dataproc_job["job"], gcp_conn_id=gcp_conn_id, project_id=project_id, ) @@ -738,7 +738,7 @@ def moz_dataproc_scriptrunner( run_script_on_dataproc = DataprocSubmitJobOperator( region=region, task_id="run_script_on_dataproc", - job=dataproc_job, + job=dataproc_job["job"], gcp_conn_id=gcp_conn_id, project_id=project_id, ) diff --git a/utils/gcp.py b/utils/gcp.py index b6cd2435a..d711fd194 100644 --- a/utils/gcp.py +++ b/utils/gcp.py @@ -153,7 +153,7 @@ def export_to_parquet( run_dataproc_pyspark = DataprocSubmitJobOperator( task_id="run_dataproc_pyspark", - job=dataproc_job, + job=dataproc_job["job"], gcp_conn_id=gcp_conn_id, project_id=project_id, region=region,