From d866a46ec2f9c0c425414fad718c76b263e98b81 Mon Sep 17 00:00:00 2001 From: Pankaj Koti Date: Tue, 19 Nov 2024 13:33:51 +0530 Subject: [PATCH 1/9] Enable GCP remote manifest task --- dev/dags/cosmos_manifest_example.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/dev/dags/cosmos_manifest_example.py b/dev/dags/cosmos_manifest_example.py index 3905fcea81..bf475b3216 100644 --- a/dev/dags/cosmos_manifest_example.py +++ b/dev/dags/cosmos_manifest_example.py @@ -71,19 +71,19 @@ def cosmos_manifest_example() -> None: # [END aws_s3_example] # [START gcp_gs_example] - # gcp_gs_example = DbtTaskGroup( - # group_id="gcp_gs_example", - # project_config=ProjectConfig( - # manifest_path="gs://cosmos_remote_target/manifest.json", - # manifest_conn_id="gcp_gs_conn", - # # `manifest_conn_id` is optional. If not provided, the default connection ID `google_cloud_default` is used. - # project_name="jaffle_shop", - # ), - # profile_config=profile_config, - # render_config=render_config, - # execution_config=execution_config, - # operator_args={"install_deps": True}, - # ) + gcp_gs_example = DbtTaskGroup( + group_id="gcp_gs_example", + project_config=ProjectConfig( + manifest_path="gs://cosmos_remote_target/manifest.json", + manifest_conn_id="gcp_gs_conn", + # `manifest_conn_id` is optional. If not provided, the default connection ID `google_cloud_default` is used. 
+ project_name="jaffle_shop", + ), + profile_config=profile_config, + render_config=render_config, + execution_config=execution_config, + operator_args={"install_deps": True}, + ) # [END gcp_gs_example] # [START azure_abfs_example] @@ -104,7 +104,8 @@ def cosmos_manifest_example() -> None: post_dbt = EmptyOperator(task_id="post_dbt") - (pre_dbt >> local_example >> aws_s3_example >> post_dbt) + # (pre_dbt >> local_example >> aws_s3_example >> post_dbt) + (pre_dbt >> local_example >> aws_s3_example >> gcp_gs_example >> post_dbt) # TODO: re-enable the following # (pre_dbt >> local_example >> aws_s3_example >> gcp_gs_example >> azure_abfs_example >> post_dbt) From 91f88168512d3229aa08d3ae017d0172b7ca25f7 Mon Sep 17 00:00:00 2001 From: Pankaj Koti Date: Tue, 19 Nov 2024 14:41:45 +0530 Subject: [PATCH 2/9] Upgrade google provider --- dev/dags/cosmos_manifest_example.py | 1 - scripts/test/pre-install-airflow.sh | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/dev/dags/cosmos_manifest_example.py b/dev/dags/cosmos_manifest_example.py index bf475b3216..543fe641c2 100644 --- a/dev/dags/cosmos_manifest_example.py +++ b/dev/dags/cosmos_manifest_example.py @@ -104,7 +104,6 @@ def cosmos_manifest_example() -> None: post_dbt = EmptyOperator(task_id="post_dbt") - # (pre_dbt >> local_example >> aws_s3_example >> post_dbt) (pre_dbt >> local_example >> aws_s3_example >> gcp_gs_example >> post_dbt) # TODO: re-enable the following # (pre_dbt >> local_example >> aws_s3_example >> gcp_gs_example >> azure_abfs_example >> post_dbt) diff --git a/scripts/test/pre-install-airflow.sh b/scripts/test/pre-install-airflow.sh index e54a327062..6d9c9882b7 100755 --- a/scripts/test/pre-install-airflow.sh +++ b/scripts/test/pre-install-airflow.sh @@ -44,7 +44,7 @@ elif [ "$AIRFLOW_VERSION" = "2.7" ] ; then else uv pip install "apache-airflow-providers-amazon[s3fs]" --constraint /tmp/constraint.txt uv pip install "apache-airflow-providers-cncf-kubernetes" --constraint 
/tmp/constraint.txt - uv pip install "apache-airflow-providers-google>=10.11.0" --constraint /tmp/constraint.txt + uv pip install "apache-airflow-providers-google>=10.17.0" --constraint /tmp/constraint.txt uv pip install apache-airflow-providers-microsoft-azure --constraint /tmp/constraint.txt fi From af60edbc49331a6b77040bc89d5064edf394699e Mon Sep 17 00:00:00 2001 From: Pankaj Koti Date: Tue, 19 Nov 2024 14:46:52 +0530 Subject: [PATCH 3/9] Remove constraints on google provider --- pyproject.toml | 4 ++-- scripts/test/pre-install-airflow.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 416dd0f195..912dd83eac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ openlineage = ["openlineage-integration-common!=1.15.0", "openlineage-airflow"] amazon = [ "apache-airflow-providers-amazon[s3fs]>=3.0.0", ] -google = ["apache-airflow-providers-google"] +google = ["apache-airflow-providers-google>=10.17.0"] microsoft = ["apache-airflow-providers-microsoft-azure"] all = [ "astronomer-cosmos[dbt-all]", @@ -186,7 +186,7 @@ dependencies = [ "aenum", "apache-airflow-providers-amazon[s3fs]>=3.0.0", "apache-airflow-providers-cncf-kubernetes>=5.1.1", - "apache-airflow-providers-google>=10.11.0", + "apache-airflow-providers-google>=10.17.0", "apache-airflow-providers-microsoft-azure", "msgpack", "openlineage-airflow", diff --git a/scripts/test/pre-install-airflow.sh b/scripts/test/pre-install-airflow.sh index 6d9c9882b7..73ab3531b9 100755 --- a/scripts/test/pre-install-airflow.sh +++ b/scripts/test/pre-install-airflow.sh @@ -44,7 +44,7 @@ elif [ "$AIRFLOW_VERSION" = "2.7" ] ; then else uv pip install "apache-airflow-providers-amazon[s3fs]" --constraint /tmp/constraint.txt uv pip install "apache-airflow-providers-cncf-kubernetes" --constraint /tmp/constraint.txt - uv pip install "apache-airflow-providers-google>=10.17.0" --constraint /tmp/constraint.txt + uv pip install 
"apache-airflow-providers-google>=10.17.0" uv pip install apache-airflow-providers-microsoft-azure --constraint /tmp/constraint.txt fi From 98ac03bebd71f23672539b9874a678cac11fadf4 Mon Sep 17 00:00:00 2001 From: Pankaj Koti Date: Tue, 19 Nov 2024 15:18:28 +0530 Subject: [PATCH 4/9] Add comment on why not use constraints for google-provider --- scripts/test/pre-install-airflow.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/test/pre-install-airflow.sh b/scripts/test/pre-install-airflow.sh index 73ab3531b9..c486324819 100755 --- a/scripts/test/pre-install-airflow.sh +++ b/scripts/test/pre-install-airflow.sh @@ -44,6 +44,11 @@ elif [ "$AIRFLOW_VERSION" = "2.7" ] ; then else uv pip install "apache-airflow-providers-amazon[s3fs]" --constraint /tmp/constraint.txt uv pip install "apache-airflow-providers-cncf-kubernetes" --constraint /tmp/constraint.txt + # The Airflow 2.9 constraints file at https://raw.githubusercontent.com/apache/airflow/constraints-2.9.0/constraints-3.11.txt + # specifies apache-airflow-providers-google==10.16.0. However, our CI setup uses a Google connection without a token, + # which previously led to authentication issues when the token was None. This issue was resolved in PR #38102 and + # fixed in apache-airflow-providers-google==10.17.0. Consequently, we are using apache-airflow-providers-google>=10.17.0 + # and skipping constraints installation, as the specified version does not meet our requirements. 
uv pip install "apache-airflow-providers-google>=10.17.0" uv pip install apache-airflow-providers-microsoft-azure --constraint /tmp/constraint.txt fi From 94ea7b8f83a6f90cf13f06c4ee1cbe09fa73d710 Mon Sep 17 00:00:00 2001 From: Pankaj Koti Date: Tue, 19 Nov 2024 15:23:29 +0530 Subject: [PATCH 5/9] Add comment on why not use constraints for google-provider --- scripts/test/pre-install-airflow.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/test/pre-install-airflow.sh b/scripts/test/pre-install-airflow.sh index c486324819..7017fc628c 100755 --- a/scripts/test/pre-install-airflow.sh +++ b/scripts/test/pre-install-airflow.sh @@ -44,11 +44,13 @@ elif [ "$AIRFLOW_VERSION" = "2.7" ] ; then else uv pip install "apache-airflow-providers-amazon[s3fs]" --constraint /tmp/constraint.txt uv pip install "apache-airflow-providers-cncf-kubernetes" --constraint /tmp/constraint.txt - # The Airflow 2.9 constraints file at https://raw.githubusercontent.com/apache/airflow/constraints-2.9.0/constraints-3.11.txt + # The Airflow 2.9 constraints file at + # https://raw.githubusercontent.com/apache/airflow/constraints-2.9.0/constraints-3.11.txt # specifies apache-airflow-providers-google==10.16.0. However, our CI setup uses a Google connection without a token, - # which previously led to authentication issues when the token was None. This issue was resolved in PR #38102 and - # fixed in apache-airflow-providers-google==10.17.0. Consequently, we are using apache-airflow-providers-google>=10.17.0 - # and skipping constraints installation, as the specified version does not meet our requirements. + # which previously led to authentication issues when the token was None. This issue was resolved in PR + # https://github.com/apache/airflow/pull/38102 and fixed in apache-airflow-providers-google==10.17.0. 
Consequently, + # we are using apache-airflow-providers-google>=10.17.0 and skipping constraints installation, as the specified + # version does not meet our requirements. uv pip install "apache-airflow-providers-google>=10.17.0" uv pip install apache-airflow-providers-microsoft-azure --constraint /tmp/constraint.txt fi From 59df66ae6434c5ec8aad53c60d25edbe142fd5d4 Mon Sep 17 00:00:00 2001 From: Pankaj Koti Date: Tue, 19 Nov 2024 16:24:01 +0530 Subject: [PATCH 6/9] Enable Azure remote manifest task --- dev/dags/cosmos_manifest_example.py | 30 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/dev/dags/cosmos_manifest_example.py b/dev/dags/cosmos_manifest_example.py index 543fe641c2..8e35208b85 100644 --- a/dev/dags/cosmos_manifest_example.py +++ b/dev/dags/cosmos_manifest_example.py @@ -87,26 +87,24 @@ def cosmos_manifest_example() -> None: # [END gcp_gs_example] # [START azure_abfs_example] - # azure_abfs_example = DbtTaskGroup( - # group_id="azure_abfs_example", - # project_config=ProjectConfig( - # manifest_path="abfs://cosmos-manifest-test/manifest.json", - # manifest_conn_id="azure_abfs_conn", - # # `manifest_conn_id` is optional. If not provided, the default connection ID `wasb_default` is used. - # project_name="jaffle_shop", - # ), - # profile_config=profile_config, - # render_config=render_config, - # execution_config=execution_config, - # operator_args={"install_deps": True}, - # ) + azure_abfs_example = DbtTaskGroup( + group_id="azure_abfs_example", + project_config=ProjectConfig( + manifest_path="abfs://cosmos-manifest-test/manifest.json", + manifest_conn_id="azure_abfs_conn", + # `manifest_conn_id` is optional. If not provided, the default connection ID `wasb_default` is used. 
+ project_name="jaffle_shop", + ), + profile_config=profile_config, + render_config=render_config, + execution_config=execution_config, + operator_args={"install_deps": True}, + ) # [END azure_abfs_example] post_dbt = EmptyOperator(task_id="post_dbt") - (pre_dbt >> local_example >> aws_s3_example >> gcp_gs_example >> post_dbt) - # TODO: re-enable the following - # (pre_dbt >> local_example >> aws_s3_example >> gcp_gs_example >> azure_abfs_example >> post_dbt) + (pre_dbt >> local_example >> aws_s3_example >> gcp_gs_example >> azure_abfs_example >> post_dbt) cosmos_manifest_example() From d34c6d0b80736a064e47b789bff6e6675b0480b8 Mon Sep 17 00:00:00 2001 From: Pankaj Koti Date: Tue, 19 Nov 2024 17:05:26 +0530 Subject: [PATCH 7/9] Upgrade azure provider --- pyproject.toml | 6 +++--- scripts/test/pre-install-airflow.sh | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 912dd83eac..11290b1a50 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,7 @@ amazon = [ "apache-airflow-providers-amazon[s3fs]>=3.0.0", ] google = ["apache-airflow-providers-google>=10.17.0"] -microsoft = ["apache-airflow-providers-microsoft-azure"] +microsoft = ["apache-airflow-providers-microsoft-azure>=8.5.0"] all = [ "astronomer-cosmos[dbt-all]", "astronomer-cosmos[openlineage]", @@ -95,7 +95,7 @@ aws_eks = [ "apache-airflow-providers-amazon>=8.0.0", ] azure-container-instance = [ - "apache-airflow-providers-microsoft-azure>=8.4.0", + "apache-airflow-providers-microsoft-azure>=8.5.0", ] gcp-cloud-run-job = [ "apache-airflow-providers-google>=10.11.0", @@ -187,7 +187,7 @@ dependencies = [ "apache-airflow-providers-amazon[s3fs]>=3.0.0", "apache-airflow-providers-cncf-kubernetes>=5.1.1", "apache-airflow-providers-google>=10.17.0", - "apache-airflow-providers-microsoft-azure", + "apache-airflow-providers-microsoft-azure>=8.5.0", "msgpack", "openlineage-airflow", "pydantic>=1.10.0", diff --git a/scripts/test/pre-install-airflow.sh 
b/scripts/test/pre-install-airflow.sh index 7017fc628c..207aa5a138 100755 --- a/scripts/test/pre-install-airflow.sh +++ b/scripts/test/pre-install-airflow.sh @@ -44,6 +44,7 @@ elif [ "$AIRFLOW_VERSION" = "2.7" ] ; then else uv pip install "apache-airflow-providers-amazon[s3fs]" --constraint /tmp/constraint.txt uv pip install "apache-airflow-providers-cncf-kubernetes" --constraint /tmp/constraint.txt + # The Airflow 2.9 constraints file at # https://raw.githubusercontent.com/apache/airflow/constraints-2.9.0/constraints-3.11.txt # specifies apache-airflow-providers-google==10.16.0. However, our CI setup uses a Google connection without a token, @@ -52,7 +53,8 @@ else # we are using apache-airflow-providers-google>=10.17.0 and skipping constraints installation, as the specified # version does not meet our requirements. uv pip install "apache-airflow-providers-google>=10.17.0" - uv pip install apache-airflow-providers-microsoft-azure --constraint /tmp/constraint.txt + + uv pip install "apache-airflow-providers-microsoft-azure>=8.5.0" --constraint /tmp/constraint.txt fi rm /tmp/constraint.txt From a86db267c416a719d77b8a24e9d2b923d018d8d5 Mon Sep 17 00:00:00 2001 From: Pankaj Koti Date: Tue, 19 Nov 2024 17:07:15 +0530 Subject: [PATCH 8/9] Drop using constraints for AF >= 2.8 --- scripts/test/pre-install-airflow.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/test/pre-install-airflow.sh b/scripts/test/pre-install-airflow.sh index 207aa5a138..2e9b6e280d 100755 --- a/scripts/test/pre-install-airflow.sh +++ b/scripts/test/pre-install-airflow.sh @@ -54,7 +54,7 @@ else # version does not meet our requirements. 
uv pip install "apache-airflow-providers-google>=10.17.0" - uv pip install "apache-airflow-providers-microsoft-azure>=8.5.0" --constraint /tmp/constraint.txt + uv pip install "apache-airflow-providers-microsoft-azure>=8.5.0" fi rm /tmp/constraint.txt From cc01f68420c6eacdf7a8c04e47152aa6d7bc59dd Mon Sep 17 00:00:00 2001 From: Pankaj Koti Date: Tue, 19 Nov 2024 17:40:38 +0530 Subject: [PATCH 9/9] Add comment in the pre-install script --- scripts/test/pre-install-airflow.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/test/pre-install-airflow.sh b/scripts/test/pre-install-airflow.sh index 2e9b6e280d..da8e72fe3d 100755 --- a/scripts/test/pre-install-airflow.sh +++ b/scripts/test/pre-install-airflow.sh @@ -54,6 +54,12 @@ else # version does not meet our requirements. uv pip install "apache-airflow-providers-google>=10.17.0" + # The Airflow 2.8 constraints file at + # https://raw.githubusercontent.com/apache/airflow/constraints-2.8.0/constraints-3.11.txt + # specifies apache-airflow-providers-microsoft-azure==8.4.0. However, our Azure connection setup in CI + # previously led to authentication issues with that version. This issue was resolved in + # apache-airflow-providers-microsoft-azure==8.5.0. Hence, we are using apache-airflow-providers-microsoft-azure>=8.5.0 + # and skipping installation with constraints, as the specified version does not meet our requirements. uv pip install "apache-airflow-providers-microsoft-azure>=8.5.0" fi