From 93b44a116e30abc241ee0affa7ee904581d0223d Mon Sep 17 00:00:00 2001 From: pankajastro Date: Thu, 16 Oct 2025 02:33:06 +0530 Subject: [PATCH 1/9] Add DBT project to run benchmark for async execution mode --- README.md | 1 + benchmark/auto_generate_models.sh | 12 ++ dags/cosmos_async_dag.py | 41 +++++++ dbt/altered_jaffle_shop/.gitignore | 4 + dbt/altered_jaffle_shop/.user.yml | 1 + dbt/altered_jaffle_shop/README.md | 15 +++ dbt/altered_jaffle_shop/analyses/.gitkeep | 0 dbt/altered_jaffle_shop/dbt_project.yml | 36 ++++++ dbt/altered_jaffle_shop/macros/.gitkeep | 0 .../macros/get_model_param.sql | 9 ++ dbt/altered_jaffle_shop/models/customers.sql | 71 +++++++++++ .../models/customers_slow_query.sql | 37 ++++++ dbt/altered_jaffle_shop/models/docs.md | 14 +++ .../models/long_model_cross_random.sql | 30 +++++ .../models/long_model_subquery_windows.sql | 55 +++++++++ .../models/long_model_text_processing.sql | 41 +++++++ dbt/altered_jaffle_shop/models/orders.sql | 56 +++++++++ dbt/altered_jaffle_shop/models/schema.yml | 83 +++++++++++++ .../models/staging/stg_customers.sql | 22 ++++ .../models/staging/stg_orders.sql | 23 ++++ .../models/staging/stg_payments.sql | 25 ++++ dbt/altered_jaffle_shop/profiles.yml | 11 ++ dbt/altered_jaffle_shop/seeds/.gitkeep | 0 .../seeds/model_params.csv | 5 + .../seeds/raw_customers.csv | 101 ++++++++++++++++ dbt/altered_jaffle_shop/seeds/raw_orders.csv | 100 +++++++++++++++ .../seeds/raw_payments.csv | 114 ++++++++++++++++++ dbt/altered_jaffle_shop/snapshots/.gitkeep | 0 dbt/altered_jaffle_shop/tests/.gitkeep | 0 29 files changed, 907 insertions(+) create mode 100644 benchmark/auto_generate_models.sh create mode 100644 dags/cosmos_async_dag.py create mode 100644 dbt/altered_jaffle_shop/.gitignore create mode 100644 dbt/altered_jaffle_shop/.user.yml create mode 100644 dbt/altered_jaffle_shop/README.md create mode 100644 dbt/altered_jaffle_shop/analyses/.gitkeep create mode 100644 dbt/altered_jaffle_shop/dbt_project.yml create mode 100644 dbt/altered_jaffle_shop/macros/.gitkeep create mode 100644 dbt/altered_jaffle_shop/macros/get_model_param.sql create mode 100644 dbt/altered_jaffle_shop/models/customers.sql create mode 100644 dbt/altered_jaffle_shop/models/customers_slow_query.sql create mode 100644 dbt/altered_jaffle_shop/models/docs.md create mode 100644 dbt/altered_jaffle_shop/models/long_model_cross_random.sql create mode 100644 dbt/altered_jaffle_shop/models/long_model_subquery_windows.sql create mode 100644 dbt/altered_jaffle_shop/models/long_model_text_processing.sql create mode 100644 dbt/altered_jaffle_shop/models/orders.sql create mode 100644 dbt/altered_jaffle_shop/models/schema.yml create mode 100644 dbt/altered_jaffle_shop/models/staging/stg_customers.sql create mode 100644 dbt/altered_jaffle_shop/models/staging/stg_orders.sql create mode 100644 dbt/altered_jaffle_shop/models/staging/stg_payments.sql create mode 100644 dbt/altered_jaffle_shop/profiles.yml create mode 100644 dbt/altered_jaffle_shop/seeds/.gitkeep create mode 100644 dbt/altered_jaffle_shop/seeds/model_params.csv create mode 100644 dbt/altered_jaffle_shop/seeds/raw_customers.csv create mode 100644 dbt/altered_jaffle_shop/seeds/raw_orders.csv create mode 100644 dbt/altered_jaffle_shop/seeds/raw_payments.csv create mode 100644 dbt/altered_jaffle_shop/snapshots/.gitkeep create mode 100644 dbt/altered_jaffle_shop/tests/.gitkeep diff --git a/README.md b/README.md index 29e4822..88a4781 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ This project contains the following files and folders: * 2 seeds * 52 sources * 185 models + * [altered_jaffle_shop](https://github.com/astronomer/cosmos-benchmark/tree/main/dbt/altered_jaffle_shop): A custom dbt project designed to pre-compile and focus on specific custom models without any dbt-generated metadata dependencies. This project is based on a copy of the jaffle-shop project, with an additional four models that require significant time to run, enabling measurement of async DAG performance. The project includes one seed file — you can increase model transformation time by updating the values in this seed. More models can be generated automatically by running the following bash command: `sh benchmark/auto_generate_models.sh 2`. - **Dockerfile**: This file contains a versioned Astro Runtime Docker image that provides a differentiated Airflow experience. - **include**: This folder contains any additional files that you want to include as part of your project. In this particular case, it contains configuration files. - **packages.txt**: Install OS-level packages needed for your project by adding them to this file. It is empty by default. diff --git a/benchmark/auto_generate_models.sh b/benchmark/auto_generate_models.sh new file mode 100644 index 0000000..02dc3bb --- /dev/null +++ b/benchmark/auto_generate_models.sh @@ -0,0 +1,12 @@ +slow_models=( + "dbt/altered_jaffle_shop/models/customers_slow_query.sql" + "dbt/altered_jaffle_shop/models/long_model_cross_random.sql" + "dbt/altered_jaffle_shop/models/long_model_subquery_windows.sql" + "dbt/altered_jaffle_shop/models/long_model_text_processing.sql" +) + +for file in "${slow_models[@]}"; do + for i in $(seq 1 "$1"); do + cp "$file" "${file%.sql}${i}.sql" + done +done diff --git a/dags/cosmos_async_dag.py b/dags/cosmos_async_dag.py new file mode 100644 index 0000000..8c13078 --- /dev/null +++ b/dags/cosmos_async_dag.py @@ -0,0 +1,41 @@ +import os +from datetime import datetime +from pathlib import Path + +from cosmos import DbtDag, ExecutionConfig, ExecutionMode, ProfileConfig, ProjectConfig, RenderConfig +from cosmos.profiles import GoogleCloudServiceAccountDictProfileMapping + +DBT_PROJECT_PATH = Path("/usr/local/airflow/dbt/altered_jaffle_shop") + +DBT_ADAPTER_VERSION = os.getenv("DBT_ADAPTER_VERSION", "1.9") + +profile_config = ProfileConfig( + profile_name="default", + target_name="dev", + profile_mapping=GoogleCloudServiceAccountDictProfileMapping( + conn_id="gcp_gs_conn", profile_args={"dataset": "test_async", "project": "astronomer-airflow-providers"} + ), +) + + +# [START airflow_async_execution_mode_example] +simple_dag_async = DbtDag( + # dbt/cosmos-specific parameters + project_config=ProjectConfig(DBT_PROJECT_PATH), + profile_config=profile_config, + execution_config=ExecutionConfig( + execution_mode=ExecutionMode.AIRFLOW_ASYNC, + async_py_requirements=[f"dbt-bigquery=={DBT_ADAPTER_VERSION}"], + ), + # normal dag parameters + schedule=None, + start_date=datetime(2023, 1, 1), + catchup=False, + dag_id="simple_dag_async", + tags=["simple"], + operator_args={ + "location": "US", + "install_deps": True, + "full_refresh": True, + }, +) diff --git a/dbt/altered_jaffle_shop/.gitignore b/dbt/altered_jaffle_shop/.gitignore new file mode 100644 index 0000000..49f147c --- /dev/null +++ b/dbt/altered_jaffle_shop/.gitignore @@ -0,0 +1,4 @@ + +target/ +dbt_packages/ +logs/ diff --git a/dbt/altered_jaffle_shop/.user.yml b/dbt/altered_jaffle_shop/.user.yml new file mode 100644 index 0000000..6299459 --- /dev/null +++ b/dbt/altered_jaffle_shop/.user.yml @@ -0,0 +1 @@ +id: d59e111f-6b74-4d7c-b380-61cabb5754b0 diff --git a/dbt/altered_jaffle_shop/README.md b/dbt/altered_jaffle_shop/README.md new file mode 100644 index 0000000..7874ac8 --- /dev/null +++ b/dbt/altered_jaffle_shop/README.md @@ -0,0 +1,15 @@ +Welcome to your new dbt project! + +### Using the starter project + +Try running the following commands: +- dbt run +- dbt test + + +### Resources: +- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) +- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers +- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support +- Find [dbt events](https://events.getdbt.com) near you +- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices diff --git a/dbt/altered_jaffle_shop/analyses/.gitkeep b/dbt/altered_jaffle_shop/analyses/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/dbt/altered_jaffle_shop/dbt_project.yml b/dbt/altered_jaffle_shop/dbt_project.yml new file mode 100644 index 0000000..b1376d5 --- /dev/null +++ b/dbt/altered_jaffle_shop/dbt_project.yml @@ -0,0 +1,36 @@ + +# Name your project! Project names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'altered_jaffle_shop' +version: '1.0.0' + +# This setting configures which "profile" dbt uses for this project. +profile: 'altered_jaffle_shop' + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that models in this project can be +# found in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +analysis-paths: ["analyses"] +test-paths: ["tests"] +seed-paths: ["seeds"] +macro-paths: ["macros"] +snapshot-paths: ["snapshots"] + +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this example config, we tell dbt to build all models in the example/ +# directory as views. These settings can be overridden in the individual model +# files using the `{{ config(...) }}` macro. +models: + altered_jaffle_shop: + # Config indicated by + and applies to all files under models/example/ + example: + +materialized: view diff --git a/dbt/altered_jaffle_shop/macros/.gitkeep b/dbt/altered_jaffle_shop/macros/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/dbt/altered_jaffle_shop/macros/get_model_param.sql b/dbt/altered_jaffle_shop/macros/get_model_param.sql new file mode 100644 index 0000000..1292a2c --- /dev/null +++ b/dbt/altered_jaffle_shop/macros/get_model_param.sql @@ -0,0 +1,9 @@ +{% macro get_model_param(model_name) %} + {% set seeds = load_seed('config') %} + {% for row in seeds %} + {% if row['model_name'] == model_name %} + {% do return(row) %} + {% endif %} + {% endfor %} + {% do exceptions.raise_compiler_error("Model '" ~ model_name ~ "' not found in seed 'config'") %} +{% endmacro %} diff --git a/dbt/altered_jaffle_shop/models/customers.sql b/dbt/altered_jaffle_shop/models/customers.sql new file mode 100644 index 0000000..e4bf27b --- /dev/null +++ b/dbt/altered_jaffle_shop/models/customers.sql @@ -0,0 +1,71 @@ +{{ config(tags=["customers"]) }} + +with customers as ( + + select * from {{ ref('stg_customers') }} + +), + +orders as ( + + select * from {{ ref('stg_orders') }} + +), + +payments as ( + + select * from {{ ref('stg_payments') }} + +), + +customer_orders as ( + + select + customer_id, + + min(order_date) as first_order, + max(order_date) as most_recent_order, + count(order_id) as number_of_orders + from orders + + group by customer_id + +), + +customer_payments as ( + + select + orders.customer_id, + sum(amount) as total_amount + + from payments + + left join orders on + payments.order_id = orders.order_id + + group by orders.customer_id + +), + +final as ( + + select + customers.customer_id, + customers.first_name, + customers.last_name, + customer_orders.first_order, + customer_orders.most_recent_order, + customer_orders.number_of_orders, + customer_payments.total_amount as customer_lifetime_value + + from customers + + left join customer_orders + on customers.customer_id = customer_orders.customer_id + + left join customer_payments + on customers.customer_id = customer_payments.customer_id + +) + +select * from final diff --git a/dbt/altered_jaffle_shop/models/customers_slow_query.sql b/dbt/altered_jaffle_shop/models/customers_slow_query.sql new file mode 100644 index 0000000..3427db8 --- /dev/null +++ b/dbt/altered_jaffle_shop/models/customers_slow_query.sql @@ -0,0 +1,37 @@ +{{ config( + materialized = "table" +) }} + +WITH params AS ( + SELECT array_x, array_y + FROM {{ ref('model_params') }} + WHERE model_name = 'customers_slow_query' + LIMIT 1 +), +base AS ( + SELECT * FROM {{ ref('customers') }} +), +expanded AS ( + SELECT + b.*, + x AS extra_x, + y AS extra_y + FROM base b + CROSS JOIN params p + CROSS JOIN UNNEST(GENERATE_ARRAY(1, p.array_x)) AS x + CROSS JOIN UNNEST(GENERATE_ARRAY(1, p.array_y)) AS y +), +windowed AS ( + SELECT + *, + AVG(LENGTH(CAST(customer_id AS STRING))) OVER ( + PARTITION BY customer_id + ORDER BY extra_x, extra_y + ) AS avg_len + FROM expanded +) +SELECT + customer_id, + SUM(avg_len) AS sum_len +FROM windowed +GROUP BY customer_id diff --git a/dbt/altered_jaffle_shop/models/docs.md b/dbt/altered_jaffle_shop/models/docs.md new file mode 100644 index 0000000..c6ae93b --- /dev/null +++ b/dbt/altered_jaffle_shop/models/docs.md @@ -0,0 +1,14 @@ +{% docs orders_status %} + +Orders can be one of the following statuses: + +| status | description | +|----------------|------------------------------------------------------------------------------------------------------------------------| +| placed | The order has been placed but has not yet left the warehouse | +| shipped | The order has ben shipped to the customer and is currently in transit | +| completed | The order has been received by the customer | +| return_pending | The customer has indicated that they would like to return the order, but it has not yet been received at the warehouse | +| returned | The order has been returned by the customer and received at the warehouse | + + +{% enddocs %} diff --git a/dbt/altered_jaffle_shop/models/long_model_cross_random.sql b/dbt/altered_jaffle_shop/models/long_model_cross_random.sql new file mode 100644 index 0000000..d7961b0 --- /dev/null +++ b/dbt/altered_jaffle_shop/models/long_model_cross_random.sql @@ -0,0 +1,30 @@ +{{ config( + materialized = "table" +) }} + +WITH params AS ( + SELECT array_x, array_y + FROM {{ ref('model_params') }} + WHERE model_name = 'long_model_cross_random' + LIMIT 1 +), +base AS ( + SELECT * FROM {{ ref('customers') }} +), +inflated AS ( + SELECT + b.customer_id, + x AS x_val, + y AS y_val, + RAND() * x * y AS random_val + FROM base b + CROSS JOIN params p + CROSS JOIN UNNEST(GENERATE_ARRAY(1, p.array_x)) AS x + CROSS JOIN UNNEST(GENERATE_ARRAY(1, p.array_y)) AS y +) +SELECT + customer_id, + COUNT(*) AS row_count, + AVG(random_val) AS avg_val +FROM inflated +GROUP BY customer_id diff --git a/dbt/altered_jaffle_shop/models/long_model_subquery_windows.sql b/dbt/altered_jaffle_shop/models/long_model_subquery_windows.sql new file mode 100644 index 0000000..b8d85d5 --- /dev/null +++ b/dbt/altered_jaffle_shop/models/long_model_subquery_windows.sql @@ -0,0 +1,55 @@ +{{ config( + materialized = "table" +) }} + +WITH params AS ( + SELECT array_x + FROM {{ ref('model_params') }} + WHERE model_name = 'long_model_subquery_windows' + LIMIT 1 +), + +-- Inflate base rows by duplicating each customer 100x +base AS ( + SELECT + c.customer_id, + d AS duplication_id + FROM {{ ref('customers') }} c + CROSS JOIN UNNEST(GENERATE_ARRAY(1, 100)) AS d +), + +-- Generate large expansion using params +expanded AS ( + SELECT + b.customer_id, + x AS factor, + POW(x, 0.5) AS sqrt_x, + SAFE_DIVIDE(x, NULLIF(MOD(x, 10), 0)) AS ratio, + LOG(x + 1) + SIN(x) + COS(x) AS expensive_math + FROM base b + CROSS JOIN params p + CROSS JOIN UNNEST(GENERATE_ARRAY(1, p.array_x)) AS x +), + +-- Complex windowing across a large range +windowed AS ( + SELECT + *, + AVG(expensive_math) OVER ( + PARTITION BY customer_id + ORDER BY factor + ROWS BETWEEN 10000 PRECEDING AND CURRENT ROW + ) AS moving_avg + FROM expanded +), + +aggregated AS ( + SELECT + customer_id, + SUM(moving_avg) AS total_avg, + COUNT(*) AS cnt + FROM windowed + GROUP BY customer_id +) + +SELECT * FROM aggregated diff --git a/dbt/altered_jaffle_shop/models/long_model_text_processing.sql b/dbt/altered_jaffle_shop/models/long_model_text_processing.sql new file mode 100644 index 0000000..95c44ed --- /dev/null +++ b/dbt/altered_jaffle_shop/models/long_model_text_processing.sql @@ -0,0 +1,41 @@ +{{ config( + materialized = "table" +) }} + +WITH params AS ( + SELECT array_x, array_y + FROM {{ ref('model_params') }} + WHERE model_name = 'long_model_text_processing' + LIMIT 1 +), +base AS ( + SELECT * FROM {{ ref('customers') }} +), +text_gen AS ( + SELECT + b.customer_id, + CONCAT('Customer_', CAST(x AS STRING), '_', CAST(y AS STRING)) AS tag, + REPEAT('A', MOD(x * y, 1000)) AS description + FROM base b + CROSS JOIN params p + CROSS JOIN UNNEST(GENERATE_ARRAY(1, p.array_x)) AS x + CROSS JOIN UNNEST(GENERATE_ARRAY(1, p.array_y)) AS y +), +analyzed AS ( + SELECT + customer_id, + LENGTH(tag) AS tag_len, + LENGTH(description) AS desc_len, + RANK() OVER (PARTITION BY customer_id ORDER BY LENGTH(description) DESC) AS rank_val + FROM text_gen +), +filtered AS ( + SELECT * FROM analyzed + WHERE rank_val <= 100 +) +SELECT + customer_id, + AVG(tag_len) AS avg_tag_len, + MAX(desc_len) AS max_desc_len +FROM filtered +GROUP BY customer_id diff --git a/dbt/altered_jaffle_shop/models/orders.sql b/dbt/altered_jaffle_shop/models/orders.sql new file mode 100644 index 0000000..cbb2934 --- /dev/null +++ b/dbt/altered_jaffle_shop/models/orders.sql @@ -0,0 +1,56 @@ +{% set payment_methods = ['credit_card', 'coupon', 'bank_transfer', 'gift_card'] %} + +with orders as ( + + select * from {{ ref('stg_orders') }} + +), + +payments as ( + + select * from {{ ref('stg_payments') }} + +), + +order_payments as ( + + select + order_id, + + {% for payment_method in payment_methods -%} + sum(case when payment_method = '{{ payment_method }}' then amount else 0 end) as {{ payment_method }}_amount, + {% endfor -%} + + sum(amount) as total_amount + + from payments + + group by order_id + +), + +final as ( + + select + orders.order_id, + orders.customer_id, + orders.order_date, + orders.status, + + {% for payment_method in payment_methods -%} + + order_payments.{{ payment_method }}_amount, + + {% endfor -%} + + order_payments.total_amount as amount + + from orders + + + left join order_payments + on orders.order_id = order_payments.order_id + +) + +select * from final diff --git a/dbt/altered_jaffle_shop/models/schema.yml b/dbt/altered_jaffle_shop/models/schema.yml new file mode 100644 index 0000000..3059bab --- /dev/null +++ b/dbt/altered_jaffle_shop/models/schema.yml @@ -0,0 +1,83 @@ +version: 2 + +models: + - name: customers + description: This table has basic information about a customer, as well as some derived facts based on a customer's orders + + columns: + - name: customer_id + description: This is a unique identifier for a customer + tests: + - unique + - not_null + + - name: first_name + description: Customer's first name. PII. + + - name: last_name + description: Customer's last name. PII. + + - name: first_order + description: Date (UTC) of a customer's first order + + - name: most_recent_order + description: Date (UTC) of a customer's most recent order + + - name: number_of_orders + description: Count of the number of orders a customer has placed + + - name: total_order_amount + description: Total value (AUD) of a customer's orders + + - name: orders + description: This table has basic information about orders, as well as some derived facts based on payments + + columns: + - name: order_id + tests: + - unique + - not_null + description: This is a unique identifier for an order + + # Comment so we don't have a standalone test relationships_orders_customer_id__customer_id__ref_customers__test + #- name: customer_id + # description: Foreign key to the customers table + # tests: + # - not_null + # - relationships: + # to: ref('customers') + # field: customer_id + + - name: order_date + description: Date (UTC) that the order was placed + + - name: status + description: '{{ doc("orders_status") }}' + tests: + - accepted_values: + values: ['placed', 'shipped', 'completed', 'return_pending', 'returned'] + + - name: amount + description: Total amount (AUD) of the order + tests: + - not_null + + - name: credit_card_amount + description: Amount of the order (AUD) paid for by credit card + tests: + - not_null + + - name: coupon_amount + description: Amount of the order (AUD) paid for by coupon + tests: + - not_null + + - name: bank_transfer_amount + description: Amount of the order (AUD) paid for by bank transfer + tests: + - not_null + + - name: gift_card_amount + description: Amount of the order (AUD) paid for by gift card + tests: + - not_null \ No newline at end of file diff --git a/dbt/altered_jaffle_shop/models/staging/stg_customers.sql b/dbt/altered_jaffle_shop/models/staging/stg_customers.sql new file mode 100644 index 0000000..cad0472 --- /dev/null +++ b/dbt/altered_jaffle_shop/models/staging/stg_customers.sql @@ -0,0 +1,22 @@ +with source as ( + + {#- + Normally we would select from the table here, but we are using seeds to load + our data in this project + #} + select * from {{ ref('raw_customers') }} + +), + +renamed as ( + + select + id as customer_id, + first_name, + last_name + + from source + +) + +select * from renamed diff --git a/dbt/altered_jaffle_shop/models/staging/stg_orders.sql b/dbt/altered_jaffle_shop/models/staging/stg_orders.sql new file mode 100644 index 0000000..a654dcb --- /dev/null +++ b/dbt/altered_jaffle_shop/models/staging/stg_orders.sql @@ -0,0 +1,23 @@ +with source as ( + + {#- + Normally we would select from the table here, but we are using seeds to load + our data in this project + #} + select * from {{ ref('raw_orders') }} + +), + +renamed as ( + + select + id as order_id, + user_id as customer_id, + order_date, + status + + from source + +) + +select * from renamed diff --git a/dbt/altered_jaffle_shop/models/staging/stg_payments.sql b/dbt/altered_jaffle_shop/models/staging/stg_payments.sql new file mode 100644 index 0000000..f718596 --- /dev/null +++ b/dbt/altered_jaffle_shop/models/staging/stg_payments.sql @@ -0,0 +1,25 @@ +with source as ( + + {#- + Normally we would select from the table here, but we are using seeds to load + our data in this project + #} + select * from {{ ref('raw_payments') }} + +), + +renamed as ( + + select + id as payment_id, + order_id, + payment_method, + + -- `amount` is currently stored in cents, so we convert it to dollars + amount / 100 as amount + + from source + +) + +select * from renamed diff --git a/dbt/altered_jaffle_shop/profiles.yml b/dbt/altered_jaffle_shop/profiles.yml new file mode 100644 index 0000000..c34b3bf --- /dev/null +++ b/dbt/altered_jaffle_shop/profiles.yml @@ -0,0 +1,11 @@ +altered_jaffle_shop: + target: dev + outputs: + dev: + type: bigquery + method: service-account + project: astronomer-airflow-providers + dataset: test_async + threads: 4 # Must be a value of 1 or greater + keyfile: # /PATH/TO/BIGQUERY/keyfile.json + OPTIONAL_CONFIG: VALUE \ No newline at end of file diff --git a/dbt/altered_jaffle_shop/seeds/.gitkeep b/dbt/altered_jaffle_shop/seeds/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/dbt/altered_jaffle_shop/seeds/model_params.csv b/dbt/altered_jaffle_shop/seeds/model_params.csv new file mode 100644 index 0000000..785e16b --- /dev/null +++ b/dbt/altered_jaffle_shop/seeds/model_params.csv @@ -0,0 +1,5 @@ +model_name,array_x,array_y +customers_slow_query,2000,2000 +long_model_cross_random,2000,2000 +long_model_text_processing,2000,2000 +long_model_subquery_windows,2000,1 diff --git a/dbt/altered_jaffle_shop/seeds/raw_customers.csv b/dbt/altered_jaffle_shop/seeds/raw_customers.csv new file mode 100644 index 0000000..e386bb3 --- /dev/null +++ b/dbt/altered_jaffle_shop/seeds/raw_customers.csv @@ -0,0 +1,101 @@ +id,first_name,last_name +1,Michael,P. +2,Shawn,M. +3,Kathleen,P. +4,Jimmy,C. +5,Katherine,R. +6,Sarah,R. +7,Martin,M. +8,Frank,R. +9,Jennifer,F. +10,Henry,W. +11,Fred,S. +12,Amy,D. +13,Kathleen,M. +14,Steve,F. +15,Teresa,H. +16,Amanda,H. +17,Kimberly,R. +18,Johnny,K. +19,Virginia,F. +20,Anna,A. +21,Willie,H. +22,Sean,H. +23,Mildred,A. +24,David,G. +25,Victor,H. +26,Aaron,R. +27,Benjamin,B. +28,Lisa,W. +29,Benjamin,K. +30,Christina,W. +31,Jane,G. +32,Thomas,O. +33,Katherine,M. +34,Jennifer,S. +35,Sara,T. +36,Harold,O. +37,Shirley,J. +38,Dennis,J. +39,Louise,W. +40,Maria,A. +41,Gloria,C. +42,Diana,S. +43,Kelly,N. +44,Jane,R. +45,Scott,B. +46,Norma,C. +47,Marie,P. +48,Lillian,C. +49,Judy,N. +50,Billy,L. +51,Howard,R. +52,Laura,F. +53,Anne,B. +54,Rose,M. +55,Nicholas,R. +56,Joshua,K. +57,Paul,W. +58,Kathryn,K. +59,Adam,A. +60,Norma,W. +61,Timothy,R. +62,Elizabeth,P. +63,Edward,G. +64,David,C. +65,Brenda,W. +66,Adam,W. +67,Michael,H. +68,Jesse,E. +69,Janet,P. +70,Helen,F. +71,Gerald,C. +72,Kathryn,O. +73,Alan,B. +74,Harry,A. +75,Andrea,H. +76,Barbara,W. +77,Anne,W. +78,Harry,H. +79,Jack,R. +80,Phillip,H. +81,Shirley,H. +82,Arthur,D. +83,Virginia,R. +84,Christina,R. +85,Theresa,M. +86,Jason,C. +87,Phillip,B. +88,Adam,T. +89,Margaret,J. +90,Paul,P. +91,Todd,W. +92,Willie,O. +93,Frances,R. +94,Gregory,H. +95,Lisa,P. +96,Jacqueline,A. +97,Shirley,D. +98,Nicole,M. +99,Mary,G. +100,Jean,M. \ No newline at end of file diff --git a/dbt/altered_jaffle_shop/seeds/raw_orders.csv b/dbt/altered_jaffle_shop/seeds/raw_orders.csv new file mode 100644 index 0000000..45160e1 --- /dev/null +++ b/dbt/altered_jaffle_shop/seeds/raw_orders.csv @@ -0,0 +1,100 @@ +id,user_id,order_date,status +1,1,2018-01-01,returned +2,3,2018-01-02,completed +3,94,2018-01-04,completed +4,50,2018-01-05,completed +5,64,2018-01-05,completed +6,54,2018-01-07,completed +7,88,2018-01-09,completed +8,2,2018-01-11,returned +9,53,2018-01-12,completed +10,7,2018-01-14,completed +11,99,2018-01-14,completed +12,59,2018-01-15,completed +13,84,2018-01-17,completed +14,40,2018-01-17,returned +15,25,2018-01-17,completed +16,39,2018-01-18,completed +17,71,2018-01-18,completed +18,64,2018-01-20,returned +19,54,2018-01-22,completed +20,20,2018-01-23,completed +21,71,2018-01-23,completed +22,86,2018-01-24,completed +23,22,2018-01-26,return_pending +24,3,2018-01-27,completed +25,51,2018-01-28,completed +26,32,2018-01-28,completed +27,94,2018-01-29,completed +28,8,2018-01-29,completed +29,57,2018-01-31,completed +30,69,2018-02-02,completed +31,16,2018-02-02,completed +32,28,2018-02-04,completed +33,42,2018-02-04,completed +34,38,2018-02-06,completed +35,80,2018-02-08,completed +36,85,2018-02-10,completed +37,1,2018-02-10,completed +38,51,2018-02-10,completed +39,26,2018-02-11,completed +40,33,2018-02-13,completed +41,99,2018-02-14,completed +42,92,2018-02-16,completed +43,31,2018-02-17,completed +44,66,2018-02-17,completed +45,22,2018-02-17,completed +46,6,2018-02-19,completed +47,50,2018-02-20,completed +48,27,2018-02-21,completed +49,35,2018-02-21,completed +50,51,2018-02-23,completed +51,71,2018-02-24,completed +52,54,2018-02-25,return_pending +53,34,2018-02-26,completed +54,54,2018-02-26,completed +55,18,2018-02-27,completed +56,79,2018-02-28,completed +57,93,2018-03-01,completed +58,22,2018-03-01,completed +59,30,2018-03-02,completed +60,12,2018-03-03,completed +61,63,2018-03-03,completed +62,57,2018-03-05,completed +63,70,2018-03-06,completed +64,13,2018-03-07,completed +65,26,2018-03-08,completed +66,36,2018-03-10,completed +67,79,2018-03-11,completed +68,53,2018-03-11,completed +69,3,2018-03-11,completed +70,8,2018-03-12,completed +71,42,2018-03-12,shipped +72,30,2018-03-14,shipped +73,19,2018-03-16,completed +74,9,2018-03-17,shipped +75,69,2018-03-18,completed +76,25,2018-03-20,completed +77,35,2018-03-21,shipped +78,90,2018-03-23,shipped +79,52,2018-03-23,shipped +80,11,2018-03-23,shipped +81,76,2018-03-23,shipped +82,46,2018-03-24,shipped +83,54,2018-03-24,shipped +84,70,2018-03-26,placed +85,47,2018-03-26,shipped +86,68,2018-03-26,placed +87,46,2018-03-27,placed +88,91,2018-03-27,shipped +89,21,2018-03-28,placed +90,66,2018-03-30,shipped +91,47,2018-03-31,placed +92,84,2018-04-02,placed +93,66,2018-04-03,placed +94,63,2018-04-03,placed +95,27,2018-04-04,placed +96,90,2018-04-06,placed +97,89,2018-04-07,placed +98,41,2018-04-07,placed +99,85,2018-04-09,placed \ No newline at end of file diff --git a/dbt/altered_jaffle_shop/seeds/raw_payments.csv b/dbt/altered_jaffle_shop/seeds/raw_payments.csv new file mode 100644 index 0000000..3989cb2 --- /dev/null +++ b/dbt/altered_jaffle_shop/seeds/raw_payments.csv @@ -0,0 +1,114 @@ +id,order_id,payment_method,amount +1,1,credit_card,1000 +2,2,credit_card,2000 +3,3,coupon,100 +4,4,coupon,2500 +5,5,bank_transfer,1700 +6,6,credit_card,600 +7,7,credit_card,1600 +8,8,credit_card,2300 +9,9,gift_card,2300 +10,9,bank_transfer,0 +11,10,bank_transfer,2600 +12,11,credit_card,2700 +13,12,credit_card,100 +14,13,credit_card,500 +15,13,bank_transfer,1400 +16,14,bank_transfer,300 +17,15,coupon,2200 +18,16,credit_card,1000 +19,17,bank_transfer,200 +20,18,credit_card,500 +21,18,credit_card,800 +22,19,gift_card,600 +23,20,bank_transfer,1500 +24,21,credit_card,1200 +25,22,bank_transfer,800 +26,23,gift_card,2300 +27,24,coupon,2600 +28,25,bank_transfer,2000 +29,25,credit_card,2200 +30,25,coupon,1600 +31,26,credit_card,3000 +32,27,credit_card,2300 +33,28,bank_transfer,1900 +34,29,bank_transfer,1200 +35,30,credit_card,1300 +36,31,credit_card,1200 +37,32,credit_card,300 +38,33,credit_card,2200 +39,34,bank_transfer,1500 +40,35,credit_card,2900 +41,36,bank_transfer,900 +42,37,credit_card,2300 +43,38,credit_card,1500 +44,39,bank_transfer,800 +45,40,credit_card,1400 +46,41,credit_card,1700 +47,42,coupon,1700 +48,43,gift_card,1800 +49,44,gift_card,1100 +50,45,bank_transfer,500 +51,46,bank_transfer,800 +52,47,credit_card,2200 +53,48,bank_transfer,300 +54,49,credit_card,600 +55,49,credit_card,900 +56,50,credit_card,2600 +57,51,credit_card,2900 +58,51,credit_card,100 +59,52,bank_transfer,1500 +60,53,credit_card,300 +61,54,credit_card,1800 +62,54,bank_transfer,1100 +63,55,credit_card,2900 +64,56,credit_card,400 +65,57,bank_transfer,200 +66,58,coupon,1800 +67,58,gift_card,600 +68,59,gift_card,2800 +69,60,credit_card,400 +70,61,bank_transfer,1600 +71,62,gift_card,1400 +72,63,credit_card,2900 +73,64,bank_transfer,2600 +74,65,credit_card,0 +75,66,credit_card,2800 +76,67,bank_transfer,400 +77,67,credit_card,1900 +78,68,credit_card,1600 +79,69,credit_card,1900 +80,70,credit_card,2600 +81,71,credit_card,500 +82,72,credit_card,2900 +83,73,bank_transfer,300 +84,74,credit_card,3000 +85,75,credit_card,1900 +86,76,coupon,200 +87,77,credit_card,0 +88,77,bank_transfer,1900 +89,78,bank_transfer,2600 +90,79,credit_card,1800 +91,79,credit_card,900 +92,80,gift_card,300 +93,81,coupon,200 +94,82,credit_card,800 +95,83,credit_card,100 +96,84,bank_transfer,2500 +97,85,bank_transfer,1700 +98,86,coupon,2300 +99,87,gift_card,3000 +100,87,credit_card,2600 +101,88,credit_card,2900 +102,89,bank_transfer,2200 +103,90,bank_transfer,200 +104,91,credit_card,1900 +105,92,bank_transfer,1500 +106,92,coupon,200 +107,93,gift_card,2600 +108,94,coupon,700 +109,95,coupon,2400 +110,96,gift_card,1700 +111,97,bank_transfer,1400 +112,98,bank_transfer,1000 +113,99,credit_card,2400 \ No newline at end of file diff --git a/dbt/altered_jaffle_shop/snapshots/.gitkeep b/dbt/altered_jaffle_shop/snapshots/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/dbt/altered_jaffle_shop/tests/.gitkeep b/dbt/altered_jaffle_shop/tests/.gitkeep new file mode 100644 index 0000000..e69de29 From 6f139dd9a690974cc39576ded3a31ed444f677a2 Mon Sep 17 00:00:00 2001 From: Pankaj Singh Date: Tue, 28 Apr 2026 15:26:39 +0530 Subject: [PATCH 2/9] Apply review feedback --- README.md | 4 +++- benchmark/auto_generate_models.sh | 9 +++++++-- dags/cosmos_async_dag.py | 16 ++++++++-------- dbt/altered_jaffle_shop/dbt_project.yml | 14 -------------- dbt/altered_jaffle_shop/models/customers.sql | 2 +- dbt/altered_jaffle_shop/models/docs.md | 2 +- dbt/altered_jaffle_shop/models/schema.yml | 9 --------- dbt/altered_jaffle_shop/profiles.yml | 5 ++--- include/constants.py | 4 ++++ 9 files changed, 26 insertions(+), 39 deletions(-) mode change 100644 => 100755 benchmark/auto_generate_models.sh diff --git a/README.md b/README.md index 88a4781..442eeec 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,9 @@ This project contains the following files and folders: * 2 seeds * 52 sources * 185 models - * [altered_jaffle_shop](https://github.com/astronomer/cosmos-benchmark/tree/main/dbt/altered_jaffle_shop): A custom dbt project designed to pre-compile and focus on specific custom models without any dbt-generated metadata dependencies. This project is based on a copy of the jaffle-shop project, with an additional four models that require significant time to run, enabling measurement of async DAG performance. The project includes one seed file — you can increase model transformation time by updating the values in this seed. More models can be generated automatically by running the following bash command: `sh benchmark/auto_generate_models.sh 2`. + * [altered_jaffle_shop](https://github.com/astronomer/cosmos-benchmark/tree/main/dbt/altered_jaffle_shop): A custom dbt project designed to pre-compile dbt project and focus on specific custom models without any dbt-generated metadata dependencies. This project is based on a copy of the jaffle-shop project, with an additional four models that require significant time to run, enabling measurement of async DAG performance. + * The project includes one addition seed file (model_params.csv) — you can increase model transformation time by updating the values in this seed. + * More models can be generated automatically by running the bash command: `./benchmark/auto_generate_models.sh 2` (Here input 2 means generate 2 addition model for each custom model i.e generate 8 additional model). - **Dockerfile**: This file contains a versioned Astro Runtime Docker image that provides a differentiated Airflow experience. - **include**: This folder contains any additional files that you want to include as part of your project. In this particular case, it contains configuration files. - **packages.txt**: Install OS-level packages needed for your project by adding them to this file. It is empty by default. diff --git a/benchmark/auto_generate_models.sh b/benchmark/auto_generate_models.sh old mode 100644 new mode 100755 index 02dc3bb..0b44353 --- a/benchmark/auto_generate_models.sh +++ b/benchmark/auto_generate_models.sh @@ -1,3 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail + +: "${1:?Usage: $0 }" + slow_models=( "dbt/altered_jaffle_shop/models/customers_slow_query.sql" "dbt/altered_jaffle_shop/models/long_model_cross_random.sql" @@ -6,7 +11,7 @@ slow_models=( ) for file in "${slow_models[@]}"; do - for i in $(seq 1 "$1"); do - cp "$file" "${file%.sql}${i}.sql" + for ((i = 1; i <= $1; i++)); do + cp -n "$file" "${file%.sql}${i}.sql" done done diff --git a/dags/cosmos_async_dag.py b/dags/cosmos_async_dag.py index 8c13078..ca8e603 100644 --- a/dags/cosmos_async_dag.py +++ b/dags/cosmos_async_dag.py @@ -2,24 +2,24 @@ from datetime import datetime from pathlib import Path -from cosmos import DbtDag, ExecutionConfig, ExecutionMode, ProfileConfig, ProjectConfig, RenderConfig +from cosmos import DbtDag, ExecutionConfig, ExecutionMode, ProfileConfig, ProjectConfig from cosmos.profiles import GoogleCloudServiceAccountDictProfileMapping +from include.constants import BIGQUERY_DATASET, DBT_ADAPTER_VERSION, GCP_PROJECT_ID DBT_PROJECT_PATH = Path("/usr/local/airflow/dbt/altered_jaffle_shop") -DBT_ADAPTER_VERSION = os.getenv("DBT_ADAPTER_VERSION", "1.9") + profile_config = ProfileConfig( - profile_name="default", + profile_name="altered_jaffle_shop", target_name="dev", profile_mapping=GoogleCloudServiceAccountDictProfileMapping( - conn_id="gcp_gs_conn", profile_args={"dataset": "test_async", "project": "astronomer-airflow-providers"} + conn_id="gcp_gs_conn", profile_args={"dataset": BIGQUERY_DATASET, "project": GCP_PROJECT_ID} ), ) -# [START airflow_async_execution_mode_example] -simple_dag_async = DbtDag( +cosmos_bq_async = DbtDag( # dbt/cosmos-specific parameters project_config=ProjectConfig(DBT_PROJECT_PATH), profile_config=profile_config, @@ -29,9 +29,9 @@ ), # normal dag parameters schedule=None, - start_date=datetime(2023, 1, 1), + start_date=datetime(2026, 1, 1), catchup=False, - dag_id="simple_dag_async", + dag_id="cosmos_bq_async", tags=["simple"], operator_args={ "location": "US", diff --git a/dbt/altered_jaffle_shop/dbt_project.yml b/dbt/altered_jaffle_shop/dbt_project.yml index b1376d5..11c4688 100644 --- a/dbt/altered_jaffle_shop/dbt_project.yml +++ b/dbt/altered_jaffle_shop/dbt_project.yml @@ -9,7 +9,6 @@ version: '1.0.0' profile: 'altered_jaffle_shop' # These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that models in this project can be # found in the "models/" directory. You probably won't need to change these! model-paths: ["models"] analysis-paths: ["analyses"] @@ -21,16 +20,3 @@ snapshot-paths: ["snapshots"] clean-targets: # directories to be removed by `dbt clean` - "target" - "dbt_packages" - - -# Configuring models -# Full documentation: https://docs.getdbt.com/docs/configuring-models - -# In this example config, we tell dbt to build all models in the example/ -# directory as views. These settings can be overridden in the individual model -# files using the `{{ config(...) }}` macro. -models: - altered_jaffle_shop: - # Config indicated by + and applies to all files under models/example/ - example: - +materialized: view diff --git a/dbt/altered_jaffle_shop/models/customers.sql b/dbt/altered_jaffle_shop/models/customers.sql index e4bf27b..b5e0f79 100644 --- a/dbt/altered_jaffle_shop/models/customers.sql +++ b/dbt/altered_jaffle_shop/models/customers.sql @@ -56,7 +56,7 @@ final as ( customer_orders.first_order, customer_orders.most_recent_order, customer_orders.number_of_orders, - customer_payments.total_amount as customer_lifetime_value + customer_payments.total_amount as total_order_amount from customers diff --git a/dbt/altered_jaffle_shop/models/docs.md b/dbt/altered_jaffle_shop/models/docs.md index c6ae93b..c4354bc 100644 --- a/dbt/altered_jaffle_shop/models/docs.md +++ b/dbt/altered_jaffle_shop/models/docs.md @@ -5,7 +5,7 @@ Orders can be one of the following statuses: | status | description | |----------------|------------------------------------------------------------------------------------------------------------------------| | placed | The order has been placed but has not yet left the warehouse | -| shipped | The order has ben shipped to the customer and is currently in transit | +| shipped | The order has been shipped to the customer and is currently in transit | | completed | The order has been received by the customer | | return_pending | The customer has indicated that they would like to return the order, but it has not yet been received at the warehouse | | returned | The order has been returned by the customer and received at the warehouse | diff --git a/dbt/altered_jaffle_shop/models/schema.yml b/dbt/altered_jaffle_shop/models/schema.yml index 3059bab..8d59ac3 100644 --- a/dbt/altered_jaffle_shop/models/schema.yml +++ b/dbt/altered_jaffle_shop/models/schema.yml @@ -39,15 +39,6 @@ models: - not_null description: This is a unique identifier for an order - # Comment so we don't have a standalone test relationships_orders_customer_id__customer_id__ref_customers__test - #- name: customer_id - # description: Foreign key to the customers table - # tests: - # - not_null - # - relationships: - # to: ref('customers') - # field: customer_id - - name: order_date description: Date (UTC) that the order was placed diff --git a/dbt/altered_jaffle_shop/profiles.yml b/dbt/altered_jaffle_shop/profiles.yml index c34b3bf..f851212 100644 --- a/dbt/altered_jaffle_shop/profiles.yml +++ b/dbt/altered_jaffle_shop/profiles.yml @@ -4,8 +4,7 @@ altered_jaffle_shop: dev: type: bigquery method: service-account - project: astronomer-airflow-providers - dataset: test_async + project: astronomer-dag-authoring + dataset: cosmos_async_bechmark_test threads: 4 # Must be a value of 1 or greater keyfile: # /PATH/TO/BIGQUERY/keyfile.json - OPTIONAL_CONFIG: VALUE \ No newline at end of file diff --git a/include/constants.py b/include/constants.py index 6bedbd7..a1c7788 100644 --- a/include/constants.py +++ b/include/constants.py @@ -1,8 +1,12 @@ "Contains constants used in the DAGs" +import os from pathlib import Path from cosmos import ExecutionConfig +DBT_ADAPTER_VERSION = os.getenv("DBT_ADAPTER_VERSION", "1.9") +GCP_PROJECT_ID = os.getenv("GCP_PROJECT_ID", "astronomer-dag-authoring") +BIGQUERY_DATASET = os.getenv("BIGQUERY_DATASET", "cosmos_async_bechmark_test") dbt_executable = Path("/usr/local/airflow/dbt_venv/bin/dbt") venv_execution_config = ExecutionConfig( From fde45dcf1351b3eaea87c19f3ef9c44f55b83924 Mon Sep 17 00:00:00 2001 From: Pankaj Singh Date: Tue, 28 Apr 2026 15:36:41 +0530 Subject: [PATCH 3/9] Ignore .user.yml and remove it from tracking Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 1 + dbt/altered_jaffle_shop/.user.yml | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 dbt/altered_jaffle_shop/.user.yml diff --git a/.gitignore b/.gitignore index 595a233..98c00c8 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ __pycache__ benchmark/pre-process/key.json dbt/logs +dbt/altered_jaffle_shop/.user.yml diff --git a/dbt/altered_jaffle_shop/.user.yml b/dbt/altered_jaffle_shop/.user.yml deleted file mode 100644 index 6299459..0000000 --- a/dbt/altered_jaffle_shop/.user.yml +++ /dev/null @@ -1 +0,0 @@ -id: d59e111f-6b74-4d7c-b380-61cabb5754b0 From cab79b11ca8fe81f679f0f489f8a3feda1c88888 Mon Sep 17 00:00:00 2001 From: Pankaj Singh Date: Wed, 29 Apr 2026 18:32:53 +0530 Subject: [PATCH 4/9] Add kube script --- benchmark/Dockerfile | 1 + .../experiment/airflow-test-cosmos-async.yaml | 28 +++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 benchmark/experiment/airflow-test-cosmos-async.yaml diff --git a/benchmark/Dockerfile b/benchmark/Dockerfile index fcc46e2..cdb26c8 100644 --- a/benchmark/Dockerfile +++ b/benchmark/Dockerfile @@ -28,6 +28,7 @@ WORKDIR /app # Copy project files into the container COPY dbt/fhir-dbt-analytics /app +COPY dbt/altered_jaffle_shop /app COPY benchmark/pre-process/profiles.yml /app/profiles.yml COPY benchmark/pre-process/key.json /app/key.json diff --git a/benchmark/experiment/airflow-test-cosmos-async.yaml b/benchmark/experiment/airflow-test-cosmos-async.yaml new file mode 100644 index 0000000..a46c68a --- /dev/null +++ b/benchmark/experiment/airflow-test-cosmos-async.yaml @@ -0,0 +1,28 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: airflow-test-cosmos-async +spec: + template: + spec: + containers: + - name: airflow + image: cosmos-benchmark:0.0.3 + imagePullPolicy: Never + command: ["airflow", "dags", "test", "cosmos_bq_async"] + env: + - name: AIRFLOW_CONN_GCP_GS_CONN + valueFrom: + secretKeyRef: + name: gcp-credentials + key: airflow-conn + resources: + # Equivalent to Astro's A10 instance + requests: + cpu: "2" + memory: "4Gi" + limits: + cpu: "2" + memory: "4Gi" + restartPolicy: Never + backoffLimit: 0 From 820a027c73a96448c96241efee6b20e344dbb499 Mon Sep 17 00:00:00 2001 From: Pankaj Singh Date: Thu, 30 Apr 2026 18:02:13 +0530 Subject: [PATCH 5/9] Disable dataset --- Dockerfile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Dockerfile b/Dockerfile index b52d7b3..8800609 100644 --- a/Dockerfile +++ b/Dockerfile @@ -25,5 +25,11 @@ RUN pydantic==2.11.0 #ENV OPENLINEAGE_DISABLED=True ENV AIRFLOW__CORE__LOAD_EXAMPLES=False ENV AIRFLOW__CORE__TEST_CONNECTION=Enabled +# In async DAG getting error +# sqlalchemy.exc.PendingRollbackError: This Session's transaction has been rolled back due to a previous exception during flush. +# To begin a new transaction with this Session, first issue Session.rollback(). Original exception was: Can't flush None value found in collection DatasetModel.aliases (Background on this error at: https://sqlalche.me/e/14/7s2a) +ENV AIRFLOW__COSMOS__ENABLE_DATASET_ALIAS=False USER astro + +RUN airflow db migrate From 1dc1ab5628cdd1e43836a220310a807bd0970912 Mon Sep 17 00:00:00 2001 From: Pankaj Singh Date: Thu, 30 Apr 2026 18:44:48 +0530 Subject: [PATCH 6/9] Delete unused macros --- dbt/altered_jaffle_shop/macros/get_model_param.sql | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 dbt/altered_jaffle_shop/macros/get_model_param.sql diff --git a/dbt/altered_jaffle_shop/macros/get_model_param.sql b/dbt/altered_jaffle_shop/macros/get_model_param.sql deleted file mode 100644 index 1292a2c..0000000 --- a/dbt/altered_jaffle_shop/macros/get_model_param.sql +++ /dev/null @@ -1,9 +0,0 @@ -{% macro get_model_param(model_name) %} - {% set seeds = load_seed('config') %} - {% for row in seeds %} - {% if row['model_name'] == model_name %} - {% do return(row) %} - {% endif %} - {% endfor %} - {% do exceptions.raise_compiler_error("Model '" ~ model_name ~ "' not found in seed 'config'") %} -{% endmacro %} From b7faf0088a105bb7be84b3057699da4731ffd4cd Mon Sep 17 00:00:00 2001 From: Pankaj Singh <98807258+pankajastro@users.noreply.github.com> Date: Thu, 30 Apr 2026 18:46:37 +0530 Subject: [PATCH 7/9] Apply suggestion from @pankajkoti Co-authored-by: Pankaj Koti --- dags/cosmos_async_dag.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dags/cosmos_async_dag.py b/dags/cosmos_async_dag.py index ca8e603..56e0e17 100644 --- a/dags/cosmos_async_dag.py +++ b/dags/cosmos_async_dag.py @@ -1,4 +1,3 @@ -import os from datetime import datetime from pathlib import Path From db3babacd4976786b1f4d71f71493382931cac4d Mon Sep 17 00:00:00 2001 From: Pankaj Singh <98807258+pankajastro@users.noreply.github.com> Date: Thu, 30 Apr 2026 18:47:24 +0530 Subject: [PATCH 8/9] Apply suggestion from @pankajkoti Co-authored-by: Pankaj Koti --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 8800609..e09f272 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,7 +20,7 @@ RUN pip install "dbt-core<1.10" RUN pip install dbt-bigquery RUN pip install dbt-postgres RUN pip install dbt-snowflake -RUN pydantic==2.11.0 +RUN pip install pydantic==2.11.0 #ENV OPENLINEAGE_DISABLED=True ENV AIRFLOW__CORE__LOAD_EXAMPLES=False From fe9a7d0f8f27ab4f42a4a331785c7d73c453c94e Mon Sep 17 00:00:00 2001 From: Pankaj Singh <98807258+pankajastro@users.noreply.github.com> Date: Thu, 30 Apr 2026 18:49:24 +0530 Subject: [PATCH 9/9] Apply suggestion from @pankajkoti Co-authored-by: Pankaj Koti --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 442eeec..3605a79 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ This project contains the following files and folders: * 185 models * [altered_jaffle_shop](https://github.com/astronomer/cosmos-benchmark/tree/main/dbt/altered_jaffle_shop): A custom dbt project designed to pre-compile dbt project and focus on specific custom models without any dbt-generated metadata dependencies. This project is based on a copy of the jaffle-shop project, with an additional four models that require significant time to run, enabling measurement of async DAG performance. * The project includes one addition seed file (model_params.csv) — you can increase model transformation time by updating the values in this seed. - * More models can be generated automatically by running the bash command: `./benchmark/auto_generate_models.sh 2` (Here input 2 means generate 2 addition model for each custom model i.e generate 8 additional model). + * More models can be generated automatically by running the bash command: `./benchmark/auto_generate_models.sh 2` (Here, input 2 means generate 2 additional models for each custom model, i.e., generate 8 additional models). - **Dockerfile**: This file contains a versioned Astro Runtime Docker image that provides a differentiated Airflow experience. - **include**: This folder contains any additional files that you want to include as part of your project. In this particular case, it contains configuration files. - **packages.txt**: Install OS-level packages needed for your project by adding them to this file. It is empty by default.