diff --git a/.gitignore b/.gitignore index 595a233..98c00c8 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ __pycache__ benchmark/pre-process/key.json dbt/logs +dbt/altered_jaffle_shop/.user.yml diff --git a/Dockerfile b/Dockerfile index b52d7b3..e09f272 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,10 +20,16 @@ RUN pip install "dbt-core<1.10" RUN pip install dbt-bigquery RUN pip install dbt-postgres RUN pip install dbt-snowflake -RUN pydantic==2.11.0 +RUN pip install pydantic==2.11.0 #ENV OPENLINEAGE_DISABLED=True ENV AIRFLOW__CORE__LOAD_EXAMPLES=False ENV AIRFLOW__CORE__TEST_CONNECTION=Enabled +# In async DAG getting error +# sqlalchemy.exc.PendingRollbackError: This Session's transaction has been rolled back due to a previous exception during flush. +# To begin a new transaction with this Session, first issue Session.rollback(). Original exception was: Can't flush None value found in collection DatasetModel.aliases (Background on this error at: https://sqlalche.me/e/14/7s2a) +ENV AIRFLOW__COSMOS__ENABLE_DATASET_ALIAS=False USER astro + +RUN airflow db migrate diff --git a/README.md b/README.md index 29e4822..3605a79 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,9 @@ This project contains the following files and folders: * 2 seeds * 52 sources * 185 models + * [altered_jaffle_shop](https://github.com/astronomer/cosmos-benchmark/tree/main/dbt/altered_jaffle_shop): A custom dbt project designed to pre-compile dbt project and focus on specific custom models without any dbt-generated metadata dependencies. This project is based on a copy of the jaffle-shop project, with an additional four models that require significant time to run, enabling measurement of async DAG performance. + * The project includes one addition seed file (model_params.csv) — you can increase model transformation time by updating the values in this seed. + * More models can be generated automatically by running the bash command: `./benchmark/auto_generate_models.sh 2` (Here, input 2 means generate 2 additional models for each custom model, i.e., generate 8 additional models). - **Dockerfile**: This file contains a versioned Astro Runtime Docker image that provides a differentiated Airflow experience. - **include**: This folder contains any additional files that you want to include as part of your project. In this particular case, it contains configuration files. - **packages.txt**: Install OS-level packages needed for your project by adding them to this file. It is empty by default. diff --git a/benchmark/Dockerfile b/benchmark/Dockerfile index fcc46e2..cdb26c8 100644 --- a/benchmark/Dockerfile +++ b/benchmark/Dockerfile @@ -28,6 +28,7 @@ WORKDIR /app # Copy project files into the container COPY dbt/fhir-dbt-analytics /app +COPY dbt/altered_jaffle_shop /app COPY benchmark/pre-process/profiles.yml /app/profiles.yml COPY benchmark/pre-process/key.json /app/key.json diff --git a/benchmark/auto_generate_models.sh b/benchmark/auto_generate_models.sh new file mode 100755 index 0000000..0b44353 --- /dev/null +++ b/benchmark/auto_generate_models.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +set -euo pipefail + +: "${1:?Usage: $0 }" + +slow_models=( + "dbt/altered_jaffle_shop/models/customers_slow_query.sql" + "dbt/altered_jaffle_shop/models/long_model_cross_random.sql" + "dbt/altered_jaffle_shop/models/long_model_subquery_windows.sql" + "dbt/altered_jaffle_shop/models/long_model_text_processing.sql" +) + +for file in "${slow_models[@]}"; do + for ((i = 1; i <= $1; i++)); do + cp -n "$file" "${file%.sql}${i}.sql" + done +done diff --git a/benchmark/experiment/airflow-test-cosmos-async.yaml b/benchmark/experiment/airflow-test-cosmos-async.yaml new file mode 100644 index 0000000..a46c68a --- /dev/null +++ b/benchmark/experiment/airflow-test-cosmos-async.yaml @@ -0,0 +1,28 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: airflow-test-cosmos-async +spec: + template: + spec: + containers: + - name: airflow + image: cosmos-benchmark:0.0.3 + imagePullPolicy: Never + command: ["airflow", "dags", "test", "cosmos_bq_async"] + env: + - name: AIRFLOW_CONN_GCP_GS_CONN + valueFrom: + secretKeyRef: + name: gcp-credentials + key: airflow-conn + resources: + # Equivalent to Astro's A10 instance + requests: + cpu: "2" + memory: "4Gi" + limits: + cpu: "2" + memory: "4Gi" + restartPolicy: Never + backoffLimit: 0 diff --git a/dags/cosmos_async_dag.py b/dags/cosmos_async_dag.py new file mode 100644 index 0000000..56e0e17 --- /dev/null +++ b/dags/cosmos_async_dag.py @@ -0,0 +1,40 @@ +from datetime import datetime +from pathlib import Path + +from cosmos import DbtDag, ExecutionConfig, ExecutionMode, ProfileConfig, ProjectConfig +from cosmos.profiles import GoogleCloudServiceAccountDictProfileMapping +from include.constants import BIGQUERY_DATASET, DBT_ADAPTER_VERSION, GCP_PROJECT_ID + +DBT_PROJECT_PATH = Path("/usr/local/airflow/dbt/altered_jaffle_shop") + + + +profile_config = ProfileConfig( + profile_name="altered_jaffle_shop", + target_name="dev", + profile_mapping=GoogleCloudServiceAccountDictProfileMapping( + conn_id="gcp_gs_conn", profile_args={"dataset": BIGQUERY_DATASET, "project": GCP_PROJECT_ID} + ), +) + + +cosmos_bq_async = DbtDag( + # dbt/cosmos-specific parameters + project_config=ProjectConfig(DBT_PROJECT_PATH), + profile_config=profile_config, + execution_config=ExecutionConfig( + execution_mode=ExecutionMode.AIRFLOW_ASYNC, + async_py_requirements=[f"dbt-bigquery=={DBT_ADAPTER_VERSION}"], + ), + # normal dag parameters + schedule=None, + start_date=datetime(2026, 1, 1), + catchup=False, + dag_id="cosmos_bq_async", + tags=["simple"], + operator_args={ + "location": "US", + "install_deps": True, + "full_refresh": True, + }, +) diff --git a/dbt/altered_jaffle_shop/.gitignore b/dbt/altered_jaffle_shop/.gitignore new file mode 100644 index 0000000..49f147c --- /dev/null +++ b/dbt/altered_jaffle_shop/.gitignore @@ -0,0 +1,4 @@ + +target/ +dbt_packages/ +logs/ diff --git a/dbt/altered_jaffle_shop/README.md b/dbt/altered_jaffle_shop/README.md new file mode 100644 index 0000000..7874ac8 --- /dev/null +++ b/dbt/altered_jaffle_shop/README.md @@ -0,0 +1,15 @@ +Welcome to your new dbt project! + +### Using the starter project + +Try running the following commands: +- dbt run +- dbt test + + +### Resources: +- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) +- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers +- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support +- Find [dbt events](https://events.getdbt.com) near you +- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices diff --git a/dbt/altered_jaffle_shop/analyses/.gitkeep b/dbt/altered_jaffle_shop/analyses/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/dbt/altered_jaffle_shop/dbt_project.yml b/dbt/altered_jaffle_shop/dbt_project.yml new file mode 100644 index 0000000..11c4688 --- /dev/null +++ b/dbt/altered_jaffle_shop/dbt_project.yml @@ -0,0 +1,22 @@ + +# Name your project! Project names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'altered_jaffle_shop' +version: '1.0.0' + +# This setting configures which "profile" dbt uses for this project. +profile: 'altered_jaffle_shop' + +# These configurations specify where dbt should look for different types of files. +# found in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +analysis-paths: ["analyses"] +test-paths: ["tests"] +seed-paths: ["seeds"] +macro-paths: ["macros"] +snapshot-paths: ["snapshots"] + +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" diff --git a/dbt/altered_jaffle_shop/macros/.gitkeep b/dbt/altered_jaffle_shop/macros/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/dbt/altered_jaffle_shop/models/customers.sql b/dbt/altered_jaffle_shop/models/customers.sql new file mode 100644 index 0000000..b5e0f79 --- /dev/null +++ b/dbt/altered_jaffle_shop/models/customers.sql @@ -0,0 +1,71 @@ +{{ config(tags=["customers"]) }} + +with customers as ( + + select * from {{ ref('stg_customers') }} + +), + +orders as ( + + select * from {{ ref('stg_orders') }} + +), + +payments as ( + + select * from {{ ref('stg_payments') }} + +), + +customer_orders as ( + + select + customer_id, + + min(order_date) as first_order, + max(order_date) as most_recent_order, + count(order_id) as number_of_orders + from orders + + group by customer_id + +), + +customer_payments as ( + + select + orders.customer_id, + sum(amount) as total_amount + + from payments + + left join orders on + payments.order_id = orders.order_id + + group by orders.customer_id + +), + +final as ( + + select + customers.customer_id, + customers.first_name, + customers.last_name, + customer_orders.first_order, + customer_orders.most_recent_order, + customer_orders.number_of_orders, + customer_payments.total_amount as total_order_amount + + from customers + + left join customer_orders + on customers.customer_id = customer_orders.customer_id + + left join customer_payments + on customers.customer_id = customer_payments.customer_id + +) + +select * from final diff --git a/dbt/altered_jaffle_shop/models/customers_slow_query.sql b/dbt/altered_jaffle_shop/models/customers_slow_query.sql new file mode 100644 index 0000000..3427db8 --- /dev/null +++ b/dbt/altered_jaffle_shop/models/customers_slow_query.sql @@ -0,0 +1,37 @@ +{{ config( + materialized = "table" +) }} + +WITH params AS ( + SELECT array_x, array_y + FROM {{ ref('model_params') }} + WHERE model_name = 'customers_slow_query' + LIMIT 1 +), +base AS ( + SELECT * FROM {{ ref('customers') }} +), +expanded AS ( + SELECT + b.*, + x AS extra_x, + y AS extra_y + FROM base b + CROSS JOIN params p + CROSS JOIN UNNEST(GENERATE_ARRAY(1, p.array_x)) AS x + CROSS JOIN UNNEST(GENERATE_ARRAY(1, p.array_y)) AS y +), +windowed AS ( + SELECT + *, + AVG(LENGTH(CAST(customer_id AS STRING))) OVER ( + PARTITION BY customer_id + ORDER BY extra_x, extra_y + ) AS avg_len + FROM expanded +) +SELECT + customer_id, + SUM(avg_len) AS sum_len +FROM windowed +GROUP BY customer_id diff --git a/dbt/altered_jaffle_shop/models/docs.md b/dbt/altered_jaffle_shop/models/docs.md new file mode 100644 index 0000000..c4354bc --- /dev/null +++ b/dbt/altered_jaffle_shop/models/docs.md @@ -0,0 +1,14 @@ +{% docs orders_status %} + +Orders can be one of the following statuses: + +| status | description | +|----------------|------------------------------------------------------------------------------------------------------------------------| +| placed | The order has been placed but has not yet left the warehouse | +| shipped | The order has been shipped to the customer and is currently in transit | +| completed | The order has been received by the customer | +| return_pending | The customer has indicated that they would like to return the order, but it has not yet been received at the warehouse | +| returned | The order has been returned by the customer and received at the warehouse | + + +{% enddocs %} diff --git a/dbt/altered_jaffle_shop/models/long_model_cross_random.sql b/dbt/altered_jaffle_shop/models/long_model_cross_random.sql new file mode 100644 index 0000000..d7961b0 --- /dev/null +++ b/dbt/altered_jaffle_shop/models/long_model_cross_random.sql @@ -0,0 +1,30 @@ +{{ config( + materialized = "table" +) }} + +WITH params AS ( + SELECT array_x, array_y + FROM {{ ref('model_params') }} + WHERE model_name = 'long_model_cross_random' + LIMIT 1 +), +base AS ( + SELECT * FROM {{ ref('customers') }} +), +inflated AS ( + SELECT + b.customer_id, + x AS x_val, + y AS y_val, + RAND() * x * y AS random_val + FROM base b + CROSS JOIN params p + CROSS JOIN UNNEST(GENERATE_ARRAY(1, p.array_x)) AS x + CROSS JOIN UNNEST(GENERATE_ARRAY(1, p.array_y)) AS y +) +SELECT + customer_id, + COUNT(*) AS row_count, + AVG(random_val) AS avg_val +FROM inflated +GROUP BY customer_id diff --git a/dbt/altered_jaffle_shop/models/long_model_subquery_windows.sql b/dbt/altered_jaffle_shop/models/long_model_subquery_windows.sql new file mode 100644 index 0000000..b8d85d5 --- /dev/null +++ b/dbt/altered_jaffle_shop/models/long_model_subquery_windows.sql @@ -0,0 +1,55 @@ +{{ config( + materialized = "table" +) }} + +WITH params AS ( + SELECT array_x + FROM {{ ref('model_params') }} + WHERE model_name = 'long_model_subquery_windows' + LIMIT 1 +), + +-- Inflate base rows by duplicating each customer 100x +base AS ( + SELECT + c.customer_id, + d AS duplication_id + FROM {{ ref('customers') }} c + CROSS JOIN UNNEST(GENERATE_ARRAY(1, 100)) AS d +), + +-- Generate large expansion using params +expanded AS ( + SELECT + b.customer_id, + x AS factor, + POW(x, 0.5) AS sqrt_x, + SAFE_DIVIDE(x, NULLIF(MOD(x, 10), 0)) AS ratio, + LOG(x + 1) + SIN(x) + COS(x) AS expensive_math + FROM base b + CROSS JOIN params p + CROSS JOIN UNNEST(GENERATE_ARRAY(1, p.array_x)) AS x +), + +-- Complex windowing across a large range +windowed AS ( + SELECT + *, + AVG(expensive_math) OVER ( + PARTITION BY customer_id + ORDER BY factor + ROWS BETWEEN 10000 PRECEDING AND CURRENT ROW + ) AS moving_avg + FROM expanded +), + +aggregated AS ( + SELECT + customer_id, + SUM(moving_avg) AS total_avg, + COUNT(*) AS cnt + FROM windowed + GROUP BY customer_id +) + +SELECT * FROM aggregated diff --git a/dbt/altered_jaffle_shop/models/long_model_text_processing.sql b/dbt/altered_jaffle_shop/models/long_model_text_processing.sql new file mode 100644 index 0000000..95c44ed --- /dev/null +++ b/dbt/altered_jaffle_shop/models/long_model_text_processing.sql @@ -0,0 +1,41 @@ +{{ config( + materialized = "table" +) }} + +WITH params AS ( + SELECT array_x, array_y + FROM {{ ref('model_params') }} + WHERE model_name = 'long_model_text_processing' + LIMIT 1 +), +base AS ( + SELECT * FROM {{ ref('customers') }} +), +text_gen AS ( + SELECT + b.customer_id, + CONCAT('Customer_', CAST(x AS STRING), '_', CAST(y AS STRING)) AS tag, + REPEAT('A', MOD(x * y, 1000)) AS description + FROM base b + CROSS JOIN params p + CROSS JOIN UNNEST(GENERATE_ARRAY(1, p.array_x)) AS x + CROSS JOIN UNNEST(GENERATE_ARRAY(1, p.array_y)) AS y +), +analyzed AS ( + SELECT + customer_id, + LENGTH(tag) AS tag_len, + LENGTH(description) AS desc_len, + RANK() OVER (PARTITION BY customer_id ORDER BY LENGTH(description) DESC) AS rank_val + FROM text_gen +), +filtered AS ( + SELECT * FROM analyzed + WHERE rank_val <= 100 +) +SELECT + customer_id, + AVG(tag_len) AS avg_tag_len, + MAX(desc_len) AS max_desc_len +FROM filtered +GROUP BY customer_id diff --git a/dbt/altered_jaffle_shop/models/orders.sql b/dbt/altered_jaffle_shop/models/orders.sql new file mode 100644 index 0000000..cbb2934 --- /dev/null +++ b/dbt/altered_jaffle_shop/models/orders.sql @@ -0,0 +1,56 @@ +{% set payment_methods = ['credit_card', 'coupon', 'bank_transfer', 'gift_card'] %} + +with orders as ( + + select * from {{ ref('stg_orders') }} + +), + +payments as ( + + select * from {{ ref('stg_payments') }} + +), + +order_payments as ( + + select + order_id, + + {% for payment_method in payment_methods -%} + sum(case when payment_method = '{{ payment_method }}' then amount else 0 end) as {{ payment_method }}_amount, + {% endfor -%} + + sum(amount) as total_amount + + from payments + + group by order_id + +), + +final as ( + + select + orders.order_id, + orders.customer_id, + orders.order_date, + orders.status, + + {% for payment_method in payment_methods -%} + + order_payments.{{ payment_method }}_amount, + + {% endfor -%} + + order_payments.total_amount as amount + + from orders + + + left join order_payments + on orders.order_id = order_payments.order_id + +) + +select * from final diff --git a/dbt/altered_jaffle_shop/models/schema.yml b/dbt/altered_jaffle_shop/models/schema.yml new file mode 100644 index 0000000..8d59ac3 --- /dev/null +++ b/dbt/altered_jaffle_shop/models/schema.yml @@ -0,0 +1,74 @@ +version: 2 + +models: + - name: customers + description: This table has basic information about a customer, as well as some derived facts based on a customer's orders + + columns: + - name: customer_id + description: This is a unique identifier for a customer + tests: + - unique + - not_null + + - name: first_name + description: Customer's first name. PII. + + - name: last_name + description: Customer's last name. PII. + + - name: first_order + description: Date (UTC) of a customer's first order + + - name: most_recent_order + description: Date (UTC) of a customer's most recent order + + - name: number_of_orders + description: Count of the number of orders a customer has placed + + - name: total_order_amount + description: Total value (AUD) of a customer's orders + + - name: orders + description: This table has basic information about orders, as well as some derived facts based on payments + + columns: + - name: order_id + tests: + - unique + - not_null + description: This is a unique identifier for an order + + - name: order_date + description: Date (UTC) that the order was placed + + - name: status + description: '{{ doc("orders_status") }}' + tests: + - accepted_values: + values: ['placed', 'shipped', 'completed', 'return_pending', 'returned'] + + - name: amount + description: Total amount (AUD) of the order + tests: + - not_null + + - name: credit_card_amount + description: Amount of the order (AUD) paid for by credit card + tests: + - not_null + + - name: coupon_amount + description: Amount of the order (AUD) paid for by coupon + tests: + - not_null + + - name: bank_transfer_amount + description: Amount of the order (AUD) paid for by bank transfer + tests: + - not_null + + - name: gift_card_amount + description: Amount of the order (AUD) paid for by gift card + tests: + - not_null \ No newline at end of file diff --git a/dbt/altered_jaffle_shop/models/staging/stg_customers.sql b/dbt/altered_jaffle_shop/models/staging/stg_customers.sql new file mode 100644 index 0000000..cad0472 --- /dev/null +++ b/dbt/altered_jaffle_shop/models/staging/stg_customers.sql @@ -0,0 +1,22 @@ +with source as ( + + {#- + Normally we would select from the table here, but we are using seeds to load + our data in this project + #} + select * from {{ ref('raw_customers') }} + +), + +renamed as ( + + select + id as customer_id, + first_name, + last_name + + from source + +) + +select * from renamed diff --git a/dbt/altered_jaffle_shop/models/staging/stg_orders.sql b/dbt/altered_jaffle_shop/models/staging/stg_orders.sql new file mode 100644 index 0000000..a654dcb --- /dev/null +++ b/dbt/altered_jaffle_shop/models/staging/stg_orders.sql @@ -0,0 +1,23 @@ +with source as ( + + {#- + Normally we would select from the table here, but we are using seeds to load + our data in this project + #} + select * from {{ ref('raw_orders') }} + +), + +renamed as ( + + select + id as order_id, + user_id as customer_id, + order_date, + status + + from source + +) + +select * from renamed diff --git a/dbt/altered_jaffle_shop/models/staging/stg_payments.sql b/dbt/altered_jaffle_shop/models/staging/stg_payments.sql new file mode 100644 index 0000000..f718596 --- /dev/null +++ b/dbt/altered_jaffle_shop/models/staging/stg_payments.sql @@ -0,0 +1,25 @@ +with source as ( + + {#- + Normally we would select from the table here, but we are using seeds to load + our data in this project + #} + select * from {{ ref('raw_payments') }} + +), + +renamed as ( + + select + id as payment_id, + order_id, + payment_method, + + -- `amount` is currently stored in cents, so we convert it to dollars + amount / 100 as amount + + from source + +) + +select * from renamed diff --git a/dbt/altered_jaffle_shop/profiles.yml b/dbt/altered_jaffle_shop/profiles.yml new file mode 100644 index 0000000..f851212 --- /dev/null +++ b/dbt/altered_jaffle_shop/profiles.yml @@ -0,0 +1,10 @@ +altered_jaffle_shop: + target: dev + outputs: + dev: + type: bigquery + method: service-account + project: astronomer-dag-authoring + dataset: cosmos_async_bechmark_test + threads: 4 # Must be a value of 1 or greater + keyfile: # /PATH/TO/BIGQUERY/keyfile.json diff --git a/dbt/altered_jaffle_shop/seeds/.gitkeep b/dbt/altered_jaffle_shop/seeds/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/dbt/altered_jaffle_shop/seeds/model_params.csv b/dbt/altered_jaffle_shop/seeds/model_params.csv new file mode 100644 index 0000000..785e16b --- /dev/null +++ b/dbt/altered_jaffle_shop/seeds/model_params.csv @@ -0,0 +1,5 @@ +model_name,array_x,array_y +customers_slow_query,2000,2000 +long_model_cross_random,2000,2000 +long_model_text_processing,2000,2000 +long_model_subquery_windows,2000,1 diff --git a/dbt/altered_jaffle_shop/seeds/raw_customers.csv b/dbt/altered_jaffle_shop/seeds/raw_customers.csv new file mode 100644 index 0000000..e386bb3 --- /dev/null +++ b/dbt/altered_jaffle_shop/seeds/raw_customers.csv @@ -0,0 +1,101 @@ +id,first_name,last_name +1,Michael,P. +2,Shawn,M. +3,Kathleen,P. +4,Jimmy,C. +5,Katherine,R. +6,Sarah,R. +7,Martin,M. +8,Frank,R. +9,Jennifer,F. +10,Henry,W. +11,Fred,S. +12,Amy,D. +13,Kathleen,M. +14,Steve,F. +15,Teresa,H. +16,Amanda,H. +17,Kimberly,R. +18,Johnny,K. +19,Virginia,F. +20,Anna,A. +21,Willie,H. +22,Sean,H. +23,Mildred,A. +24,David,G. +25,Victor,H. +26,Aaron,R. +27,Benjamin,B. +28,Lisa,W. +29,Benjamin,K. +30,Christina,W. +31,Jane,G. +32,Thomas,O. +33,Katherine,M. +34,Jennifer,S. +35,Sara,T. +36,Harold,O. +37,Shirley,J. +38,Dennis,J. +39,Louise,W. +40,Maria,A. +41,Gloria,C. +42,Diana,S. +43,Kelly,N. +44,Jane,R. +45,Scott,B. +46,Norma,C. +47,Marie,P. +48,Lillian,C. +49,Judy,N. +50,Billy,L. +51,Howard,R. +52,Laura,F. +53,Anne,B. +54,Rose,M. +55,Nicholas,R. +56,Joshua,K. +57,Paul,W. +58,Kathryn,K. +59,Adam,A. +60,Norma,W. +61,Timothy,R. +62,Elizabeth,P. +63,Edward,G. +64,David,C. +65,Brenda,W. +66,Adam,W. +67,Michael,H. +68,Jesse,E. +69,Janet,P. +70,Helen,F. +71,Gerald,C. +72,Kathryn,O. +73,Alan,B. +74,Harry,A. +75,Andrea,H. +76,Barbara,W. +77,Anne,W. +78,Harry,H. +79,Jack,R. +80,Phillip,H. +81,Shirley,H. +82,Arthur,D. +83,Virginia,R. +84,Christina,R. +85,Theresa,M. +86,Jason,C. +87,Phillip,B. +88,Adam,T. +89,Margaret,J. +90,Paul,P. +91,Todd,W. +92,Willie,O. +93,Frances,R. +94,Gregory,H. +95,Lisa,P. +96,Jacqueline,A. +97,Shirley,D. +98,Nicole,M. +99,Mary,G. +100,Jean,M. \ No newline at end of file diff --git a/dbt/altered_jaffle_shop/seeds/raw_orders.csv b/dbt/altered_jaffle_shop/seeds/raw_orders.csv new file mode 100644 index 0000000..45160e1 --- /dev/null +++ b/dbt/altered_jaffle_shop/seeds/raw_orders.csv @@ -0,0 +1,100 @@ +id,user_id,order_date,status +1,1,2018-01-01,returned +2,3,2018-01-02,completed +3,94,2018-01-04,completed +4,50,2018-01-05,completed +5,64,2018-01-05,completed +6,54,2018-01-07,completed +7,88,2018-01-09,completed +8,2,2018-01-11,returned +9,53,2018-01-12,completed +10,7,2018-01-14,completed +11,99,2018-01-14,completed +12,59,2018-01-15,completed +13,84,2018-01-17,completed +14,40,2018-01-17,returned +15,25,2018-01-17,completed +16,39,2018-01-18,completed +17,71,2018-01-18,completed +18,64,2018-01-20,returned +19,54,2018-01-22,completed +20,20,2018-01-23,completed +21,71,2018-01-23,completed +22,86,2018-01-24,completed +23,22,2018-01-26,return_pending +24,3,2018-01-27,completed +25,51,2018-01-28,completed +26,32,2018-01-28,completed +27,94,2018-01-29,completed +28,8,2018-01-29,completed +29,57,2018-01-31,completed +30,69,2018-02-02,completed +31,16,2018-02-02,completed +32,28,2018-02-04,completed +33,42,2018-02-04,completed +34,38,2018-02-06,completed +35,80,2018-02-08,completed +36,85,2018-02-10,completed +37,1,2018-02-10,completed +38,51,2018-02-10,completed +39,26,2018-02-11,completed +40,33,2018-02-13,completed +41,99,2018-02-14,completed +42,92,2018-02-16,completed +43,31,2018-02-17,completed +44,66,2018-02-17,completed +45,22,2018-02-17,completed +46,6,2018-02-19,completed +47,50,2018-02-20,completed +48,27,2018-02-21,completed +49,35,2018-02-21,completed +50,51,2018-02-23,completed +51,71,2018-02-24,completed +52,54,2018-02-25,return_pending +53,34,2018-02-26,completed +54,54,2018-02-26,completed +55,18,2018-02-27,completed +56,79,2018-02-28,completed +57,93,2018-03-01,completed +58,22,2018-03-01,completed +59,30,2018-03-02,completed +60,12,2018-03-03,completed +61,63,2018-03-03,completed +62,57,2018-03-05,completed +63,70,2018-03-06,completed +64,13,2018-03-07,completed +65,26,2018-03-08,completed +66,36,2018-03-10,completed +67,79,2018-03-11,completed +68,53,2018-03-11,completed +69,3,2018-03-11,completed +70,8,2018-03-12,completed +71,42,2018-03-12,shipped +72,30,2018-03-14,shipped +73,19,2018-03-16,completed +74,9,2018-03-17,shipped +75,69,2018-03-18,completed +76,25,2018-03-20,completed +77,35,2018-03-21,shipped +78,90,2018-03-23,shipped +79,52,2018-03-23,shipped +80,11,2018-03-23,shipped +81,76,2018-03-23,shipped +82,46,2018-03-24,shipped +83,54,2018-03-24,shipped +84,70,2018-03-26,placed +85,47,2018-03-26,shipped +86,68,2018-03-26,placed +87,46,2018-03-27,placed +88,91,2018-03-27,shipped +89,21,2018-03-28,placed +90,66,2018-03-30,shipped +91,47,2018-03-31,placed +92,84,2018-04-02,placed +93,66,2018-04-03,placed +94,63,2018-04-03,placed +95,27,2018-04-04,placed +96,90,2018-04-06,placed +97,89,2018-04-07,placed +98,41,2018-04-07,placed +99,85,2018-04-09,placed \ No newline at end of file diff --git a/dbt/altered_jaffle_shop/seeds/raw_payments.csv b/dbt/altered_jaffle_shop/seeds/raw_payments.csv new file mode 100644 index 0000000..3989cb2 --- /dev/null +++ b/dbt/altered_jaffle_shop/seeds/raw_payments.csv @@ -0,0 +1,114 @@ +id,order_id,payment_method,amount +1,1,credit_card,1000 +2,2,credit_card,2000 +3,3,coupon,100 +4,4,coupon,2500 +5,5,bank_transfer,1700 +6,6,credit_card,600 +7,7,credit_card,1600 +8,8,credit_card,2300 +9,9,gift_card,2300 +10,9,bank_transfer,0 +11,10,bank_transfer,2600 +12,11,credit_card,2700 +13,12,credit_card,100 +14,13,credit_card,500 +15,13,bank_transfer,1400 +16,14,bank_transfer,300 +17,15,coupon,2200 +18,16,credit_card,1000 +19,17,bank_transfer,200 +20,18,credit_card,500 +21,18,credit_card,800 +22,19,gift_card,600 +23,20,bank_transfer,1500 +24,21,credit_card,1200 +25,22,bank_transfer,800 +26,23,gift_card,2300 +27,24,coupon,2600 +28,25,bank_transfer,2000 +29,25,credit_card,2200 +30,25,coupon,1600 +31,26,credit_card,3000 +32,27,credit_card,2300 +33,28,bank_transfer,1900 +34,29,bank_transfer,1200 +35,30,credit_card,1300 +36,31,credit_card,1200 +37,32,credit_card,300 +38,33,credit_card,2200 +39,34,bank_transfer,1500 +40,35,credit_card,2900 +41,36,bank_transfer,900 +42,37,credit_card,2300 +43,38,credit_card,1500 +44,39,bank_transfer,800 +45,40,credit_card,1400 +46,41,credit_card,1700 +47,42,coupon,1700 +48,43,gift_card,1800 +49,44,gift_card,1100 +50,45,bank_transfer,500 +51,46,bank_transfer,800 +52,47,credit_card,2200 +53,48,bank_transfer,300 +54,49,credit_card,600 +55,49,credit_card,900 +56,50,credit_card,2600 +57,51,credit_card,2900 +58,51,credit_card,100 +59,52,bank_transfer,1500 +60,53,credit_card,300 +61,54,credit_card,1800 +62,54,bank_transfer,1100 +63,55,credit_card,2900 +64,56,credit_card,400 +65,57,bank_transfer,200 +66,58,coupon,1800 +67,58,gift_card,600 +68,59,gift_card,2800 +69,60,credit_card,400 +70,61,bank_transfer,1600 +71,62,gift_card,1400 +72,63,credit_card,2900 +73,64,bank_transfer,2600 +74,65,credit_card,0 +75,66,credit_card,2800 +76,67,bank_transfer,400 +77,67,credit_card,1900 +78,68,credit_card,1600 +79,69,credit_card,1900 +80,70,credit_card,2600 +81,71,credit_card,500 +82,72,credit_card,2900 +83,73,bank_transfer,300 +84,74,credit_card,3000 +85,75,credit_card,1900 +86,76,coupon,200 +87,77,credit_card,0 +88,77,bank_transfer,1900 +89,78,bank_transfer,2600 +90,79,credit_card,1800 +91,79,credit_card,900 +92,80,gift_card,300 +93,81,coupon,200 +94,82,credit_card,800 +95,83,credit_card,100 +96,84,bank_transfer,2500 +97,85,bank_transfer,1700 +98,86,coupon,2300 +99,87,gift_card,3000 +100,87,credit_card,2600 +101,88,credit_card,2900 +102,89,bank_transfer,2200 +103,90,bank_transfer,200 +104,91,credit_card,1900 +105,92,bank_transfer,1500 +106,92,coupon,200 +107,93,gift_card,2600 +108,94,coupon,700 +109,95,coupon,2400 +110,96,gift_card,1700 +111,97,bank_transfer,1400 +112,98,bank_transfer,1000 +113,99,credit_card,2400 \ No newline at end of file diff --git a/dbt/altered_jaffle_shop/snapshots/.gitkeep b/dbt/altered_jaffle_shop/snapshots/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/dbt/altered_jaffle_shop/tests/.gitkeep b/dbt/altered_jaffle_shop/tests/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/include/constants.py b/include/constants.py index 6bedbd7..a1c7788 100644 --- a/include/constants.py +++ b/include/constants.py @@ -1,8 +1,12 @@ "Contains constants used in the DAGs" +import os from pathlib import Path from cosmos import ExecutionConfig +DBT_ADAPTER_VERSION = os.getenv("DBT_ADAPTER_VERSION", "1.9") +GCP_PROJECT_ID = os.getenv("GCP_PROJECT_ID", "astronomer-dag-authoring") +BIGQUERY_DATASET = os.getenv("BIGQUERY_DATASET", "cosmos_async_bechmark_test") dbt_executable = Path("/usr/local/airflow/dbt_venv/bin/dbt") venv_execution_config = ExecutionConfig(