From 94ad887b8b2a727cc7470abeae41c996703461c6 Mon Sep 17 00:00:00 2001 From: L Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Fri, 27 Feb 2026 16:52:27 -0500 Subject: [PATCH 01/29] Create config subfolder structure --- .../{ => callbacks}/callbacks.rst | 0 .../testing-behavior.rst | 0 .../{ => dbt-docs}/generating-docs.rst | 0 .../{ => dbt-docs}/hosting-docs.rst | 0 .../execution-modes-local-conflicts.rst | 133 +++++ docs/configuration/index.rst | 89 +++- .../airflow-worker/async-execution-mode.rst | 247 +++++++++ .../run-dbt/airflow-worker/index.rst | 9 + .../airflow-worker/watcher-execution-mode.rst | 480 ++++++++++++++++++ .../container/aws-container-run-job.rst | 191 +++++++ .../container/azure-container-instance.rst | 138 +++++ .../run-dbt/container/docker.rst | 111 ++++ .../run-dbt/container/gcp-cloud-run-job.rst | 265 ++++++++++ .../configuration/run-dbt/container/index.rst | 13 + .../run-dbt/container/kubernetes.rst | 167 ++++++ .../watcher-kubernetes-execution-mode.rst | 214 ++++++++ .../configuration/run-dbt/execution-modes.rst | 387 ++++++++++++++ 17 files changed, 2428 insertions(+), 16 deletions(-) rename docs/configuration/{ => callbacks}/callbacks.rst (100%) rename docs/configuration/{ => configure-tests}/testing-behavior.rst (100%) rename docs/configuration/{ => dbt-docs}/generating-docs.rst (100%) rename docs/configuration/{ => dbt-docs}/hosting-docs.rst (100%) create mode 100644 docs/configuration/execution-modes-local-conflicts.rst create mode 100644 docs/configuration/run-dbt/airflow-worker/async-execution-mode.rst create mode 100644 docs/configuration/run-dbt/airflow-worker/index.rst create mode 100644 docs/configuration/run-dbt/airflow-worker/watcher-execution-mode.rst create mode 100644 docs/configuration/run-dbt/container/aws-container-run-job.rst create mode 100644 docs/configuration/run-dbt/container/azure-container-instance.rst create mode 100644 docs/configuration/run-dbt/container/docker.rst create mode 100644 
docs/configuration/run-dbt/container/gcp-cloud-run-job.rst create mode 100644 docs/configuration/run-dbt/container/index.rst create mode 100644 docs/configuration/run-dbt/container/kubernetes.rst create mode 100644 docs/configuration/run-dbt/container/watcher-kubernetes-execution-mode.rst create mode 100644 docs/configuration/run-dbt/execution-modes.rst diff --git a/docs/configuration/callbacks.rst b/docs/configuration/callbacks/callbacks.rst similarity index 100% rename from docs/configuration/callbacks.rst rename to docs/configuration/callbacks/callbacks.rst diff --git a/docs/configuration/testing-behavior.rst b/docs/configuration/configure-tests/testing-behavior.rst similarity index 100% rename from docs/configuration/testing-behavior.rst rename to docs/configuration/configure-tests/testing-behavior.rst diff --git a/docs/configuration/generating-docs.rst b/docs/configuration/dbt-docs/generating-docs.rst similarity index 100% rename from docs/configuration/generating-docs.rst rename to docs/configuration/dbt-docs/generating-docs.rst diff --git a/docs/configuration/hosting-docs.rst b/docs/configuration/dbt-docs/hosting-docs.rst similarity index 100% rename from docs/configuration/hosting-docs.rst rename to docs/configuration/dbt-docs/hosting-docs.rst diff --git a/docs/configuration/execution-modes-local-conflicts.rst b/docs/configuration/execution-modes-local-conflicts.rst new file mode 100644 index 0000000000..9fec173751 --- /dev/null +++ b/docs/configuration/execution-modes-local-conflicts.rst @@ -0,0 +1,133 @@ +:orphan: + +.. _execution-modes-local-conflicts: + +Airflow and dbt dependencies conflicts +====================================== + +When using the `Local Execution Mode `__, users may face dependency conflicts between +`Apache Airflow® `_ and dbt. The conflicts may increase depending on the Airflow providers and dbt adapters being used. + +If you find errors, we recommend users isolating the installation of dbt from the Airflow installation. 
+With the `Local Execution Mode `__, this can be accomplished by installing dbt in a separate +Python virtualenv and setting the `ExecutionConfig.dbt_executable_path <../configuration/execution-config.html>`_ and +`RenderConfig.dbt_executable_path <../configuration/render-config.html>`_ parameters. + +The page `execution modes `__ describes many other methods that support isolating dbt from Airflow. + +In the following table, ``x`` represents combinations that lead to conflicts (vanilla ``apache-airflow`` and ``dbt-core`` packages): + ++---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ +| Airflow / DBT | 1.0 | 1.1 | 1.2 | 1.3 | 1.4 | 1.5 | 1.6 | 1.7 | 1.8 | 1.9 | 1.10 | ++===============+=====+=====+=====+=====+=====+=====+=====+=====+=====+=====+======+ +| 2.2 | | | | x | x | x | x | x | x | x | x | ++---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ +| 2.3 | x | x | | x | x | x | x | x | x | x | x | ++---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ +| 2.4 | x | x | x | | | | | | | | | ++---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ +| 2.5 | x | x | x | | | | | | | | | ++---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ +| 2.6 | x | x | x | x | x | | | | | | | ++---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ +| 2.7 | x | x | x | x | x | | | | | | | ++---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ +| 2.8 | x | x | x | x | x | | x | | | | x | ++---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ +| 2.9 | x | x | x | x | x | | | | | | | ++---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ +| 2.10 | x | x | x | x | x | | | | | | | ++---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ +| 2.11 | x | x | x | x | x | | | | | | | 
++---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ +| 3.0 | x | x | x | x | x | x | x | x | | | x | ++---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ + +Examples of errors +----------------------------------- + +.. code-block:: bash + + The conflict is caused by: + apache-airflow 2.8.0 depends on pydantic>=2.3.0 + dbt-semantic-interfaces 0.4.2 depends on pydantic~=1.10 + apache-airflow 2.8.0 depends on pydantic>=2.3.0 + dbt-semantic-interfaces 0.4.2.dev0 depends on pydantic~=1.10 + apache-airflow 2.8.0 depends on pydantic>=2.3.0 + dbt-semantic-interfaces 0.4.1 depends on pydantic~=1.10 + apache-airflow 2.8.0 depends on pydantic>=2.3.0 + dbt-semantic-interfaces 0.4.0 depends on pydantic~=1.10 + + +.. code-block:: bash + + ERROR: Cannot install apache-airflow==2.2.4 and dbt-core==1.5.0 because these package versions have conflicting dependencies. + The conflict is caused by: + apache-airflow 2.2.4 depends on jinja2<3.1 and >=2.10.1 + dbt-core 1.5.0 depends on Jinja2==3.1.2 + +.. code-block:: bash + + ERROR: Cannot install apache-airflow==2.6.0 and dbt-core because these package versions have conflicting dependencies. + The conflict is caused by: + apache-airflow 2.6.0 depends on importlib-metadata<5.0.0 and >=1.7; python_version < "3.9" + dbt-semantic-interfaces 0.1.0.dev7 depends on importlib-metadata==6.6.0 + +.. code-block:: bash + + ERROR: Cannot install apache-airflow, apache-airflow==2.7.0 and dbt-core==1.4.0 because these package versions have conflicting dependencies. 
+ + The conflict is caused by: + dbt-core 1.4.0 depends on pyyaml>=6.0 + connexion 2.12.0 depends on PyYAML<6 and >=5.1 + dbt-core 1.4.0 depends on pyyaml>=6.0 + connexion 2.11.2 depends on PyYAML<6 and >=5.1 + dbt-core 1.4.0 depends on pyyaml>=6.0 + connexion 2.11.1 depends on PyYAML<6 and >=5.1 + dbt-core 1.4.0 depends on pyyaml>=6.0 + connexion 2.11.0 depends on PyYAML<6 and >=5.1 + apache-airflow 2.7.0 depends on jsonschema>=4.18.0 + flask-appbuilder 4.3.3 depends on jsonschema<5 and >=3 + connexion 2.10.0 depends on jsonschema<4 and >=2.5.1 + +.. code-block:: bash + +ERROR: Cannot install apache-airflow and dbt-core==1.10.0 because these package versions have conflicting dependencies. + +The conflict is caused by: + dbt-core 1.10.0 depends on pydantic<2 + apache-airflow-core 3.0.0 depends on pydantic>=2.11.0 + + + +How to reproduce +---------------- + +The table was created by running `nox `__ with the following ``noxfile.py``: + +.. code-block:: python + + import nox + + nox.options.sessions = ["compatibility"] + nox.options.reuse_existing_virtualenvs = True + + + @nox.session(python=["3.10"]) + @nox.parametrize( + "dbt_version", + ["1.0", "1.1", "1.2", "1.3", "1.4", "1.5", "1.6", "1.7", "1.8", "1.9", "1.10"], + ) + @nox.parametrize( + "airflow_version", + ["2.2.4", "2.3", "2.4", "2.5", "2.6", "2.7", "2.8", "2.9", "2.10", "2.11", "3.0"], + ) + def compatibility(session: nox.Session, airflow_version, dbt_version) -> None: + """Run both unit and integration tests.""" + session.run( + "pip3", + "install", + "--pre", + f"apache-airflow=={airflow_version}", + f"dbt-core=={dbt_version}", + ) diff --git a/docs/configuration/index.rst b/docs/configuration/index.rst index a6042327b0..d699e6189e 100644 --- a/docs/configuration/index.rst +++ b/docs/configuration/index.rst @@ -6,31 +6,88 @@ Configuration Cosmos offers a number of configuration options to customize its behavior. For more info, check out the links on the left or the table of contents below. .. 
toctree:: - :caption: Contents: + :maxdepth: 1 + :hidden: + :caption: Translating dbt into Airflow + + Source Nodes Rendering + Post-rendering DAG customization + +.. toctree:: + :maxdepth: 3 + :hidden: + :caption: How Cosmos runs dbt + + execution-modes-local-conflicts + run-dbt/execution-modes + +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Configure tests + + configure-tests/testing-behavior + +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Callbacks + + callbacks/callbacks + +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Multi-project Setups - dbt Fusion Multi-Project Setups +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Operators + + Operator Args + +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Documentation + + dbt-docs/generating-docs + dbt-docs/hosting-docs + +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Optimizing Performance + + Memory Optimization + dbt Fusion + Selecting & Excluding + Parsing Methods + Partial Parsing + Caching + Render Config + +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Configurations + Project Config Profile Config Execution Config - Render Config - Parsing Methods + +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Customizing Airflow + Configuring in Airflow Configuring Lineage - Generating Docs - Hosting Docs Scheduling - Testing Behavior - Selecting & Excluding - Partial Parsing - Source Nodes Rendering - Post-rendering DAG customization - Operator Args Compiled SQL Logging - Caching - Task display name - Callbacks - Memory Optimization + Task display name \ No newline at end of file diff --git a/docs/configuration/run-dbt/airflow-worker/async-execution-mode.rst b/docs/configuration/run-dbt/airflow-worker/async-execution-mode.rst new file mode 100644 index 0000000000..55d6778abc --- /dev/null +++ b/docs/configuration/run-dbt/airflow-worker/async-execution-mode.rst @@ -0,0 +1,247 @@ +.. 
_async-execution-mode: + +Airflow Async Execution Mode +============================ + +This execution mode can reduce the runtime by 35% in comparison to Cosmos LOCAL execution mode, but is currently only available for BigQuery. While this mode was introduced in Cosmos 1.9, we strongly encourage users to use Cosmos 1.11, which has significant performance improvements. + +It can be particularly useful for long-running transformations, since it leverages Airflow's `deferrable operators `__. + +In this mode, there is a ``SetupAsyncOperator`` that will pre-generate the SQL files for the dbt project and upload them to Airflow XCom or a remote location. A remote location will only be used if users set ``AIRFLOW__COSMOS__REMOTE_TARGET_PATH`` and ``AIRFLOW__COSMOS__REMOTE_TARGET_PATH_CONN_ID``. This operator is run before the remaining pipeline. +All the pipeline dbt model transformations will be run using ``DbtRunAirflowAsyncOperator`` which, instead of running the ``dbt run`` command for each model, will download the SQL files from the Airflow XCom or remote location and execute them directly leveraging the Airflow ``BigQueryInsertJobOperator``. + +Users can leverage other existing ``BigQueryInsertJobOperator`` features, such as the UI controls to link to the job in the BigQuery UI. + + +Advantages of Airflow Async Mode +++++++++++++++++++++++++++++++++ + +- **Improved Task Throughput:** Async tasks free up Airflow workers by leveraging the Airflow Trigger framework. While long-running SQL transformations are executing in the data warehouse, the worker is released and can handle other tasks, increasing overall task throughput. +- **Better Resource Utilization:** By minimizing idle time on Airflow workers, async tasks allow more efficient use of compute resources. Workers aren't blocked waiting for external systems and can be reused for other work while waiting on async operations.
+- **Faster Task Execution:** With Cosmos ``SetupAsyncOperator``, the SQL transformations are precompiled and uploaded to XCom (default behaviour) or a remote location. Instead of invoking a full dbt run during each dbt model task, the SQL files are downloaded from this XCom or remote path and executed directly. This eliminates unnecessary overhead from running the full dbt command, resulting in faster and more efficient task execution. + +We have `observed `_ the following performance improvements by running a dbt project with 129 models: + ++----------------------------------------------+--------------------------+ +| How the dbt pipeline was executed | Execution Time (seconds) | ++==============================================+==========================+ +| ``dbt run`` with dbt Core 1.10 | 13 | ++----------------------------------------------+--------------------------+ +| Cosmos 1.11 with ExecutionMode.LOCAL | 11 | ++----------------------------------------------+--------------------------+ +| Cosmos 1.11 with ExecutionMode.AIRFLOW_ASYNC | 7 | ++----------------------------------------------+--------------------------+ + + +Getting Started with Airflow Async Mode ++++++++++++++++++++++++++++++++++++++++ + +This guide walks you through setting up an Astro CLI project and running a Cosmos-based DAG with a deferrable operator, enabling asynchronous task execution in Apache Airflow. + +Prerequisites ++++++++++++++ + +- `Astro CLI `_ +- Airflow>=2.9 + +1. Create Astro-CLI Project ++++++++++++++++++++++++++++ + +Run the following command in your terminal: + +.. code-block:: bash + + astro dev init + +This will create an Astro project with the following structure: + +.. code-block:: bash + + . + ├── Dockerfile + ├── README.md + ├── airflow_settings.yaml + ├── dags/ + ├── include/ + ├── packages.txt + ├── plugins/ + ├── requirements.txt + └── tests/ + + +2. 
Update Dockerfile +++++++++++++++++++++ + +Edit your Dockerfile to ensure all necessary requirements are included. + +.. code-block:: bash + + FROM astrocrpublic.azurecr.io/runtime:3.0-2 + + +3. Add astronomer-cosmos Dependency ++++++++++++++++++++++++++++++++++++ + +In your ``requirements.txt``, add: + +.. code-block:: bash + + astronomer-cosmos[dbt-bigquery, google]>=1.9 + + +4. Create Airflow DAG ++++++++++++++++++++++ + +1. Create a new DAG file: ``dags/cosmos_async_dag.py`` + +- Update the ``dataset`` and ``project`` + +.. code-block:: python + + import os + from datetime import datetime + from pathlib import Path + + from cosmos import ( + DbtDag, + ExecutionConfig, + ExecutionMode, + ProfileConfig, + ProjectConfig, + ) + from cosmos.constants import TestBehavior + from cosmos.profiles import GoogleCloudServiceAccountDictProfileMapping + + DEFAULT_DBT_ROOT_PATH = Path(__file__).resolve().parent / "dbt" + DBT_ROOT_PATH = Path(os.getenv("DBT_ROOT_PATH", DEFAULT_DBT_ROOT_PATH)) + DBT_ADAPTER_VERSION = os.getenv("DBT_ADAPTER_VERSION", "1.9") + + cosmos_async_dag = DbtDag( + project_config=ProjectConfig( + DBT_ROOT_PATH / "jaffle_shop", + ), + profile_config=ProfileConfig( + profile_name="default", + target_name="dev", + profile_mapping=GoogleCloudServiceAccountDictProfileMapping( + conn_id="gcp_conn", + profile_args={ + "dataset": "cosmos_async_demo", + "project": "astronomer-**", + }, + ), + ), + execution_config=ExecutionConfig( + execution_mode=ExecutionMode.AIRFLOW_ASYNC, + async_py_requirements=[f"dbt-bigquery=={DBT_ADAPTER_VERSION}"], + ), + schedule=None, + start_date=datetime(2025, 1, 1), + catchup=False, + dag_id="cosmos_async_dag", + operator_args={ + "location": "US", + "install_deps": True, + "full_refresh": True, + "virtualenv_dir": "dbt_venv", + }, + ) + +2. Folder structure for dbt project + +- Add a valid dbt project inside your Airflow project under ``dags/dbt/``. + + +5. 
Start the Project +++++++++++++++++++++ + +Launch the Airflow project locally: + +.. code-block:: bash + + astro dev start + +This will: + +- Spin up the scheduler, webserver, and triggerer (needed for deferrable operators) +- Expose Airflow UI at http://localhost:8080 + +6. Create Airflow Connection +++++++++++++++++++++++++++++ + +Create an Airflow connection with following configurations + +- Connection ID: gcp_conn +- Connection Type: google_cloud_platform +- Extra Fields JSON: + +.. code-block:: bash + + { + "project": "astronomer-**", + "keyfile_dict": { + "type": "***", + "project_id": "***", + "private_key_id": "***", + "private_key": "***", + "client_email": "***", + "client_id": "***", + "auth_uri": "***", + "token_uri": "***", + "auth_provider_x509_cert_url": "***", + "client_x509_cert_url": "***", + "universe_domain": "***" + } + } + + +7. Execute the DAG +++++++++++++++++++ + +1. Visit the Airflow UI at ``http://localhost:8080`` +2. Enable the DAG: ``cosmos_async_dag`` +3. Trigger the DAG manually + +.. image:: /_static/jaffle_shop_async_execution_mode.png + :alt: Cosmos dbt Async DAG + :align: center + +The ``run`` tasks will run asynchronously via the deferrable operator, freeing up worker slots while waiting on I/O or long-running tasks. + + +Control of where to upload the SQL files +++++++++++++++++++++++++++++++++++++++++ + +For optimal performance we encourage to keep Cosmos standard behaviour (introduced in 1.11), which is to upload the SQL files to XCom, instead of a remote object location. + +For the benchmark example described in a previous section, there was an overhead of ~500 seconds with remote SQL file upload/download, but only ~2 seconds using XCom, which can outweigh the performance improvements introduced by using deferrable operators. + +However, if you want to upload the SQL files to a remote object location instead of XCom, you can set the following environment variables: + +.. 
code-block:: bash + + AIRFLOW__COSMOS__REMOTE_TARGET_PATH=gs://cosmos_remote_target_demo + AIRFLOW__COSMOS__REMOTE_TARGET_PATH_CONN_ID=gcp_conn + + +Limitations ++++++++++++ + + +1. **Limited to dbt models**: Only dbt resource type models are run asynchronously using Airflow deferrable operators. Other resource types are executed synchronously, similar to the local execution mode. + +2. **BigQuery support only**: This mode only supports BigQuery as the target database. If a different target is specified, Cosmos will throw an error indicating the target database is unsupported in this mode. Adding support for other adapters is on the roadmap. + +3. **ProfileMapping parameter required**: You need to specify the ``ProfileMapping`` parameter in the ``ProfileConfig`` for your DAG. Refer to the example DAG below for details on setting this parameter. + +4. **Location parameter required**: You must specify the location of the BigQuery dataset in the ``operator_args`` of the ``DbtDag`` or ``DbtTaskGroup``. The example DAG below provides guidance on this. + +5. **async_py_requirements parameter required**: If you're using the default approach of having a setup task, you must specify the necessary dbt adapter Python requirements based on your profile type for the async execution mode in the ``ExecutionConfig`` of your ``DbtDag`` or ``DbtTaskGroup``. The example DAG below provides guidance on this. + +6. **Creation of new isolated virtual environment for each task run**: By default, the ``SetupAsyncOperator`` creates and executes within a new isolated virtual environment for each task run, which can cause performance issues. To reuse an existing virtual environment, use the ``virtualenv_dir`` parameter within the ``operator_args`` of the ``DbtDag``. We have observed that for ``dbt-bigquery``, the ``SetupAsyncOperator`` executes approximately 30% faster when reusing an existing virtual environment, particularly for transformations that take around 10 minutes to complete. + +7. 
**Performance degradation when uploading to remote object location**: Even though it is possible to upload the SQL files to a remote object location by setting environment variables, it is slow. We observed that this introduces a significant overhead in the execution time (500s for 129 models). + +8. **TeardownAsyncOperator limitation**: When using a remote object location, in addition to the ``SetupAsyncOperator``, a ``TeardownAsyncOperator`` is also added to the DAG. This task will delete the SQL files from the remote location by the end of the DAG Run. This can lead to a limitation from a retry perspective, as described in the issue `#2066 `_. This can be avoided by setting the ``enable_teardown_async_task`` configuration to ``False``, as described in the :ref:`enable_teardown_async_task` section. + +For a comparison between different Cosmos execution modes, please check the :ref:`execution-modes-comparison` section. diff --git a/docs/configuration/run-dbt/airflow-worker/index.rst b/docs/configuration/run-dbt/airflow-worker/index.rst new file mode 100644 index 0000000000..00cb281bc8 --- /dev/null +++ b/docs/configuration/run-dbt/airflow-worker/index.rst @@ -0,0 +1,9 @@ +Run dbt in an Airflow worker +============================ + +.. toctree:: + :maxdepth: 1 + :caption: Run dbt in an Airflow worker + + async-execution-mode + watcher-execution-mode \ No newline at end of file diff --git a/docs/configuration/run-dbt/airflow-worker/watcher-execution-mode.rst b/docs/configuration/run-dbt/airflow-worker/watcher-execution-mode.rst new file mode 100644 index 0000000000..af7589650c --- /dev/null +++ b/docs/configuration/run-dbt/airflow-worker/watcher-execution-mode.rst @@ -0,0 +1,480 @@ +..
_watcher-execution-mode: + +Introducing ``ExecutionMode.WATCHER``: Experimental High-Performance dbt Execution in Cosmos +============================================================================================ + +With the release of **Cosmos 1.11.0**, we are introducing a powerful new experimental execution mode — ``ExecutionMode.WATCHER`` — designed to drastically reduce dbt pipeline run times in Airflow. + +Early benchmarks show that ``ExecutionMode.WATCHER`` can cut total DAG runtime **by up to 80%**, bringing performance **on par with running dbt CLI locally**. Since this execution mode improves the performance by leveraging `dbt threading `_ and Airflow deferrable sensors, the performance gains will depend on three major factors: + +- The amount of dbt ``threads`` set either via the dbt profile configuration or the dbt ``--threads`` flag +- The topology of the dbt pipeline +- The ``poke_interval`` and ``timeout`` settings of the ``DbtConsumerWatcherSensor`` operator, which determine the frequency and duration of the sensor's polling. + +------------------------------------------------------------------------------- + +Background: The Problem with the Local Execution Mode in Cosmos +--------------------------------------------------------------- + +When running dbt via Cosmos using the default ``ExecutionMode.LOCAL``, each dbt model is executed as a separate Airflow task. + +This provides strong observability and task-level retry control — but it comes at a cost. Each model runs a new dbt process, which introduces significant overhead. 
+ +Consider the `google/fhir-dbt-analytics `_ project: + ++-------------------------------------------------------------+-----------------------------------+------------------+ +| Run Type | Description | Total Runtime | ++=============================================================+===================================+==================+ +| Single ``dbt run`` (dbt CLI) | Runs the whole DAG in one command | ~5m 30s | ++-------------------------------------------------------------+-----------------------------------+------------------+ +| One ``dbt run`` per model, totalling 184 commands (dbt CLI) | Each model is its own task | ~32m | ++-------------------------------------------------------------+-----------------------------------+------------------+ + +This difference motivated a rethinking of how Cosmos interacts with dbt. + +------------------------------------------------------------------------------- + +Concept: ``ExecutionMode.WATCHER`` +---------------------------------- + +``ExecutionMode.WATCHER`` combines the **speed of a single dbt run** with the **observability and task management of Airflow**. + +It is built on two operator types: + +* ``DbtProducerWatcherOperator`` (`#1982 `_) + Runs dbt **once** across the entire pipeline, register to `dbt event callbacks `_ and sends model progress updates via Airflow **XComs**. + +* ``DbtConsumerWatcherSensor`` (`#1998 `_) + Watches those XComs and marks individual Airflow tasks as complete when their corresponding dbt models finish. + +Together, these operators let you: + +* Run dbt as a single command (for speed) +* Retain model-level observability (for clarity) +* Retry specific models (for resilience) + +------------------------------------------------------------------------------- + +Performance Gains +----------------- + +We used a dbt project developed by Google, the `google/fhir-dbt-analytics `_ project, that interfaces with BigQuery. 
It contains: +* 2 seeds +* 52 sources +* 185 models + +Initial benchmarks, using illustrate significant improvements: + ++-----------------------------------------------+-----------+--------------------+ +| Environment | Threads | Execution Time (s) | ++===============================================+===========+====================+ +| dbt build (dbt CLI) | 4 | 6–7 | ++-----------------------------------------------+-----------+--------------------+ +| dbt run per model (dbt CLI) | — | 30 | +| similar to the Cosmos ``ExecutionMode.LOCAL`` | | | ++-----------------------------------------------+-----------+--------------------+ +| Cosmos ``ExecutionMode.LOCAL`` (Astro CLI) | — | 10–15 | ++-----------------------------------------------+-----------+--------------------+ +| Cosmos ``ExecutionMode.WATCHER`` (Astro CLI) | 1 | 26 | +| | 2 | 14 | +| | 4 | 7 | +| | 8 | 4 | +| | 16 | 2 | ++-----------------------------------------------+-----------+--------------------+ +| Cosmos ``ExecutionMode.WATCHER`` (Astro Cloud | 8 | ≈5 | +| Standard Deployment with A10 workers | | | ++-----------------------------------------------+-----------+--------------------+ + +The last line represents the performance improvement in a real-world Airflow deployment, using `Astro Cloud `_. + +Depending on the dbt workflow topology, if your dbt DAG previously took 5 minutes with ``ExecutionMode.LOCAL``, you can expect it to complete in roughly **1 minute** with ``ExecutionMode.WATCHER``. + +We plan to repeat these benchmarks and share the code with the community in the future. + + +.. note:: + ``ExecutionMode.WATCHER`` relies on the ``threads`` value defined in your dbt profile. Start with a conservative value that matches the CPU capacity of your Airflow workers, then gradually increase it to find the sweet spot between faster runs and acceptable memory/CPU usage. 
+ +When we ran the `astronomer/cosmos-benchmark `_ project with ``ExecutionMode.WATCHER``, that same ``threads`` setting directly affected runtime: moving from 1 to 8 threads reduced the end-to-end ``dbt build`` duration from roughly 26 seconds to about 4 seconds (see table above), while 16 threads squeezed it to around 2 seconds at the cost of higher CPU usage. Use those numbers as a reference point when evaluating how thread counts scale in your own environment. + +To increase the number of threads, edit your dbt ``profiles.yml`` (or Helm values if you manage the profile there) and update the ``threads`` key for the target you use with Cosmos: + +.. code-block:: yaml + + your_dbt_project: + target: prod + outputs: + prod: + type: postgres + host: your-host + user: your-user + password: your-password + schema: analytics + threads: 8 # increase or decrease to match available resources + + +If you prefer to manage threads through Cosmos profile mappings instead of editing ``profiles.yml`` directly, pass ``profile_args={"threads": }`` to your ``ProfileConfig``. For example, using the built-in ``PostgresUserPasswordProfileMapping``: + +.. code-block:: python + + from cosmos.config import ProfileConfig + from cosmos.profiles import PostgresUserPasswordProfileMapping + + profile_config = ProfileConfig( + profile_name="jaffle_shop", + target_name="prod", + profile_mapping=PostgresUserPasswordProfileMapping( + conn_id="postgres_connection", + profile_args={"threads": 8}, + ), + ) + + +------------------------------------------------------------------------------- + +Example Usage of ``ExecutionMode.WATCHER`` +------------------------------------------ + +There are two main ways to use the new execution mode in Cosmos — directly within a ``DbtDag``, or embedded as part of a ``DbtTaskGroup`` inside a larger DAG. 
+ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Example 1 — Using ``DbtDag`` with ``ExecutionMode.WATCHER`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can enable WATCHER mode directly in your ``DbtDag`` configuration. +This approach is best when your Airflow DAG is fully dedicated to a dbt project. + +.. literalinclude:: ../../../../dev/dags/example_watcher.py + :language: python + :start-after: [START example_watcher] + :end-before: [END example_watcher] + +As can be observed, the only difference with the default ``ExecutionMode.LOCAL`` is the addition of the ``execution_config`` parameter with the ``execution_mode`` set to ``ExecutionMode.WATCHER``. The ``ExecutionMode`` enum can be imported from ``cosmos.constants``. For more information on the ``ExecutionMode.LOCAL``, please check the `dedicated page `__. + +**How it works:** + +* Cosmos executes your dbt project once via a producer task. +* Model-level Airflow tasks act as watchers or sensors, updating their state as dbt completes each model. +* The DAG remains fully observable and retryable, with **dramatically improved runtime performance** (often 5× faster than ``ExecutionMode.LOCAL``). + +**What it looks like:** + +.. image:: /_static/jaffle_shop_watcher_dbt_dag_dag_run.png + :alt: Cosmos DbtDag with `ExecutionMode.WATCHER` + :align: center + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Example 2 — Using ``DbtTaskGroup`` with ``ExecutionMode.WATCHER`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If your Airflow DAG includes multiple stages or integrations (e.g., data ingestion → dbt → reporting), use ``DbtTaskGroup`` to embed your dbt project into a larger DAG — still benefiting from WATCHER performance. + +..
code-block:: python + :caption: example_watcher_taskgroup.py + :name: example_watcher_taskgroup + + from airflow.models import DAG + from airflow.operators.empty import EmptyOperator + from cosmos import DbtTaskGroup + + with DAG( + dag_id="example_watcher_taskgroup", + schedule="@daily", + start_date=datetime(2023, 1, 1), + catchup=False, + ): + """ + The simplest example of using Cosmos to render a dbt project as a TaskGroup. + """ + pre_dbt = EmptyOperator(task_id="pre_dbt") + + first_dbt_task_group = DbtTaskGroup( + group_id="first_dbt_task_group", + execution_config=ExecutionConfig( + execution_mode=ExecutionMode.WATCHER, + ), + project_config=ProjectConfig(DBT_PROJECT_PATH), + profile_config=profile_config, + operator_args=operator_args, + ) + + pre_dbt >> first_dbt_task_group + +**Key advantages:** + +* Integrates seamlessly into complex Airflow DAGs. +* Uses the same high-performance producer/consumer execution model. +* Each ``DbtTaskGroup`` behaves independently — allowing modular dbt runs within larger workflows. + +.. image:: /_static/jaffle_shop_watcher_dbt_taskgroup_dag_run.png + :alt: Cosmos DbtDag with `ExecutionMode.WATCHER` + :align: center + +------------------------------------------------------------------------------- + +Additional details +------------------- + +~~~~~~~~~~~~~~~~ +How retries work +~~~~~~~~~~~~~~~~ + +When the ``dbt build`` command run by ``DbtProducerWatcherOperator`` fails, it will notify all the ``DbtConsumerWatcherSensor``. + +The individual watcher tasks that subclass ``DbtConsumerWatcherSensor`` can retry the dbt command themselves, using the same behavior as ``ExecutionMode.LOCAL``. + +If a branch of the DAG fails, users can clear the status of a failed consumer task, including its downstream tasks, via the Airflow UI, and each of them will run in ``ExecutionMode.LOCAL``. + +**Producer retry behavior** + +.. 
versionadded:: 1.12.2 + +When the ``DbtProducerWatcherOperator`` is triggered for a retry (try_number > 1), it will not re-run the dbt build command and will succeed. In previous versions of Cosmos, the producer task would fail during retries. +This behavior is designed to support TaskGroup-level retries, as reported in `#2282 `_. + +**Why this matters:** + +- In earlier versions, attempting to retry the producer task would raise an ``AirflowException``, causing the retry to fail immediately. +- Now, the producer gracefully skips execution on retries, logging an informational message explaining that the retry was skipped to avoid running a second ``dbt build``. +- This allows users to retry entire TaskGroups and/or DAGs without the producer task blocking the retry flow. + +**Important considerations:** + +- The producer task should still be configured with ``retries=0`` (which Cosmos enforces by default) to avoid unintended duplicate ``dbt build`` runs. + +- By default, Cosmos sets ``retries`` to ``0`` in ``DbtProducerWatcherOperator``. Users can retry manually by clearing the status of the producer task and all its downstream tasks, keeping in mind that the producer task will not re-run the ``dbt build`` command and will succeed. + +The overall retry behavior will be further improved once `#1978 `_ is implemented. + +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Watcher dbt Execution Queue +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. versionadded:: 1.14.0 + +In watcher execution mode, by default, consumer sensor tasks are lightweight sensors that wait for the producer task to complete. On their first attempt, they require minimal CPU and memory resources. However, when these tasks retry, they execute the dbt command for the node, which may require significantly more resources. + +The ``watcher_dbt_execution_queue`` configuration allows you to specify a different worker queue for retry attempts.
This enables you to: + +- **Optimize resource allocation** — Use lightweight workers for initial sensor execution and high-resource workers for retries +- **Improve scheduling efficiency** — Prevent resource contention between initial sensor tasks and retry executions +- **Scale independently** — Scale retry queues separately based on retry workload patterns + +**Configuration:** + +Set the ``watcher_dbt_execution_queue`` in your Airflow configuration: + +.. code-block:: ini + + [cosmos] + watcher_dbt_execution_queue = high_memory_queue + +Or via environment variable: + +.. code-block:: bash + + export AIRFLOW__COSMOS__WATCHER_DBT_EXECUTION_QUEUE=high_memory_queue + +**How it works:** + +- For watcher producer tasks (``DbtProducerWatcherOperator``), the configured queue is used during their first execution +- For watcher consumer tasks (``DbtConsumerWatcherSensor``), from their first retry onwards, if ``watcher_dbt_execution_queue`` is configured, the task is automatically assigned to the specified queue +- This behavior is enforced by Cosmos via an `Airflow cluster policy `_ (``task_instance_mutation_hook``) that mutates ``task_instance.queue`` at runtime for retry attempts + +.. note:: + + For producer task execution, we encourage users to set the ``watcher_dbt_execution_queue`` configuration. If, for any reason, users prefer to use a different node pool for producer tasks without setting an Airflow Cluster Policy, they can set the ``queue`` argument via ``setup_operator_args``. This, however, would not solve the problem of assigning consumer retries to nodes that may have more memory and CPU available. + + The effective precedence is: + + ``watcher_dbt_execution_queue`` > explicit ``queue`` on the producer (from ``setup_operator_args``) > ``operator_args`` > your Airflow deployment’s default queue. 
+ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Installation of Airflow and dbt +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Since Cosmos 1.12.0, ``ExecutionMode.WATCHER`` works well regardless of whether dbt and Airflow are installed in the same Python virtual environment. + +When dbt and Airflow are installed in the same Python virtual environment, the ``ExecutionMode.WATCHER`` uses dbt `callback features `_. + +When dbt and Airflow are not installed in the same Python virtual environment, the ``ExecutionMode.WATCHER`` consumes the dbt `structured logging `_ to update the consumer tasks. + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Synchronous versus Asynchronous sensor execution +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In Cosmos 1.11.0, the ``DbtConsumerWatcherSensor`` operator is implemented as a synchronous XCom sensor, which continuously occupies the worker slot - even if it is just sleeping and checking periodically. + +Starting with Cosmos 1.12.0, the ``DbtConsumerWatcherSensor`` supports +`deferrable (asynchronous) execution `_. Deferrable execution frees up the Airflow worker slot, while task status monitoring is handled by the Airflow triggerer component, +which increases overall task throughput. By default, the sensor now runs in deferrable mode. + +------------------------------------------------------------------------------- + +Known Limitations +------------------- + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Producer task implementation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The producer task is implemented as a ``DbtProducerWatcherOperator`` and currently relies on dbt being installed alongside the Airflow deployment, as in the ``ExecutionMode.LOCAL`` implementation. + +The alternative to this implementation is to use ``ExecutionMode.WATCHER_KUBERNETES``, which is built on top of ``ExecutionMode.KUBERNETES``. Check :ref:`watcher-kubernetes-execution-mode` for more information.
+ +~~~~~~~~~~~~~~~~~~~~~~~~ +Individual dbt Operators +~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``ExecutionMode.WATCHER`` efficiently implements the following operators: +* ``DbtSeedWatcherOperator`` +* ``DbtSnapshotWatcherOperator`` +* ``DbtRunWatcherOperator`` + +However, other operators that are available in the ``ExecutionMode.LOCAL`` mode are not implemented. + +The ``DbtBuildWatcherOperator`` is not implemented, since the build command is executed by the producer ``DbtProducerWatcherOperator`` operator. + +Additionally, since the ``dbt build`` command does not run ``source`` nodes, the operator ``DbtSourceWatcherOperator`` is equivalent to the ``DbtSourceLocalOperator`` operator, from ``ExecutionMode.LOCAL``. + +Finally, the following features are not implemented as operators under ``ExecutionMode.WATCHER``: + +* ``dbt ls`` +* ``dbt run-operation`` +* ``dbt docs`` +* ``dbt clone`` + +You can still invoke these operators using the default ``ExecutionMode.LOCAL`` mode. + +~~~~~~~~~~~~~ +Test behavior +~~~~~~~~~~~~~ + +By default, the watcher mode runs tests alongside models via the ``dbt build`` command being executed by the producer ``DbtProducerWatcherOperator`` operator. + +As a starting point, this execution mode does not support the ``TestBehavior.AFTER_EACH`` behavior, since the tests are not run as individual tasks. Since this is the default ``TestBehavior`` in Cosmos, we are injecting ``EmptyOperator`` as a starting point to ensure a seamless transition to the new mode. + +The ``TestBehavior.BUILD`` behavior is embedded in the producer ``DbtProducerWatcherOperator`` operator. + +The ``TestBehavior.NONE`` and ``TestBehavior.AFTER_ALL`` behave similarly to ``ExecutionMode.LOCAL``. 
+ +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Airflow Datasets and Assets +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +While the ``ExecutionMode.WATCHER`` supports the ``emit_datasets`` parameter, the Airflow Datasets and Assets are emitted from the ``DbtProducerWatcherOperator`` task instead of the consumer tasks, as done for other Cosmos' execution modes. + +~~~~~~~~~~~~~~~~~~~~~~ +Source freshness nodes +~~~~~~~~~~~~~~~~~~~~~~ + +Since Cosmos 1.6, it `supports the rendering of source nodes `_. + +We noticed some Cosmos users use this feature alongside `overriding Cosmos source nodes `_ as sensors or another operator that allows them to skip the following branch of the DAG if the source is not fresh. + +This use case is not currently supported by the ``ExecutionMode.WATCHER``, since the ``dbt build`` command does not run `source freshness checks `_. + +We have a follow-up ticket to `further investigate this use case `_. + + +Advanced config +------------------- + +~~~~~~~~~~~~~~~~ +Callback support +~~~~~~~~~~~~~~~~ + +The ``DbtProducerWatcherOperator`` and ``DbtConsumerWatcherSensor`` will use the user-defined callback function similar to ``ExecutionMode.LOCAL`` mode. + +You can define different ``callback`` behaviors for producer and consumer nodes by using ``operator_args`` to configure the consumer callback and ``setup_operator_args`` to override the callback for the producer, as described below. + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Overriding ``operator_args`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``DbtProducerWatcherOperator`` and ``DbtConsumerWatcherSensor`` operators handle ``operator_args`` similar to the ``ExecutionMode.LOCAL`` mode. + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Using Custom Args for the Producer and Watcher +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. versionadded:: 1.12.0 + +If you need to override ``operator_args`` for the ``DbtProducerWatcherOperator``, you can do so using ``setup_operator_args``. 
+ +When using ``ExecutionMode.WATCHER``, you may want to configure properties, such as ``retries``, specifically for the ``DbtProducerWatcherOperator`` task. This can be useful for several reasons: +- Improved resilience - transient issues (e.g., temporary database or network failures) can be automatically retried. +- Reduced manual intervention - failed producer runs can recover without requiring operator restarts. +- Better reliability - retry behavior can be tuned independently from sensor tasks. + +Example: Configure the producer task with custom retry settings. + +.. code-block:: python + + from datetime import timedelta + from cosmos.config import ExecutionConfig + from cosmos.constants import ExecutionMode + + execution_config = ExecutionConfig( + execution_mode=ExecutionMode.WATCHER, + setup_operator_args={ + "retries": 0, + "retry_delay": timedelta(minutes=5), + }, + ) + +This allows you to customize ``DbtProducerWatcherOperator`` retry behavior without affecting the arguments used by the other sensor tasks. + +If configuring queues, we suggest using the previously mentioned ``watcher_dbt_execution_queue`` configuration instead of the ``setup_operator_args``. + +.. note:: + Please note that ``setup_operator_args`` is specific to Cosmos and is not related to Airflow setup or teardown tasks. + + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Sensor slot allocation and polling +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Each ``DbtDag`` or ``DbtTaskGroup`` root node will start up during DAG runs at - potentially - the same time as the DAG Run. This may not happen, since it is dependent on the +concurrency settings and available task slots in the Airflow deployment. + +The consequence is that tasks may take longer to be updated if they are not sensing at the moment that the transformation happens. + +We plan to review this behaviour and alternative approaches in the future.
+ + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Asynchronous sensor execution +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- Deferrable execution is currently supported only for dbt models, seeds and snapshots. +- Deferrable execution applies only to the first task attempt (try number 1). For subsequent retries, the sensor falls back to synchronous execution. + +To disable asynchronous execution, set the ``deferrable`` flag to ``False`` in the ``operator_args``. + +.. literalinclude:: ../../dev/dags/example_watcher.py + :language: python + :start-after: [START example_watcher_synchronous] + :end-before: [END example_watcher_synchronous] + +------------------------------------------------------------------------------- + +Troubleshooting +--------------- + +Problem: "I changed from ``ExecutionMode.LOCAL`` to ``ExecutionMode.WATCHER``, but my DAG is running slower." +Answer: Please, check the number of threads that are being used by searching the producer task logs for a message similar to ``Concurrency: 1 threads (target='DEV')``. To leverage the Watcher mode, you should have a high number of threads, at least dbt's default of 4. Check the `dbt threading docs `_ for more information on how to set the number of threads. + + +Summary +------- + +``ExecutionMode.WATCHER`` represents a significant leap forward for running dbt in Airflow via Cosmos: + +* ✅ Up to **5× faster** dbt DAG runs +* ✅ Maintains **model-level visibility** in Airflow +* ✅ Enables **smarter resource allocation** +* ✅ Built on proven Cosmos rendering techniques + +This is an experimental feature, and we are looking for feedback from the community. + +Stay tuned for further documentation and base image support for the ``ExecutionMode.WATCHER`` in upcoming releases. 
diff --git a/docs/configuration/run-dbt/container/aws-container-run-job.rst b/docs/configuration/run-dbt/container/aws-container-run-job.rst new file mode 100644 index 0000000000..4321c8f346 --- /dev/null +++ b/docs/configuration/run-dbt/container/aws-container-run-job.rst @@ -0,0 +1,191 @@ +.. _aws-container-run-job: + +Getting Started with Astronomer Cosmos on AWS ECS +================================================== + +Astronomer Cosmos provides a unified way to run containerized workloads across multiple cloud providers. In this guide, you’ll learn how to deploy and run a Cosmos job on AWS Elastic Container Service (ECS) using Fargate. +Schematically, the guide will walk you through the steps required to build the following architecture: + +.. figure:: https://github.com/astronomer/astronomer-cosmos/raw/main/docs/_static/cosmos_aws_ecs_schematic.png + :width: 800 + +Prerequisites ++++++++++++++ + +Before you begin, ensure you have the following: + +- An active **AWS Account** with permissions to create ECS clusters, register task definitions, and run tasks. +- The **AWS CLI** installed and configured with the proper credentials. +- **Docker** installed for building your container image. +- Access to your container registry (for example, **Amazon ECR**) where your job image is stored. +- Basic familiarity with AWS ECS concepts (clusters, task definitions, services, and Fargate). +- An existing installation of **Astronomer Cosmos** (refer to the `Cosmos documentation `_ for more details). + + + +Step-by-step guide +++++++++++++++++++ + +**Install Airflow and Cosmos** + +Create a python virtualenv, activate it, upgrade pip to the latest version and install ``apache airflow`` & ``astronomer cosmos``: + +.. code-block:: bash + + python3 -m venv venv + source venv/bin/activate + python3 -m pip install --upgrade pip + pip install apache-airflow + pip install "astronomer-cosmos[amazon]" + pip install "aiobotocore[boto3]" +.. 
note:: + The package aiobotocore[boto3] is optional; you will need it if you plan to use **deferred tasks**. + +**Set up your ECR** + +1. **Set your secrets** + On the `cosmos-examples `_ repository, you can find a ready-to-use Docker image for the AWS ECS service. Just replace your secrets, or you can create your own. + +2. **AWS CLI login** + Before building and pushing your image, you first need to log in to the AWS service using the AWS CLI tool. + Use the following command: + + .. code-block:: bash + + aws ecr-public get-login-password --region | docker login --username AWS --password-stdin + +3. **Build and tag your image** + Once you have your image ready, run the following commands: + + .. code-block:: bash + + docker build -f Dockerfile.aws_ecs . --platform=linux/amd64 -t + docker tag + +4. **Push your image** + + .. code-block:: bash + + docker push + +**Configure Your AWS Environment** + +1. **Create an ECS Cluster** + + Create an ECS cluster to host your Cosmos jobs. You can do this from the AWS Console or using the AWS CLI: + + .. code-block:: bash + + aws ecs create-cluster --cluster-name my-cosmos-cluster + +2. **Set Up an IAM Role for ECS Tasks** + + Ensure you have an IAM role that your ECS tasks can assume. This role should include permissions for ECS, ECR, and CloudWatch (for logs). For example, you might create a role named ``ecsTaskExecutionRole`` with the managed policies: + + - ``AmazonECSTaskExecutionRolePolicy`` + - (Optional) Additional policies for custom resource access + +3. **Configure Networking** + + For Fargate tasks, make sure you have at least one subnet (preferably in multiple Availability Zones) and a security group that permits outbound internet access if needed. Note the subnet IDs for later use. + +**Prepare Your Cosmos Job Definition** + +Cosmos jobs are defined as container tasks. Create a task definition file (e.g., ``cosmos-task-definition.json``) with the configuration for your job. + +For example: + +.. 
code-block:: json + + { + "family": "cosmos-job", + "networkMode": "awsvpc", + "requiresCompatibilities": [ + "FARGATE" + ], + "cpu": "512", + "memory": "1024", + "executionRoleArn": "arn:aws:iam:::role/ecsTaskExecutionRole", + "containerDefinitions": [ + { + "name": "cosmos-job", + "image": "/your_image:latest", + "essential": true, + "environment": [ + { "name": "VAR1", "value": "value1" }, + { "name": "VAR2", "value": "value2" } + ], + "logConfiguration": { + "logDriver": "awslogs", + "options": { + "awslogs-group": "/ecs/cosmos-job", + "awslogs-region": "us-east-1", + "awslogs-stream-prefix": "ecs" + } + } + } + ] + } + +.. note:: + + Replace ````, ````, and adjust the CPU, memory, and environment variables as needed. + +**Deploy Your Cosmos Job on AWS ECS** + +1. **Register the Task Definition** + + Use the AWS CLI to register your task definition: + + .. code-block:: bash + + aws ecs register-task-definition --cli-input-json file://cosmos-task-definition.json + +2. **Run the Task** + + Run a test task on your ECS cluster. Specify the subnets and security groups in your network configuration. For example: + + .. code-block:: bash + + aws ecs run-task \ + --cluster my-cosmos-cluster \ + --launch-type FARGATE \ + --task-definition cosmos-job \ + --network-configuration "awsvpcConfiguration={subnets=[subnet-12345678,subnet-87654321],securityGroups=[sg-abcdef12],assignPublicIp=ENABLED}" + + Once the test is ok, we are able to run the dbt commands in our Cosmos DAG: + + .. figure:: https://github.com/astronomer/astronomer-cosmos/raw/main/docs/_static/jaffle_shop_aws_ecs_dag_run.png + :width: 800 + + .. figure:: https://github.com/astronomer/astronomer-cosmos/raw/main/docs/_static/jaffle_shop_aws_ecs_dag_run_logs.png + :width: 800 + + Remember to config your DAG for connecting to AWS ECS and the database connection where you are performing your SQL queries! + + +**Monitor and Debug Your Job** + +1. 
**Check Task Status** + + You can view the status of your task from the AWS Console under your ECS cluster or via the CLI: + + .. code-block:: bash + + aws ecs describe-tasks --cluster my-cosmos-cluster --tasks + +2. **View Logs** + + Since the task definition configures AWS CloudWatch Logs, you can view your job’s output in the CloudWatch Logs console. Look for log streams with the prefix you set (e.g., ``ecs/cosmos-job``). + +**Conclusion** + + +By following this guide, you can deploy Astronomer Cosmos jobs on AWS ECS using Fargate. This integration enables you to leverage the scalability and managed infrastructure of ECS while maintaining a consistent container orchestration experience with Cosmos. + +For more detailed information on AWS ECS, please refer to the `AWS ECS Developer Guide `_. + +Happy deploying! :rocket: + + +Remember to config your DAG for connecting to AWS ECS and the database connection where you are performing your SQL queries! diff --git a/docs/configuration/run-dbt/container/azure-container-instance.rst b/docs/configuration/run-dbt/container/azure-container-instance.rst new file mode 100644 index 0000000000..86ce3ab9ef --- /dev/null +++ b/docs/configuration/run-dbt/container/azure-container-instance.rst @@ -0,0 +1,138 @@ +.. _azure-container-instance: + +Azure Container Instance Execution Mode +======================================= +.. versionadded:: 1.4 + +This tutorial will guide you through the steps required to use Azure Container Instance as the Execution Mode for your dbt code with Astronomer Cosmos. Schematically, the guide will walk you through the steps required to build the following architecture: + +.. figure:: https://github.com/astronomer/astronomer-cosmos/raw/main/docs/_static/cosmos_aci_schematic.png + :width: 800 + +Prerequisites ++++++++++++++ +1. Docker with docker daemon (Docker Desktop on MacOS). Follow the `Docker installation guide `_. +2. Airflow +3. Azure CLI (install guide here: `Azure CLI `_) +4. 
Astronomer-cosmos package containing the dbt Azure Container Instance operators +5. Azure account with: + 1. A resource group + 2. A service principal with `Contributor` permissions on the resource group + 3. A Container Registry + 4. A Postgres instance accessible from Azure (we use an Azure Postgres instance in the example) +6. Docker image built with required dbt project and dbt DAG +7. dbt DAG with dbt Azure Container Instance operators in the Airflow DAGs directory to run in Airflow + +More information on how to achieve 2-6 is detailed below. + +Note that the steps below will walk you through an example, for which the code can be found HERE + +Step-by-step guide +++++++++++++++++++ + +**Install Airflow and Cosmos** + +Create a python virtualenv, activate it, upgrade pip to the latest version and install apache airflow & astronomer-cosmos + +.. code-block:: bash + + python -m venv venv + source venv/bin/activate + pip install --upgrade pip + pip install apache-airflow + pip install "astronomer-cosmos[dbt-postgres,azure-container-instance]" + +**Setup Postgres database** + +You will need a postgres database running to be used as the database for the dbt project. In order to have it accessible from Azure Container Instance, the easiest way is to create an Azure Postgres instance. For this, run the following (assuming you are logged into your Azure account) + +.. code-block:: bash + + az postgres server create -l westeurope -g <<>> -n <<>> -u dbadmin -p <<>> --sku-name B_Gen5_1 --ssl-enforcement Enabled + + +**Setup Azure Container Registry** +In order to run a container in Azure Container Instance, it needs access to the container image. In our setup, we will use Azure Container Registry for this. To set an Azure Container Registry up, you can use the following bash command: + +..
code-block:: bash + + az acr create --name <<>> --resource-group <<>> --sku Basic --admin-enabled + +**Build the dbt Docker image** + +For the Docker operators to work, you need to create a docker image that will be supplied as image parameter to the dbt docker operators used in the DAG. + +Clone the `cosmos-example `_ repo + +.. code-block:: bash + + git clone https://github.com/astronomer/cosmos-example.git + cd cosmos-example + +Create a docker image containing the dbt project files and dbt profile by using the `Dockerfile `_, which will be supplied to the Docker operators. + +.. code-block:: bash + + docker build -t <<>:1.0.0 -f Dockerfile.azure_container_instance . + +After this, the image needs to be pushed to the registry of your choice. Note that your image name should contain the name of your registry: +.. code-block:: bash + + docker push <<>>:1.0.0 + +.. note:: + + You may need to ensure docker knows to use the right credentials. If using Azure Container Registry, this can be done by running the following command: + .. code-block:: bash + + az acr login + +.. note:: + + If running on M1, you may need to set the following envvars for running the docker build command in case it fails + + .. code-block:: bash + + export DOCKER_BUILDKIT=0 + export COMPOSE_DOCKER_CLI_BUILD=0 + export DOCKER_DEFAULT_PLATFORM=linux/amd64 + +Take a read of the Dockerfile to understand what it does so that you could use it as a reference in your project. + + - The `dbt profile `_ file is added to the image + - The dags directory containing the `dbt project jaffle_shop `_ is added to the image + - The dbt_project.yml is replaced with `postgres_profile_dbt_project.yml `_ which contains the profile key pointing to postgres_profile as profile creation is not handled at the moment for K8s operators like in local mode. 
+ +**Setup Airflow Connections** +Now you have the required Azure infrastructure, you still need to add configuration to Airflow to ensure the infrastructure can be used. You'll need 3 connections: + +1. ``aci_db``: a Postgres connection to your Azure Postgres instance. +2. ``aci``: an Azure Container Instance connection configured with a Service Principal with sufficient permissions (i.e. ``Contributor`` on the resource group in which you will use Azure Container Instances). +3. ``acr``: an Azure Container Registry connection configured for your Azure Container Registry. + +Check out the ``airflow-settings.yml`` file `here `_ for an example. If you are using Astro CLI, filling in the right values here will be enough for this to work. + +**Setup and Trigger the DAG with Airflow** + +Copy the dags directory from cosmos-example repo to your Airflow home + +.. code-block:: bash + + cp -r dags $AIRFLOW_HOME/ + +Run Airflow + +.. code-block:: bash + + airflow standalone + +.. note:: + + You might need to run airflow standalone with ``sudo`` if your Airflow user is not able to access the docker socket URL or pull the images in the Kind cluster. + +Log in to Airflow through a web browser ``http://localhost:8080/``, using the user ``airflow`` and the password described in the ``standalone_admin_password.txt`` file. + +Enable and trigger a run of the `jaffle_shop_azure_container_instance `_ DAG. You will be able to see the following successful DAG run. + +.. figure:: https://github.com/astronomer/astronomer-cosmos/raw/main/docs/_static/jaffle_shop_azure_container_instance.png + :width: 800 diff --git a/docs/configuration/run-dbt/container/docker.rst b/docs/configuration/run-dbt/container/docker.rst new file mode 100644 index 0000000000..0005914886 --- /dev/null +++ b/docs/configuration/run-dbt/container/docker.rst @@ -0,0 +1,111 @@ +.. 
_docker: + +Docker Execution Mode +======================================== + +The following tutorial illustrates how to run the Cosmos dbt Docker Operators and the required setup for them. + +Requirements +++++++++++++ + +1. Docker with docker daemon (Docker Desktop on MacOS). Follow the `Docker installation guide `_. +2. Airflow +3. Astronomer-cosmos package containing the dbt Docker operators +4. Postgres docker container +5. Docker image built with required dbt project and dbt DAG +6. dbt DAG with dbt docker operators in the Airflow DAGs directory to run in Airflow + +More information on how to achieve 2-6 is detailed below. + +Step-by-step instructions ++++++++++++++++++++++++++ + +**Install Airflow and Cosmos** + +Create a python virtualenv, activate it, upgrade pip to the latest version and install `Apache Airflow® `_ & astronomer-cosmos + +.. code-block:: bash + + python -m venv venv + source venv/bin/activate + pip install --upgrade pip + pip install apache-airflow + pip install "astronomer-cosmos[dbt-postgres]" + +**Setup Postgres database** + +You will need a postgres database running to be used as the database for the dbt project. Run the following command to run and expose a postgres database: + +.. code-block:: bash + + docker run --name some-postgres -e POSTGRES_PASSWORD="" -e POSTGRES_USER=postgres -e POSTGRES_DB=postgres -p5432:5432 -d postgres + +**Build the dbt Docker image** + +For the Docker operators to work, you need to create a docker image that will be supplied as the image parameter to the dbt docker operators used in the DAG. + +Clone the `cosmos-example `_ repo + +.. code-block:: bash + + git clone https://github.com/astronomer/cosmos-example.git + cd cosmos-example + +Create a docker image containing the dbt project files and dbt profile by using the `Dockerfile `_, which will be supplied to the Docker operators. + +.. code-block:: bash + + docker build -t dbt-jaffle-shop:1.0.0 -f Dockerfile.postgres_profile_docker_k8s . + +..
note:: + + If running on M1, you may need to set the following envvars for running the docker build command in case it fails + + .. code-block:: bash + + export DOCKER_BUILDKIT=0 + export COMPOSE_DOCKER_CLI_BUILD=0 + export DOCKER_DEFAULT_PLATFORM=linux/amd64 + +Take a read of the Dockerfile to understand what it does so that you could use it as a reference in your project. + + - The `dbt profile `_ file is added to the image + - The dags directory containing the `dbt project jaffle_shop `_ is added to the image + - The dbt_project.yml is replaced with `postgres_profile_dbt_project.yml `_ which contains the profile key pointing to postgres_profile as profile creation is not handled at the moment for K8s operators like in local mode. + +**Setup and Trigger the DAG with Airflow** + +Copy the dags directory from cosmos-example repo to your Airflow home + +.. code-block:: bash + + cp -r dags $AIRFLOW_HOME/ + +Run Airflow + +.. code-block:: bash + + airflow standalone + +.. note:: + + You might need to run airflow standalone with ``sudo`` if your Airflow user is not able to access the docker socket URL or pull the images in the Kind cluster. + +Log in to Airflow through a web browser ``http://localhost:8080/``, using the user ``airflow`` and the password described in the ``standalone_admin_password.txt`` file. + +Enable and trigger a run of the `jaffle_shop_docker `_ DAG. You will be able to see the following successful DAG run. + +.. figure:: https://github.com/astronomer/astronomer-cosmos/raw/main/docs/_static/jaffle_shop_docker_dag_run.png + :width: 800 + + +Specifying ProfileConfig ++++++++++++++++++++++++++ + +Starting with Cosmos 1.8.0, you can use the ``profile_config`` argument in your Dbt DAG Docker operators to reference +profiles for your dbt project defined in a profiles.yml file. To do so, provide the file’s path via the +``profiles_yml_path`` parameter in ``profile_config``. 
+ +Note that in ``ExecutionMode.DOCKER``, the ``profile_config`` is only compatible with the ``profiles_yml_path`` +approach. The ``profile_mapping`` method will not work because the required Airflow connections cannot be accessed +within the Docker container to map them to the dbt profile. diff --git a/docs/configuration/run-dbt/container/gcp-cloud-run-job.rst b/docs/configuration/run-dbt/container/gcp-cloud-run-job.rst new file mode 100644 index 0000000000..fa4d0c60c4 --- /dev/null +++ b/docs/configuration/run-dbt/container/gcp-cloud-run-job.rst @@ -0,0 +1,265 @@ +.. _gcp-cloud-run-job: + +GCP Cloud Run Job Execution Mode +======================================= +.. versionadded:: 1.7 + +This tutorial will guide you through the steps required to use Cloud Run Job instance as the Execution Mode for your dbt code with Astronomer Cosmos. This guide will walk you through the steps required to build the following architecture: + +.. figure:: https://github.com/astronomer/astronomer-cosmos/raw/main/docs/_static/cosmos_gcp_crj_schematic.png + :width: 600 + +Prerequisites ++++++++++++++ +1. Docker with docker daemon (Docker Desktop on MacOS). Follow the `Docker installation guide `_. +2. Airflow +3. Google Cloud SDK (`install guide `_) +4. Astronomer-cosmos package containing the dbt Cloud Run Job operators +5. GCP account with: + 1. A GCP project (`setup guide `_) + 2. IAM roles: + * Basic Role: `Owner `_ (control over whole project) or + * Predefined Roles: `Artifact Registry Administrator `_, `Cloud Run Developer `_ (control over specific services) + 3. Enabled service APIs: + * Artifact Registry API + * Cloud Run Admin API + * BigQuery API + 4. A service account with BigQuery roles: `JobUser `_ and `DataEditor `_ +6. Docker image built with required dbt project and dbt DAG +7. dbt DAG with Cloud Run Job operators in the Airflow DAGs directory to run in Airflow + +.. 
note:: + + Google Cloud Platform provides free tier on many resources, as well as Free Trial with $300 in credit. Learn more `here `_. + +More information on how to achieve 2-6 is detailed below. + + +Step-by-step guide +++++++++++++++++++ + +**Install Airflow and Cosmos** + +Create a python virtualenv, activate it, upgrade pip to the latest version and install ``apache airflow`` & ``astronomer cosmos``: + +.. code-block:: bash + + python3 -m venv venv + source venv/bin/activate + python3 -m pip install --upgrade pip + pip install apache-airflow + pip install "astronomer-cosmos[dbt-bigquery,gcp-cloud-run-job]" + +**Setup gcloud and environment variables** + +Set environment variables that will be used to create cloud infrastructure. Replace placeholders with your unique GCP ``project id`` and ``region`` of the project: + +.. code-block:: bash + + export PROJECT_ID=<<>> + export REGION=<<>> + export REPO_NAME="astronomer-cosmos-dbt" + export IMAGE_NAME="$REGION-docker.pkg.dev/$PROJECT_ID/$REPO_NAME/cosmos-example" + export SERVICE_ACCOUNT_NAME="cloud-run-job-sa" + export DATASET_NAME="astronomer_cosmos_example" + export CLOUD_RUN_JOB_NAME="astronomer-cosmos-example" + +Before we do anything in the GCP project, we first need to authorize gcloud to access the Cloud Platform with Google user credentials: + +.. code-block:: bash + + gcloud auth login + +You'll receive a link to sign into Google Cloud SDK using a Google Account. + +Next, set default ``project id`` using below command: + +.. code-block:: bash + + gcloud config set project $PROJECT_ID + +In case BigQuery has never been used before in the project, run below command to enable BigQuery API: + +.. code-block:: bash + + gcloud services enable bigquery.googleapis.com + +**Setup Artifact Registry** + +In order to run a container in Cloud Run Job, it needs access to the container image. In our setup, we will use Artifact Registry repository that stores images. 
+To use Artifact Registry, you need to enable the API first: + +.. code-block:: bash + + gcloud services enable artifactregistry.googleapis.com + +To set an Artifact Registry repository up, you can use the following bash command: + +.. code-block:: bash + + gcloud artifacts repositories create $REPO_NAME \ + --repository-format=docker \ + --location=$REGION \ + --project $PROJECT_ID + +**Setup Service Account** + +In order to use dbt and make transformations in BigQuery, Cloud Run Job needs some BigQuery permissions. One way to achieve that is to set up a separate ``Service Account`` with needed permissions: + +.. code-block:: bash + + # create a service account + gcloud iam service-accounts create $SERVICE_ACCOUNT_NAME + +.. code-block:: bash + + # grant JobUser role + gcloud projects add-iam-policy-binding $PROJECT_ID \ + --member="serviceAccount:$SERVICE_ACCOUNT_NAME@$PROJECT_ID.iam.gserviceaccount.com" \ + --role="roles/bigquery.jobUser" + +.. code-block:: bash + + # grant DataEditor role + gcloud projects add-iam-policy-binding $PROJECT_ID \ + --member="serviceAccount:$SERVICE_ACCOUNT_NAME@$PROJECT_ID.iam.gserviceaccount.com" \ + --role="roles/bigquery.dataEditor" + +**Build the dbt Docker image** + +Now, we are going to download an example dbt project and build a Docker image with it. + +.. important:: + + You need to ensure Docker is using the right credentials to push images. For Artifact Registry, this can be done by running the following command: + + .. code-block:: bash + + gcloud auth print-access-token | docker login -u oauth2accesstoken --password-stdin https://$REGION-docker.pkg.dev + + The token will be valid for 1 hour. After that, you need to create another one, if still needed. + +Clone the `cosmos-example `_ repo: + +.. 
code-block:: bash + + git clone https://github.com/astronomer/cosmos-example.git + cd cosmos-example + +Open `Dockerfile `_ located in the ``gcp_cloud_run_job_example`` folder and change the environment variables ``GCP_PROJECT_ID`` and ``GCP_REGION`` to your GCP project id and project region. + +Build a Docker image using previously modified ``Dockerfile``, which will be used by Cloud Run Job: + +.. code-block:: bash + + docker build -t $IMAGE_NAME -f gcp_cloud_run_job_example/Dockerfile.gcp_cloud_run_job . + +.. important:: + + Make sure to stay in ``cosmos-example`` directory when running ``docker build`` command. + +After this, the image needs to be pushed to the Artifact Registry: + +.. code-block:: bash + + docker push $IMAGE_NAME + +Take a read of the Dockerfile to understand what it does so that you could use it as a reference in your project. + + - The dags directory containing the `dbt project jaffle_shop `_ is added to the image + - The `bigquery dbt profile `_ file is added to the image + - The dbt_project.yml is replaced with `bigquery_profile_dbt_project.yml `_ which contains the profile key pointing to postgres_profile as profile creation is not handled at the moment for K8s operators like in local mode. + +**Create Cloud Run Job instance** + +When the image is pushed to Artifact Registry, you can finally create Cloud Run Job with the image and previously created service account. + +First, enable Cloud Run Admin API using below command: + +.. code-block:: bash + + gcloud services enable run.googleapis.com + + +Next, set default Cloud Run region to your GCP region: + +.. code-block:: bash + + gcloud config set run/region $REGION + +Then, run below command to create Cloud Run Job instance: + +.. 
code-block:: bash + + gcloud run jobs create $CLOUD_RUN_JOB_NAME \ + --image=$IMAGE_NAME \ + --task-timeout=180s \ + --max-retries=0 \ + --cpu=1 \ + --memory=512Mi \ + --service-account=$SERVICE_ACCOUNT_NAME@$PROJECT_ID.iam.gserviceaccount.com + +**Setup Airflow Connections** + +Now, when you have the required Google Cloud infrastructure, you still need to check Airflow configuration to ensure the infrastructure can be used. You'll need a ``google_cloud_default`` connection in order to work on GCP resources. + +Check out an `example `_ of the ``airflow-settings.yml`` file. If you are using Astro CLI, filling in the right values here will be enough for this to work. + +**Setup and Trigger the DAG with Airflow** + +Open `jaffle_shop_gcp_cloud_run_job `_ DAG file and update ``GCP_PROJECT_ID`` and ``GCP_LOCATION`` constants with your GCP project id and project region. + +When the DAG is configured, copy the ``dags`` directory from ``cosmos-example`` repo to your Airflow home: + +.. code-block:: bash + + cp -r dags $AIRFLOW_HOME/ + +Run Airflow: + +.. code-block:: bash + + airflow standalone + +.. note:: + + You might need to run airflow standalone with ``sudo`` if your Airflow user is not able to access the docker socket URL or pull the images in the Kind cluster. + +Log in to Airflow through a web browser ``http://localhost:8080/``, using the user ``airflow`` and the password described in the ``standalone_admin_password.txt`` file. + +Enable and trigger a run of the `jaffle_shop_gcp_cloud_run_job `_ DAG. You will be able to see the following successful DAG run. + +.. figure:: https://github.com/astronomer/astronomer-cosmos/raw/main/docs/_static/jaffle_shop_gcp_cloud_run_job.png + :width: 800 + + +You can also verify the tables that were created using dbt in BigQuery Studio: + +.. 
figure:: https://github.com/astronomer/astronomer-cosmos/raw/main/docs/_static/jaffle_shop_big_query.png + :width: 800 + + +**Delete resources** + +After the successful tests, don't forget to delete Google Cloud resources to save up costs: + +.. code-block:: bash + + # Delete Cloud Run Job instance + + gcloud run jobs delete $CLOUD_RUN_JOB_NAME + +.. code-block:: bash + + # Delete BigQuery main and custom dataset specified in dbt schema.yml with all tables included + + bq rm -r -f -d $PROJECT_ID:$DATASET_NAME + + bq rm -r -f -d $PROJECT_ID:dbt_dev + +.. code-block:: bash + + # Delete Artifact Registry repository with all images included + + gcloud artifacts repositories delete $REPO_NAME \ + --location=$REGION diff --git a/docs/configuration/run-dbt/container/index.rst b/docs/configuration/run-dbt/container/index.rst new file mode 100644 index 0000000000..634e9e8eb4 --- /dev/null +++ b/docs/configuration/run-dbt/container/index.rst @@ -0,0 +1,13 @@ +Run dbt in a container +====================== + +.. toctree:: + :maxdepth: 1 + :caption: Run dbt in a container + + aws-container-run-job + azure-container-instance + docker + gcp-cloud-run-job + kubernetes + watcher-kubernetes-execution-mode \ No newline at end of file diff --git a/docs/configuration/run-dbt/container/kubernetes.rst b/docs/configuration/run-dbt/container/kubernetes.rst new file mode 100644 index 0000000000..607ba07bd7 --- /dev/null +++ b/docs/configuration/run-dbt/container/kubernetes.rst @@ -0,0 +1,167 @@ +.. _kubernetes: + +Kubernetes Execution Mode +============================================== + +The following tutorial illustrates how to run the Cosmos dbt Kubernetes Operator using a local Kubernetes (K8s) cluster. 
It assumes the following: + +- Postgres is run in the Kubernetes (K8s) cluster as a container +- Airflow is run locally, and it triggers a K8s Pod which runs dbt + +Requirements +++++++++++++ + +To test the DbtKubernetesOperators locally, we encourage you to install the following: + +- Local Airflow (either standalone or using Astro CLI) +- `Kind `_ to run K8s locally +- `Helm `_ to install Postgres in K8s +- `Docker `_ to create the dbt container image, which will allow Airflow to create a K8s pod which will run dbt + +At the moment, the user is expected to add to the Docker image both: + +- The dbt project files +- The dbt Profile, which contains the information for dbt to access the database while parsing the project from Apache Airflow nodes +- Handle secrets + +Additional KubernetesPodOperator parameters can be added to the ``operator_args`` parameter of the ``DbtKubernetesOperator``. + +For instance, + +.. literalinclude:: ../../dev/dags/jaffle_shop_kubernetes.py + :language: python + :start-after: [START kubernetes_tg_example] + :end-before: [END kubernetes_tg_example] + +Step-by-step instructions ++++++++++++++++++++++++++ + +Using installed `Kind `_, you can setup a local kubernetes cluster + +.. code-block:: bash + + kind create cluster + +Deploy a Postgres pod to Kind using `Helm `_ + +.. code-block:: bash + + helm repo add bitnami https://charts.bitnami.com/bitnami + helm repo update + helm install postgres bitnami/postgresql + +Retrieve the Postgres password and set it as an environment variable. + +.. code-block:: bash + + export POSTGRES_PASSWORD=$(kubectl get secret --namespace default postgres-postgresql -o jsonpath="{.data.postgres-password}" | base64 -d) + +Check that the environment variable was set and that it is not empty + +.. code-block:: bash + + echo $POSTGRES_PASSWORD + +Expose the Postgres to the host running Docker/Kind. + +.. 
code-block:: bash + + kubectl port-forward --namespace default postgres-postgresql-0 5432:5432 + +Check that you're able to connect to the exposed pod. + +.. code-block:: bash + + PGPASSWORD="$POSTGRES_PASSWORD" psql --host 127.0.0.1 -U postgres -d postgres -p 5432 + + postgres=# \dt + \q + +Create a K8s secret which contains the credentials to access Postgres. + +.. code-block:: bash + + kubectl create secret generic postgres-secrets --from-literal=host=postgres-postgresql.default.svc.cluster.local --from-literal=password=$POSTGRES_PASSWORD + +Clone the example repo that contains the Airflow DAG and dbt project files. + +.. code-block:: bash + + git clone https://github.com/astronomer/cosmos-example.git + cd cosmos-example/ + +Create a Docker image containing the dbt project files and dbt profile by using the `Dockerfile `_, which will be run in K8s. + +.. code-block:: bash + + docker build -t dbt-jaffle-shop:1.0.0 -f Dockerfile.postgres_profile_docker_k8s . + +.. note:: + + If running on M1, you may need to set the following environment variables to run the Docker build command in case it fails. + + .. code-block:: bash + + export DOCKER_BUILDKIT=0 + export COMPOSE_DOCKER_CLI_BUILD=0 + export DOCKER_DEFAULT_PLATFORM=linux/amd64 + +Take a look at the Dockerfile to understand its purpose so that you can use it as a reference in your project. + + - The `dbt profile `__ file is added to the image + - The dags directory containing the `dbt project jaffle_shop `_ is added to the image + - The dbt_project.yml is replaced with `postgres_profile_dbt_project.yml `_ which contains the profile key pointing to postgres_profile as profile creation is not handled at the moment for K8s operators like in local mode. + +Make the build image available in the Kind K8s cluster. + +.. code-block:: bash + + kind load docker-image dbt-jaffle-shop:1.0.0 + +Create a Python virtual environment and install the latest version of Astronomer Cosmos, which contains the K8s Operator. + +.. 
code-block:: bash + + python -m venv venv + source venv/bin/activate + pip install --upgrade pip + pip install "astronomer-cosmos[dbt-postgres]" apache-airflow-providers-cncf-kubernetes + +Make the `jaffle_shop_kubernetes.py `__ file available at your Airflow DAG home: + +.. code-block:: bash + + cp -r dags $AIRFLOW_HOME/ + +Run Airflow + +.. code-block:: bash + + airflow standalone + +.. note:: + + You may need to run Airflow standalone with ``sudo`` if your Airflow user is unable to access the Docker socket URL or pull images in the Kind cluster. + +Log in to Airflow through a web browser ``http://localhost:8080/``, using the user ``airflow`` and the password described in the ``standalone_admin_password.txt`` file. + +Enable and trigger a run of the `jaffle_shop_k8s `_ DAG. You will be able to see the following successful DAG run. + +.. figure:: https://github.com/astronomer/astronomer-cosmos/raw/main/docs/_static/jaffle_shop_k8s_dag_run.png + :width: 800 + +.. _kubernetes-known-limitations: + +Known Limitations ++++++++++++++++++ + +The Kubernetes execution mode has the following limitations: + +- Does not emit OpenLineage events (there is an `open ticket #496 `__ to address this) +- Does not emit Airflow datasets, assets, and dataset aliases (there is an `open ticket #2329 `__ to address this) +- Does not handle installing dbt deps for users (there is an `open ticket #679 `__ to address this) +- Does not support `ProfileMapping `_ (there is an `open ticket #749 `__ to address this) +- Does not support `Callbacks `_ (there is an `open ticket #1575 `__ to address this) +- Does not expose Compiled SQL as a `templated field `_ +- Does not benefit from `Cosmos caching mechanisms `_ +- Does not support `generating dbt docs & uploading to an object store `_ (there is a `PR `_ to solve this for S3) diff --git a/docs/configuration/run-dbt/container/watcher-kubernetes-execution-mode.rst b/docs/configuration/run-dbt/container/watcher-kubernetes-execution-mode.rst new file mode 
100644 index 0000000000..16dbbffd0a --- /dev/null +++ b/docs/configuration/run-dbt/container/watcher-kubernetes-execution-mode.rst @@ -0,0 +1,214 @@ +.. _watcher-kubernetes-execution-mode: + +``ExecutionMode.WATCHER_KUBERNETES``: High-Performance dbt Execution in Kubernetes +=================================================================================== + +.. versionadded:: 1.13.0 + +The ``ExecutionMode.WATCHER_KUBERNETES`` combines the **speed of the** :ref:`watcher-execution-mode` **with the isolation of** :ref:`kubernetes`. + +This execution mode is ideal for users who: + +* Want to leverage the performance benefits of the watcher execution mode +* Need to run dbt in isolated Kubernetes pods +* Prefer not to install dbt in their Airflow deployment + +------------------------------------------------------------------------------- + +Background +---------- + +The :ref:`watcher-execution-mode` introduced in Cosmos 1.11.0 significantly reduces dbt pipeline run times by running dbt as a single command while maintaining model-level observability in Airflow. + +However, the original ``ExecutionMode.WATCHER`` requires dbt to be installed alongside Airflow. The ``ExecutionMode.WATCHER_KUBERNETES`` removes this limitation by running the dbt command inside Kubernetes pods, similar to ``ExecutionMode.KUBERNETES``. + +For more details on the watcher concept and how it works, please refer to the :ref:`watcher-execution-mode` documentation. + +------------------------------------------------------------------------------- + +How to Use +---------- + +Users previously using ``ExecutionMode.KUBERNETES`` can simply replace the ``execution_mode`` to use ``ExecutionMode.WATCHER_KUBERNETES``. + +The following example shows how to configure a ``DbtDag`` with ``ExecutionMode.WATCHER_KUBERNETES``: + +.. 
code-block:: python + + from cosmos import DbtDag + from cosmos.config import ExecutionConfig + from cosmos.constants import ExecutionMode + + dag = DbtDag( + dag_id="jaffle_shop_watcher_kubernetes", + # ... other DAG parameters ... + execution_config=ExecutionConfig( + execution_mode=ExecutionMode.WATCHER_KUBERNETES, + dbt_project_path=K8S_PROJECT_DIR, + ), + operator_args={ + "image": DBT_IMAGE, + "get_logs": True, + "log_events_on_failure": True, + }, + ) + +**Key differences from** ``ExecutionMode.KUBERNETES``: + +* The ``execution_mode`` is set to ``ExecutionMode.WATCHER_KUBERNETES`` instead of ``ExecutionMode.KUBERNETES`` +* The producer task runs the entire ``dbt build`` command in a single Kubernetes pod +* Consumer tasks (sensors) watch for the completion of their corresponding dbt models + +For the complete setup including Kubernetes secrets, Docker image configuration, and profile setup, refer to the :ref:`kubernetes` documentation. + +------------------------------------------------------------------------------- + +Performance Gains +----------------- + +Early benchmarks using the ``jaffle_shop_watcher_kubernetes`` DAG show significant improvements: + ++-----------------------------------------------+------------------+ +| Execution Mode | Total Runtime | ++===============================================+==================+ +| ``ExecutionMode.KUBERNETES`` | 00:00:32.155 | ++-----------------------------------------------+------------------+ +| ``ExecutionMode.WATCHER_KUBERNETES`` | 00:00:11.783 | ++-----------------------------------------------+------------------+ + +This represents approximately a **63% reduction** in total DAG runtime. 
+ +The performance improvement comes from: + +* Running dbt as a single command (reducing Kubernetes pod startup overhead) +* Leveraging dbt's native threading capabilities +* Eliminating repeated dbt initialization for each model + +------------------------------------------------------------------------------- + +Known Limitations +----------------- + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Kubernetes Provider Version Compatibility +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``ExecutionMode.WATCHER_KUBERNETES`` does not work with older versions of the ``apache-airflow-providers-cncf-kubernetes`` provider (<=10.7.0). + +Please ensure you have a compatible version installed: + +.. code-block:: bash + + pip install "apache-airflow-providers-cncf-kubernetes>10.7.0" + +We successfully tested against the most recent release of the provider (`10.12.2 `_). + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Support for KPO deferrable mode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The producer node created by the ``ExecutionMode.WATCHER_KUBERNETES`` producer task can be set to deferrable mode as long as: + +- The correct version of Airflow Kubernetes is installed (``>=10.12.2``). This version fixed a bug (`PR `_) that prevented setting callbacks and parsing the logs when the Kubernetes Operator run using ``deferrable``. The experience should be further improved once `this other PR is merged `_. + +.. code-block:: bash + + pip install "apache-airflow-providers-cncf-kubernetes>=10.12.2" + +- The arguments ``deferrable=True`` and ``is_delete_operator_pod=True`` are set: + +.. code-block:: python + + dag = DbtDag( + dag_id="jaffle_shop_watcher_kubernetes", + # ... other DAG parameters ... 
+ execution_config=ExecutionConfig( + execution_mode=ExecutionMode.WATCHER_KUBERNETES, + dbt_project_path=K8S_PROJECT_DIR, + ), + operator_args={ + "deferrable": True, + "is_delete_operator_pod": True, + "image": DBT_IMAGE, + "get_logs": True, + "log_events_on_failure": True, + }, + ) + +Conversely, the consumer tasks that subclass ``DbtConsumerWatcherKubernetesSensor`` run in deferrable mode by default when operating as a sensor. They can also operate in deferrable mode if they are running dbt themselves upon retry. + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Mandatory ``operator_args`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``operator_args`` must define ``get_logs`` and ``log_events_on_failure``: + +.. code-block:: python + + dag = DbtDag( + dag_id="jaffle_shop_watcher_kubernetes", + # ... other DAG parameters ... + execution_config=ExecutionConfig( + execution_mode=ExecutionMode.WATCHER_KUBERNETES, + dbt_project_path=K8S_PROJECT_DIR, + ), + operator_args={ + # ... other KPO mandatory args ... + "get_logs": True, + "log_events_on_failure": True, + }, + ) + + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Other Inherited Limitations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The following limitations from ``ExecutionMode.WATCHER`` also apply to ``ExecutionMode.WATCHER_KUBERNETES``: + +* **Individual dbt Operators**: Only ``DbtSeedWatcherKubernetesOperator``, ``DbtSnapshotWatcherKubernetesOperator``, and ``DbtRunWatcherKubernetesOperator`` are implemented. The ``DbtTestWatcherKubernetesOperator`` is currently a placeholder. + +* **Test behavior**: The ``TestBehavior.AFTER_EACH`` is not supported. Tests are run as part of the ``dbt build`` command by the producer task. + +* **Source freshness nodes**: The ``dbt build`` command does not run source freshness checks. + +For more details on these limitations, refer to the :ref:`watcher-execution-mode` documentation. + +Additionally, the limitations from ``ExecutionMode.KUBERNETES`` also apply to ``ExecutionMode.WATCHER_KUBERNETES``.
For details, refer to the :ref:`kubernetes-known-limitations` documentation. + +------------------------------------------------------------------------------- + +Example DAG +----------- + +Below is a complete example of a DAG using ``ExecutionMode.WATCHER_KUBERNETES``: + +.. literalinclude:: ../../dev/dags/jaffle_shop_watcher_kubernetes.py + :language: python + +------------------------------------------------------------------------------- + +Prerequisites +------------- + +Before using ``ExecutionMode.WATCHER_KUBERNETES``, ensure you have: + +1. A Kubernetes cluster configured and accessible from your Airflow deployment +2. A Docker image containing your dbt project and profile +3. The ``apache-airflow-providers-cncf-kubernetes`` provider installed (version >10.7.0) + +For detailed setup instructions, refer to the :ref:`kubernetes` documentation. + +------------------------------------------------------------------------------- + +Summary +------- + +``ExecutionMode.WATCHER_KUBERNETES`` provides: + +* ✅ **~63% faster** dbt DAG runs compared to ``ExecutionMode.KUBERNETES`` +* ✅ **Isolation** between dbt and Airflow dependencies +* ✅ **Model-level visibility** in Airflow +* ✅ **Easy migration** from ``ExecutionMode.KUBERNETES`` + +This execution mode is ideal for teams who want the performance benefits of the watcher mode while maintaining the isolation provided by Kubernetes execution. diff --git a/docs/configuration/run-dbt/execution-modes.rst b/docs/configuration/run-dbt/execution-modes.rst new file mode 100644 index 0000000000..a9bd3f1e2b --- /dev/null +++ b/docs/configuration/run-dbt/execution-modes.rst @@ -0,0 +1,387 @@ +.. _execution-modes: + +Execution Modes +=============== + +.. toctree:: + :maxdepth: 3 + :caption: Run dbt in the Airflow worker + + airflow-worker/index + +.. 
toctree:: + :maxdepth: 3 + :caption: Run dbt in a container + + container/index + + +Cosmos can run ``dbt`` commands using several different approaches, called ``execution modes``: + +1. **local**: Run ``dbt`` commands using a local ``dbt`` installation (default) +2. **virtualenv**: Run ``dbt`` commands from Python virtual environments managed by Cosmos +3. **docker**: Run ``dbt`` commands from Docker containers managed by Cosmos (requires a pre-existing Docker image) +4. **kubernetes**: Run ``dbt`` commands from Kubernetes Pods managed by Cosmos (requires a pre-existing Docker image) +5. **aws_eks**: Run ``dbt`` commands from AWS EKS Pods managed by Cosmos (requires a pre-existing Docker image) +6. **azure_container_instance**: Run ``dbt`` commands from Azure Container Instances managed by Cosmos (requires a pre-existing Docker image) +7. **gcp_cloud_run_job**: Run ``dbt`` commands from GCP Cloud Run Job instances managed by Cosmos (requires a pre-existing Docker image) +8. **aws_ecs**: Run ``dbt`` commands from AWS ECS instances managed by Cosmos (requires a pre-existing Docker image) +9. **airflow_async**: (stable since Cosmos 1.9.0) Run the dbt resources from your dbt project asynchronously, by submitting the corresponding compiled SQLs to Apache Airflow's `Deferrable operators `__ +10. **watcher**: (experimental since Cosmos 1.11.0) Run a single ``dbt build`` command from a producer task and have sensor tasks to watch the progress of the producer, with improved DAG run time while maintaining the tasks lineage in the Airflow UI, and ability to retry failed tasks. Check the :ref:`watcher-execution-mode` for more details. +11. **watcher_kubernetes**: (experimental since Cosmos 1.13.0) Combines the speed of the watcher execution mode with the isolation of Kubernetes. Check the :ref:`watcher-kubernetes-execution-mode` for more details. + +The choice of the ``execution mode`` can vary based on each user's needs and concerns. 
For more details, check each execution mode described below. + +.. _execution-modes-comparison: + +.. list-table:: Execution Modes Comparison + :widths: 25 25 25 25 + :header-rows: 1 + + * - Execution Mode + - Task Duration + - Environment Isolation + - Cosmos Profile Management + * - Local + - Fast + - None + - Yes + * - Virtualenv + - Medium + - Lightweight + - Yes + * - Docker + - Slow + - Medium + - No + * - Kubernetes + - Slow + - High + - No + * - AWS_EKS + - Slow + - High + - No + * - Azure Container Instance + - Slow + - High + - No + * - GCP Cloud Run Job Instance + - Slow + - High + - No + * - AWS ECS + - Slow + - High + - No + * - Airflow Async + - Very Fast + - Medium + - Yes + * - Watcher + - Very Fast + - None + - Yes + * - Watcher Kubernetes + - Fast + - High + - No + +Local +----- + +By default, Cosmos uses the ``local`` execution mode. + +The ``local`` execution mode is the fastest way to run Cosmos operators since they don't install ``dbt`` nor build docker containers. However, it may not be an option for users using managed Airflow services such as +Google Cloud Composer, since Airflow and ``dbt`` dependencies can conflict (:ref:`execution-modes-local-conflicts`), the user may not be able to install ``dbt`` in a custom path. + +The ``local`` execution mode assumes a ``dbt`` binary is reachable within the Airflow worker node. + +If ``dbt`` was not installed as part of the Cosmos packages, +users can define a custom path to ``dbt`` by declaring the argument ``dbt_executable_path``. + +.. note:: + Starting in the 1.4 version, Cosmos tries to leverage the dbt partial parsing (``partial_parse.msgpack``) to speed up task execution. + This feature is bound to `dbt partial parsing limitations `_. + Learn more: :ref:`partial-parsing`. + +When using the ``local`` execution mode, Cosmos converts Airflow Connections into a native ``dbt`` profiles file (``profiles.yml``). 
+ +Example of how to use, for instance, when ``dbt`` was installed together with Cosmos: + +.. literalinclude:: ../../dev/dags/basic_cosmos_dag.py + :language: python + :start-after: [START local_example] + :end-before: [END local_example] + + +Virtualenv +---------- + +If you're using managed Airflow on GCP (Cloud Composer), for instance, we recommend you use the ``virtualenv`` execution mode. + +The ``virtualenv`` mode isolates the Airflow worker dependencies from ``dbt`` by managing a Python virtual environment created during task execution and deleted afterwards. + +In this case, users are responsible for declaring which version of ``dbt`` they want to use by giving the argument ``py_requirements``. This argument can be set directly in operator instances or when instantiating ``DbtDag`` and ``DbtTaskGroup`` as part of ``operator_args``. + +Similar to the ``local`` execution mode, Cosmos converts Airflow Connections into a form ``dbt`` understands by creating a ``dbt`` profile file (``profiles.yml``). +Also similar to the ``local`` execution mode, Cosmos will by default attempt to use a ``partial_parse.msgpack`` if one exists to speed up parsing. + +Some drawbacks of this approach: + +- It is slower than ``local`` because it creates a new Python virtual environment for each Cosmos dbt task run. +- If dbt is unavailable in the Airflow scheduler, the default ``LoadMode.DBT_LS`` will not work. In this scenario, users must use a :ref:`parsing-methods` that does not rely on dbt, such as ``LoadMode.MANIFEST``. +- Only ``InvocationMode.SUBPROCESS`` is currently supported; attempting to use ``InvocationMode.DBT_RUNNER`` will raise an error. + +Example of how to use: + +.. 
literalinclude:: ../../dev/dags/example_virtualenv.py + :language: python + :start-after: [START virtualenv_example] + :end-before: [END virtualenv_example] + +Docker +------ + +The ``docker`` approach assumes users have a previously created Docker image, which should contain all the ``dbt`` pipelines and a ``profiles.yml``, managed by the user. + +The user has better environment isolation than when using ``local`` or ``virtualenv`` modes, but also more responsibility (ensuring the Docker container used has up-to-date files and managing secrets potentially in multiple places). + +Another challenge with the ``docker`` approach arises when the Airflow worker is already running in Docker, which can sometimes lead to difficulties running `Docker in Docker `__. + +This approach can be significantly slower than ``virtualenv`` since it may have to build the ``Docker`` container, which is slower than creating a Virtualenv with ``dbt-core``. +If dbt is unavailable in the Airflow scheduler, the default ``LoadMode.DBT_LS`` will not work. In this scenario, users must use a :ref:`parsing-methods` that does not rely on dbt, such as ``LoadMode.MANIFEST``. + +Check the step-by-step guide on using the ``docker`` execution mode at :ref:`docker`. + +Example DAG: + +.. code-block:: python + + docker_cosmos_dag = DbtDag( + # ... + execution_config=ExecutionConfig( + execution_mode=ExecutionMode.DOCKER, + ), + operator_args={ + "image": "dbt-jaffle-shop:1.0.0", + "network_mode": "bridge", + }, + ) + + +Kubernetes +---------- + +The ``kubernetes`` approach is a very isolated way of running ``dbt`` since the ``dbt`` commands run from within a Kubernetes Pod, usually on a separate host. + +It assumes the user has a Kubernetes cluster. It also expects the user to ensure the Docker container has up-to-date ``dbt`` pipelines and profiles, potentially leading the user to declare secrets in two places (Airflow and Docker container). 
+ +The ``Kubernetes`` deployment may be slower than ``Docker`` and ``Virtualenv`` assuming that the container image is built (which is slower than creating a Python ``virtualenv`` and installing ``dbt-core``) and the Airflow task needs to spin up a new ``Pod`` in Kubernetes. + +Check the step-by-step guide on using the ``kubernetes`` execution mode at :ref:`kubernetes`. + +Example DAG: + +.. literalinclude:: ../../dev/dags/jaffle_shop_kubernetes.py + :language: python + :start-after: [START kubernetes_seed_example] + :end-before: [END kubernetes_seed_example] + +AWS_EKS +---------- + +The ``aws_eks`` approach is very similar to the ``kubernetes`` approach, but it is specifically designed to run on AWS EKS clusters. +It uses the `EKSPodOperator `_ +to run the dbt commands. You need to provide the ``cluster_name`` in your operator_args to connect to the AWS EKS cluster. + + +Example DAG: + +.. code-block:: python + + postgres_password_secret = Secret( + deploy_type="env", + deploy_target="POSTGRES_PASSWORD", + secret="postgres-secrets", + key="password", + ) + + docker_cosmos_dag = DbtDag( + # ... + execution_config=ExecutionConfig( + execution_mode=ExecutionMode.AWS_EKS, + ), + operator_args={ + "image": "dbt-jaffle-shop:1.0.0", + "cluster_name": CLUSTER_NAME, + "get_logs": True, + "is_delete_operator_pod": False, + "secrets": [postgres_password_secret], + }, + ) + +Azure Container Instance +------------------------ +.. versionadded:: 1.4 + +Similar to the ``kubernetes`` approach, using ``Azure Container Instances`` as the execution mode gives a very isolated way of running ``dbt``, since the ``dbt`` run itself is run within a container running in an Azure Container Instance. + +This execution mode requires the user has an Azure environment that can be used to run Azure Container Groups in (see :ref:`azure-container-instance` for more details on the exact requirements). 
Similarly to the ``Docker`` and ``Kubernetes`` execution modes, a Docker container should be available, containing the up-to-date ``dbt`` pipelines and profiles. + +Each task will create a new container on Azure, giving full isolation. This, however, comes at the cost of speed, as this separation of tasks introduces some overhead. Please check out the step-by-step guide for using Azure Container Instance as the execution mode at :ref:`azure-container-instance`. + + +.. code-block:: python + + docker_cosmos_dag = DbtDag( + # ... + execution_config=ExecutionConfig( + execution_mode=ExecutionMode.AZURE_CONTAINER_INSTANCE + ), + operator_args={ + "ci_conn_id": "aci", + "registry_conn_id": "acr", + "resource_group": "my-rg", + "name": "my-aci-{{ ti.task_id.replace('.','-').replace('_','-') }}", + "region": "West Europe", + "image": "dbt-jaffle-shop:1.0.0", + }, + ) + +GCP Cloud Run Job +------------------------ +.. versionadded:: 1.7 + +The ``gcp_cloud_run_job`` execution mode is particularly useful for users who prefer to run their ``dbt`` commands on Google Cloud infrastructure, taking advantage of Cloud Run's scalability, isolation, and managed service capabilities. + +For the ``gcp_cloud_run_job`` execution mode to work, a Cloud Run Job instance must first be created using a previously built Docker container. This container should include the latest ``dbt`` pipelines and profiles. You can find more details in the `Cloud Run Job creation guide `__ . + +This execution mode allows users to run ``dbt`` core CLI commands in a Google Cloud Run Job instance. This mode leverages the ``CloudRunExecuteJobOperator`` from the Google Cloud Airflow provider to execute commands within a Cloud Run Job instance, where ``dbt`` is already installed. Similarly to the ``Docker`` and ``Kubernetes`` execution modes, a Docker container should be available, containing the up-to-date ``dbt`` pipelines and profiles. + +Each task will create a new Cloud Run Job execution, giving full isolation.
The separation of tasks adds extra overhead; however, that can be mitigated by using the ``concurrency`` parameter in ``DbtDag``, which will result in parallelized execution of ``dbt`` models. + + +.. code-block:: python + + gcp_cloud_run_job_cosmos_dag = DbtDag( + # ... + execution_config=ExecutionConfig(execution_mode=ExecutionMode.GCP_CLOUD_RUN_JOB), + operator_args={ + "project_id": "my-gcp-project-id", + "region": "europe-west1", + "job_name": "my-crj-{{ ti.task_id.replace('.','-').replace('_','-') }}", + }, + ) + + +AWS ECS +--------- +.. versionadded:: 1.9.0 + +Using ``AWS Elastic Container Service (ECS)`` as the execution mode provides an isolated and scalable way to run ``dbt`` tasks within an AWS ECS service. This execution mode ensures that each ``dbt`` run is performed inside a dedicated container running in an ECS task. + +This execution mode requires the user to have an AWS environment configured to run ECS tasks (see :ref:`aws-container-run-job` for more details on the exact requirements). Similar to the ``Docker`` and ``Kubernetes`` execution modes, a Docker container should be available, containing the up-to-date ``dbt`` pipelines and profiles. + +Each task will create a new ECS task execution, providing full isolation. However, this separation introduces some overhead in execution time due to container startup and provisioning. For users who require faster execution times, configuring appropriate ECS task definitions and cluster optimizations can help mitigate these delays. + +Please refer to the step-by-step guide for using AWS ECS as the execution mode. + +.. code-block:: python + + aws_ecs_cosmos_dag = DbtDag( + # ...
+ execution_config=ExecutionConfig(execution_mode=ExecutionMode.AWS_ECS), + operator_args={ + "aws_conn_id": "aws_default", + "cluster": "my-ecs-cluster", + "task_definition": "my-dbt-task", + "container_name": "dbt-container", + "launch_type": "FARGATE", + "deferrable": True, + "network_configuration": { + "awsvpcConfiguration": { + "subnets": ["<<>>"], + "assignPublicIp": "ENABLED", + }, + }, + "environment_variables": {"DBT_PROFILE_NAME": "default"}, + }, + ) + +.. _airflow-async-execution-mode: + +Airflow Async +------------- + +.. versionadded:: 1.9.0 + +Although this execution mode was introduced in Cosmos 1.9, we strongly encourage users to use Cosmos 1.11, which has significant performance improvements. +In comparison to the ``local``, the ``airflow_async`` execution mode can reduce the execution time of a dbt project by up to 36%. + +The ``airflow_async`` execution mode is a way to run the dbt resources from your dbt project using Apache Airflow's +`Deferrable operators `__. +This execution mode could be preferred when you've long running resources and you want to run them asynchronously by +leveraging Airflow's deferrable operators. With that, you would be able to potentially observe higher throughput of tasks +as more dbt nodes will be run in parallel since they won't be blocking Airflow's worker slots. + +Example DAG: + +.. literalinclude:: ../../dev/dags/simple_dag_async.py + :language: python + :start-after: [START airflow_async_execution_mode_example] + :end-before: [END airflow_async_execution_mode_example] + +For a full step-by-step guide and limitations, check the :ref:`async-execution-mode` page. + + +Watcher Execution Mode (Experimental) +------------------------------------- + +.. versionadded:: 1.11.0 + +The ``watcher`` execution mode is an experimental execution mode that runs a single ``dbt build`` command from a producer task and has sensor tasks to watch the progress of the producer. 
+It is designed to improve DAG run time while maintaining the task lineage in the Airflow UI and the ability to retry failed tasks. + +Check the :ref:`watcher-execution-mode` for more details. + + +Watcher Kubernetes Execution Mode (Experimental) +------------------------------------------------ + +.. versionadded:: 1.13.0 + +The ``watcher_kubernetes`` execution mode combines the speed of the ``watcher`` execution mode with the isolation of the ``kubernetes`` execution mode. It runs a single ``dbt build`` command from a producer task inside a Kubernetes pod and has sensor tasks to watch the progress of the producer. + +Check the :ref:`watcher-kubernetes-execution-mode` for more details. + + +.. _invocation_modes: + +Invocation Modes +================ +.. versionadded:: 1.4 + +For ``ExecutionMode.LOCAL`` execution mode, Cosmos supports two invocation modes for running dbt: + +1. ``InvocationMode.SUBPROCESS``: In this mode, Cosmos runs dbt CLI commands using the Python ``subprocess`` module and parses the output to capture logs and to raise exceptions. + +2. ``InvocationMode.DBT_RUNNER``: In this mode, Cosmos uses the ``dbtRunner`` available for `dbt programmatic invocations `__ to run dbt commands. \ + In order to use this mode, dbt must be installed in the same local environment. This mode does not have the overhead of spawning new subprocesses or parsing the output of dbt commands and is faster than ``InvocationMode.SUBPROCESS``. \ + This mode requires dbt version 1.5.0 or higher. It is up to the user to resolve :ref:`execution-modes-local-conflicts` when using this mode. + +The invocation mode can be set in the ``ExecutionConfig`` as shown below: + +.. code-block:: python + + from cosmos.constants import InvocationMode + + dag = DbtDag( + # ...
+ execution_config=ExecutionConfig( + execution_mode=ExecutionMode.LOCAL, + invocation_mode=InvocationMode.DBT_RUNNER, + ), + ) + +If the invocation mode is not set, Cosmos will attempt to use ``InvocationMode.DBT_RUNNER`` if dbt is installed in the same environment as the worker, otherwise it will fall back to ``InvocationMode.SUBPROCESS``. From 5bca1f2ee96e2434131a48133a20e6bb4e2a42d3 Mon Sep 17 00:00:00 2001 From: L Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Fri, 27 Feb 2026 16:53:56 -0500 Subject: [PATCH 02/29] move execution methods --- docs/getting_started/astro.rst | 4 +- docs/getting_started/async-execution-mode.rst | 249 --------- .../getting_started/aws-container-run-job.rst | 193 ------- .../azure-container-instance.rst | 138 ----- docs/getting_started/dbt-airflow-concepts.rst | 4 +- docs/getting_started/docker.rst | 111 ---- .../execution-modes-local-conflicts.rst | 133 ----- docs/getting_started/execution-modes.rst | 374 -------------- docs/getting_started/gcp-cloud-run-job.rst | 265 ---------- docs/getting_started/index.rst | 51 +- docs/getting_started/kubernetes.rst | 167 ------ docs/getting_started/mwaa.rst | 4 +- docs/getting_started/open-source.rst | 4 +- .../watcher-execution-mode.rst | 480 ------------------ .../watcher-kubernetes-execution-mode.rst | 214 -------- 15 files changed, 39 insertions(+), 2352 deletions(-) delete mode 100644 docs/getting_started/async-execution-mode.rst delete mode 100644 docs/getting_started/aws-container-run-job.rst delete mode 100644 docs/getting_started/azure-container-instance.rst delete mode 100644 docs/getting_started/docker.rst delete mode 100644 docs/getting_started/execution-modes-local-conflicts.rst delete mode 100644 docs/getting_started/execution-modes.rst delete mode 100644 docs/getting_started/gcp-cloud-run-job.rst delete mode 100644 docs/getting_started/kubernetes.rst delete mode 100644 docs/getting_started/watcher-execution-mode.rst delete mode 100644 
docs/getting_started/watcher-kubernetes-execution-mode.rst diff --git a/docs/getting_started/astro.rst b/docs/getting_started/astro.rst index b590575f2e..56e9fa0d53 100644 --- a/docs/getting_started/astro.rst +++ b/docs/getting_started/astro.rst @@ -1,7 +1,7 @@ .. _astro: -Getting Started on Astro -======================== +Getting Started with Cosmos on Astro +==================================== While it is possible to use Cosmos on Astro with all :ref:`Execution Modes `, we recommend using the ``local`` execution mode. It's the simplest to set up and use. diff --git a/docs/getting_started/async-execution-mode.rst b/docs/getting_started/async-execution-mode.rst deleted file mode 100644 index 6d61bcf22b..0000000000 --- a/docs/getting_started/async-execution-mode.rst +++ /dev/null @@ -1,249 +0,0 @@ -.. _async-execution-mode: - -.. title:: Getting Started with Deferrable Operator - -Airflow Async Execution Mode -============================ - -This execution mode can reduce the runtime by 35% in comparison to Cosmos LOCAL execution mode, but is currently only available for BigQuery. While this mode was introduced in Cosmos 1.9, we strongly encourage users to use Cosmos 1.11, which has significant performance improvements. - -It can be particularly useful for long-running transformations, since it leverages Airflow's `deferrable operators `__. - -In this mode, there is a ``SetupAsyncOperator`` that will pre-generate the SQL files for the dbt project and upload them to Airflow XCom or a remote location. A remote location will only be used if users set ``AIRFLOW__COSMOS__REMOTE_TARGET_PATH`` and ``AIRFLOW__COSMOS__REMOTE_TARGET_PATH_CONN_ID``. This operator is run before the remaining pipeline. -All the pipeline dbt model transformations will be run using ``DbtRunAirflowAsyncOperator`` which, instead of running the ``dbt run`` command for each model. 
They will download the SQL files from the Airflow XCom or remote location and execute them directly leveraging the Airflow ``BigQueryInsertJobOperator``. - -Users can leverage other existing ``BigQueryInsertJobOperator`` features, such as the UI controls to link to the job in the BigQuery UI. - - -Advantages of Airflow Async Mode -++++++++++++++++++++++++++++++++ - -- **Improved Task Throughput:** Async tasks free up Airflow workers by leveraging the Airflow Trigger framework. While long-running SQL transformations are executing in the data warehouse, the worker is released and can handle other tasks, increasing overall task throughput. -- **Better Resource Utilization:** By minimizing idle time on Airflow workers, async tasks allow more efficient use of compute resources. Workers aren't blocked waiting for external systems and can be reused for other work while waiting on async operations. -- **Faster Task Execution:** With Cosmos ``SetupAsyncOperator``, the SQL transformations are precompiled and uploaded to XCom (default behaviour) or a remote location. Instead of invoking a full dbt run during each dbt model task, the SQL files are downloaded from this XCom or remote path and executed directly. This eliminates unnecessary overhead from running the full dbt command, resulting in faster and more efficient task execution. 
- -We have `observed `_ the following performance improvements by running a dbt project with 129 models: - -+----------------------------------------------+--------------------------+ -| How the dbt pipeline was executed | Execution Time (seconds) | -+==============================================+==========================+ -| ``dbt run`` with dbt Core 1.10 | 13 | -+----------------------------------------------+--------------------------+ -| Cosmos 1.11 with ExecutionMode.LOCAL | 11 | -+----------------------------------------------+--------------------------+ -| Cosmos 1.11 with ExecutionMode.AIRFLOW_ASYNC | 7 | -+----------------------------------------------+--------------------------+ - - -Getting Started with Airflow Async Mode -+++++++++++++++++++++++++++++++++++++++ - -This guide walks you through setting up an Astro CLI project and running a Cosmos-based DAG with a deferrable operator, enabling asynchronous task execution in Apache Airflow. - -Prerequisites -+++++++++++++ - -- `Astro CLI `_ -- Airflow>=2.9 - -1. Create Astro-CLI Project -+++++++++++++++++++++++++++ - -Run the following command in your terminal: - -.. code-block:: bash - - astro dev init - -This will create an Astro project with the following structure: - -.. code-block:: bash - - . - ├── Dockerfile - ├── README.md - ├── airflow_settings.yaml - ├── dags/ - ├── include/ - ├── packages.txt - ├── plugins/ - ├── requirements.txt - └── tests/ - - -2. Update Dockerfile -++++++++++++++++++++ - -Edit your Dockerfile to ensure all necessary requirements are included. - -.. code-block:: bash - - FROM astrocrpublic.azurecr.io/runtime:3.0-2 - - -3. Add astronomer-cosmos Dependency -+++++++++++++++++++++++++++++++++++ - -In your ``requirements.txt``, add: - -.. code-block:: bash - - astronomer-cosmos[dbt-bigquery, google]>=1.9 - - -4. Create Airflow DAG -+++++++++++++++++++++ - -1. Create a new DAG file: ``dags/cosmos_async_dag.py`` - -- Update the ``dataset`` and ``project`` - -.. 
code-block:: python - - import os - from datetime import datetime - from pathlib import Path - - from cosmos import ( - DbtDag, - ExecutionConfig, - ExecutionMode, - ProfileConfig, - ProjectConfig, - ) - from cosmos.constants import TestBehavior - from cosmos.profiles import GoogleCloudServiceAccountDictProfileMapping - - DEFAULT_DBT_ROOT_PATH = Path(__file__).resolve().parent / "dbt" - DBT_ROOT_PATH = Path(os.getenv("DBT_ROOT_PATH", DEFAULT_DBT_ROOT_PATH)) - DBT_ADAPTER_VERSION = os.getenv("DBT_ADAPTER_VERSION", "1.9") - - cosmos_async_dag = DbtDag( - project_config=ProjectConfig( - DBT_ROOT_PATH / "jaffle_shop", - ), - profile_config=ProfileConfig( - profile_name="default", - target_name="dev", - profile_mapping=GoogleCloudServiceAccountDictProfileMapping( - conn_id="gcp_conn", - profile_args={ - "dataset": "cosmos_async_demo", - "project": "astronomer-**", - }, - ), - ), - execution_config=ExecutionConfig( - execution_mode=ExecutionMode.AIRFLOW_ASYNC, - async_py_requirements=[f"dbt-bigquery=={DBT_ADAPTER_VERSION}"], - ), - schedule=None, - start_date=datetime(2025, 1, 1), - catchup=False, - dag_id="cosmos_async_dag", - operator_args={ - "location": "US", - "install_deps": True, - "full_refresh": True, - "virtualenv_dir": "dbt_venv", - }, - ) - -2. Folder structure for dbt project - -- Add a valid dbt project inside your Airflow project under ``dags/dbt/``. - - -5. Start the Project -++++++++++++++++++++ - -Launch the Airflow project locally: - -.. code-block:: bash - - astro dev start - -This will: - -- Spin up the scheduler, webserver, and triggerer (needed for deferrable operators) -- Expose Airflow UI at http://localhost:8080 - -6. Create Airflow Connection -++++++++++++++++++++++++++++ - -Create an Airflow connection with following configurations - -- Connection ID: gcp_conn -- Connection Type: google_cloud_platform -- Extra Fields JSON: - -.. 
code-block:: bash - - { - "project": "astronomer-**", - "keyfile_dict": { - "type": "***", - "project_id": "***", - "private_key_id": "***", - "private_key": "***", - "client_email": "***", - "client_id": "***", - "auth_uri": "***", - "token_uri": "***", - "auth_provider_x509_cert_url": "***", - "client_x509_cert_url": "***", - "universe_domain": "***" - } - } - - -7. Execute the DAG -++++++++++++++++++ - -1. Visit the Airflow UI at ``http://localhost:8080`` -2. Enable the DAG: ``cosmos_async_dag`` -3. Trigger the DAG manually - -.. image:: /_static/jaffle_shop_async_execution_mode.png - :alt: Cosmos dbt Async DAG - :align: center - -The ``run`` tasks will run asynchronously via the deferrable operator, freeing up worker slots while waiting on I/O or long-running tasks. - - -Control of where to upload the SQL files -++++++++++++++++++++++++++++++++++++++++ - -For optimal performance we encourage to keep Cosmos standard behaviour (introduced in 1.11), which is to upload the SQL files to XCom, instead of a remote object location. - -For the benchmark example described in a previous section, there was an overhead of ~500 seconds with remote SQL file upload/download, but only ~2 seconds using XCom, which can outweigh the performance improvements introduced by using deferrable operators. - -However, if you want to upload the SQL files to a remote object location instead of XCom, you can set the following environment variables: - -.. code-block:: bash - - AIRFLOW__COSMOS__REMOTE_TARGET_PATH=gs://cosmos_remote_target_demo - AIRFLOW__COSMOS__REMOTE_TARGET_PATH_CONN_ID=gcp_conn - - -Limitations -+++++++++++ - - -1. **Limited to dbt models**: Only dbt resource type models are run asynchronously using Airflow deferrable operators. Other resource types are executed synchronously, similar to the local execution mode. - -2. **BigQuery support only**: This mode only supports BigQuery as the target database. 
If a different target is specified, Cosmos will throw an error indicating the target database is unsupported in this mode. Adding support for other adapters is on the roadmap. - -3. **ProfileMapping parameter required**: You need to specify the ``ProfileMapping`` parameter in the ``ProfileConfig`` for your DAG. Refer to the example DAG below for details on setting this parameter. - -4. **Location parameter required**: You must specify the location of the BigQuery dataset in the ``operator_args`` of the ``DbtDag`` or ``DbtTaskGroup``. The example DAG below provides guidance on this. - -5. **async_py_requirements parameter required**: If you're using the default approach of having a setup task, you must specify the necessary dbt adapter Python requirements based on your profile type for the async execution mode in the ``ExecutionConfig`` of your ``DbtDag`` or ``DbtTaskGroup``. The example DAG below provides guidance on this. - -6. **Creation of new isolated virtual environment for each task run**: By default, the ``SetupAsyncOperator`` creates and executes within a new isolated virtual environment for each task run, which can cause performance issues. To reuse an existing virtual environment, use the ``virtualenv_dir`` parameter within the ``operator_args`` of the ``DbtDag``. We have observed that for ``dbt-bigquery``, the ``SetupAsyncOperator`` executes approximately 30% faster when reusing an existing virtual environment, particularly for transformations that take around 10 minutes to complete. - -7. **Performance degradation when uploading to remote object location**: Even though it is possible to upload the SQL files to a remote object location by setting environment variables, it is slow. We observed that this introduces a significant overhead in the execution time (500s for 129 models). - -8. **TeardownAsyncOperator limitation**: When using a remote object location, in addition to the ``SetupAsyncOperator``, a ``TeardownAsyncOperator`` is also added to the DAG. 
This task will delete the SQL files from the remote location by the end of the DAG Run. This is can lead to a limitation from a retry perspective, as described in the issue `#2066 `_. This can be avoided by setting the ``enable_teardown_async_task`` configuration to ``False``, as described in the :ref:`enable_teardown_async_task` section. - -For a comparison between different Cosmos execution modes, please, check the :ref:`execution-modes-comparison` section. diff --git a/docs/getting_started/aws-container-run-job.rst b/docs/getting_started/aws-container-run-job.rst deleted file mode 100644 index db00fc8c3c..0000000000 --- a/docs/getting_started/aws-container-run-job.rst +++ /dev/null @@ -1,193 +0,0 @@ -.. _aws-container-run-job: - -.. title:: Getting Started with Astronomer Cosmos on AWS ECS - -Getting Started with Astronomer Cosmos on AWS ECS -================================================== - -Astronomer Cosmos provides a unified way to run containerized workloads across multiple cloud providers. In this guide, you’ll learn how to deploy and run a Cosmos job on AWS Elastic Container Service (ECS) using Fargate. -Schematically, the guide will walk you through the steps required to build the following architecture: - -.. figure:: https://github.com/astronomer/astronomer-cosmos/raw/main/docs/_static/cosmos_aws_ecs_schematic.png - :width: 800 - -Prerequisites -+++++++++++++ - -Before you begin, ensure you have the following: - -- An active **AWS Account** with permissions to create ECS clusters, register task definitions, and run tasks. -- The **AWS CLI** installed and configured with the proper credentials. -- **Docker** installed for building your container image. -- Access to your container registry (for example, **Amazon ECR**) where your job image is stored. -- Basic familiarity with AWS ECS concepts (clusters, task definitions, services, and Fargate). 
-- An existing installation of **Astronomer Cosmos** (refer to the `Cosmos documentation `_ for more details). - - - -Step-by-step guide -++++++++++++++++++ - -**Install Airflow and Cosmos** - -Create a python virtualenv, activate it, upgrade pip to the latest version and install ``apache airflow`` & ``astronomer cosmos``: - -.. code-block:: bash - - python3 -m venv venv - source venv/bin/activate - python3 -m pip install --upgrade pip - pip install apache-airflow - pip install "astronomer-cosmos[amazon]" - pip install "aiobotocore[boto3]" -.. note:: - The package aiobotocore[boto3] is optional; you will need it if you plan to use **deferred tasks**. - -**Set up your ECR** - -1. **Set your secrets** - On the `cosmos-examples `_ repository, you can find a ready-to-use Docker image for the AWS ECS service. Just replace your secrets, or you can create your own. - -2. **AWS CLI login** - Before building and pushing your image, you first need to log in to the AWS service using the AWS CLI tool. - Use the following command: - - .. code-block:: bash - - aws ecr-public get-login-password --region | docker login --username AWS --password-stdin - -3. **Build and tag your image** - Once you have your image ready, run the following commands: - - .. code-block:: bash - - docker build -f Dockerfile.aws_ecs . --platform=linux/amd64 -t - docker tag - -4. **Push your image** - - .. code-block:: bash - - docker push - -**Configure Your AWS Environment** - -1. **Create an ECS Cluster** - - Create an ECS cluster to host your Cosmos jobs. You can do this from the AWS Console or using the AWS CLI: - - .. code-block:: bash - - aws ecs create-cluster --cluster-name my-cosmos-cluster - -2. **Set Up an IAM Role for ECS Tasks** - - Ensure you have an IAM role that your ECS tasks can assume. This role should include permissions for ECS, ECR, and CloudWatch (for logs). 
For example, you might create a role named ``ecsTaskExecutionRole`` with the managed policies: - - - ``AmazonECSTaskExecutionRolePolicy`` - - (Optional) Additional policies for custom resource access - -3. **Configure Networking** - - For Fargate tasks, make sure you have at least one subnet (preferably in multiple Availability Zones) and a security group that permits outbound internet access if needed. Note the subnet IDs for later use. - -**Prepare Your Cosmos Job Definition** - -Cosmos jobs are defined as container tasks. Create a task definition file (e.g., ``cosmos-task-definition.json``) with the configuration for your job. - -For example: - -.. code-block:: json - - { - "family": "cosmos-job", - "networkMode": "awsvpc", - "requiresCompatibilities": [ - "FARGATE" - ], - "cpu": "512", - "memory": "1024", - "executionRoleArn": "arn:aws:iam:::role/ecsTaskExecutionRole", - "containerDefinitions": [ - { - "name": "cosmos-job", - "image": "/your_image:latest", - "essential": true, - "environment": [ - { "name": "VAR1", "value": "value1" }, - { "name": "VAR2", "value": "value2" } - ], - "logConfiguration": { - "logDriver": "awslogs", - "options": { - "awslogs-group": "/ecs/cosmos-job", - "awslogs-region": "us-east-1", - "awslogs-stream-prefix": "ecs" - } - } - } - ] - } - -.. note:: - - Replace ````, ````, and adjust the CPU, memory, and environment variables as needed. - -**Deploy Your Cosmos Job on AWS ECS** - -1. **Register the Task Definition** - - Use the AWS CLI to register your task definition: - - .. code-block:: bash - - aws ecs register-task-definition --cli-input-json file://cosmos-task-definition.json - -2. **Run the Task** - - Run a test task on your ECS cluster. Specify the subnets and security groups in your network configuration. For example: - - .. 
code-block:: bash - - aws ecs run-task \ - --cluster my-cosmos-cluster \ - --launch-type FARGATE \ - --task-definition cosmos-job \ - --network-configuration "awsvpcConfiguration={subnets=[subnet-12345678,subnet-87654321],securityGroups=[sg-abcdef12],assignPublicIp=ENABLED}" - - Once the test is ok, we are able to run the dbt commands in our Cosmos DAG: - - .. figure:: https://github.com/astronomer/astronomer-cosmos/raw/main/docs/_static/jaffle_shop_aws_ecs_dag_run.png - :width: 800 - - .. figure:: https://github.com/astronomer/astronomer-cosmos/raw/main/docs/_static/jaffle_shop_aws_ecs_dag_run_logs.png - :width: 800 - - Remember to config your DAG for connecting to AWS ECS and the database connection where you are performing your SQL queries! - - -**Monitor and Debug Your Job** - -1. **Check Task Status** - - You can view the status of your task from the AWS Console under your ECS cluster or via the CLI: - - .. code-block:: bash - - aws ecs describe-tasks --cluster my-cosmos-cluster --tasks - -2. **View Logs** - - Since the task definition configures AWS CloudWatch Logs, you can view your job’s output in the CloudWatch Logs console. Look for log streams with the prefix you set (e.g., ``ecs/cosmos-job``). - -**Conclusion** - - -By following this guide, you can deploy Astronomer Cosmos jobs on AWS ECS using Fargate. This integration enables you to leverage the scalability and managed infrastructure of ECS while maintaining a consistent container orchestration experience with Cosmos. - -For more detailed information on AWS ECS, please refer to the `AWS ECS Developer Guide `_. - -Happy deploying! :rocket: - - -Remember to config your DAG for connecting to AWS ECS and the database connection where you are performing your SQL queries! 
diff --git a/docs/getting_started/azure-container-instance.rst b/docs/getting_started/azure-container-instance.rst deleted file mode 100644 index 86ce3ab9ef..0000000000 --- a/docs/getting_started/azure-container-instance.rst +++ /dev/null @@ -1,138 +0,0 @@ -.. _azure-container-instance: - -Azure Container Instance Execution Mode -======================================= -.. versionadded:: 1.4 - -This tutorial will guide you through the steps required to use Azure Container Instance as the Execution Mode for your dbt code with Astronomer Cosmos. Schematically, the guide will walk you through the steps required to build the following architecture: - -.. figure:: https://github.com/astronomer/astronomer-cosmos/raw/main/docs/_static/cosmos_aci_schematic.png - :width: 800 - -Prerequisites -+++++++++++++ -1. Docker with docker daemon (Docker Desktop on MacOS). Follow the `Docker installation guide `_. -2. Airflow -3. Azure CLI (install guide here: `Azure CLI `_) -4. Astronomer-cosmos package containing the dbt Azure Container Instance operators -5. Azure account with: - 1. A resource group - 2. A service principal with `Contributor` permissions on the resource group - 3. A Container Registry - 4. A Postgres instance accessible from Azure. (we use an Azure Postgres instance in the example) -6. Docker image built with required dbt project and dbt DAG -7. dbt DAG with dbt Azure Container Instance operators in the Airflow DAGs directory to run in Airflow - -More information on how to achieve 2-6 is detailed below. - -Note that the steps below will walk you through an example, for which the code can be found HERE - -Step-by-step guide -++++++++++++++++++ - -**Install Airflow and Cosmos** - -Create a python virtualenv, activate it, upgrade pip to the latest version and install apache airflow & astronomer-postgres - -.. 
code-block:: bash - - python -m venv venv - source venv/bin/activate - pip install --upgrade pip - pip install apache-airflow - pip install "astronomer-cosmos[dbt-postgres,azure-container-instance]" - -**Setup Postgres database** - -You will need a postgres database running to be used as the database for the dbt project. In order to have it accessible from Azure Container Instance, the easiest way is to create an Azure Postgres instance. For this, run the following (assuming you are logged into your Azure account) - -.. code-block:: bash - - az postgres server create -l westeurope -g <<>> -n <<>> -u dbadmin -p <<>> --sku-name B_Gen5_1 --ssl-enforcement Enabled - - -**Setup Azure Container Registry** -In order to run a container in Azure Container Instance, it needs access to the container image. In our setup, we will use Azure Container Registry for this. To set an Azure Container Registry up, you can use the following bash command: - -.. code-block:: bash - - az acr create --name <<>> --resource-group <<>> --sku Basic --admin-enabled - -**Build the dbt Docker image** - -For the Docker operators to work, you need to create a docker image that will be supplied as image parameter to the dbt docker operators used in the DAG. - -Clone the `cosmos-example `_ repo - -.. code-block:: bash - - git clone https://github.com/astronomer/cosmos-example.git - cd cosmos-example - -Create a docker image containing the dbt project files and dbt profile by using the `Dockerfile `_, which will be supplied to the Docker operators. - -.. code-block:: bash - - docker build -t <<>:1.0.0 -f Dockerfile.azure_container_instance . - -After this, the image needs to be pushed to the registry of your choice. Note that your image name should contain the name of your registry: -.. code-block:: bash - - docker push <<>>:1.0.0 - -.. note:: - - You may need to ensure docker knows to use the right credentials. If using Azure Container Registry, this can be done by running the following command: - .. 
code-block:: bash - - az acr login - -.. note:: - - If running on M1, you may need to set the following envvars for running the docker build command in case it fails - - .. code-block:: bash - - export DOCKER_BUILDKIT=0 - export COMPOSE_DOCKER_CLI_BUILD=0 - export DOCKER_DEFAULT_PLATFORM=linux/amd64 - -Take a read of the Dockerfile to understand what it does so that you could use it as a reference in your project. - - - The `dbt profile `_ file is added to the image - - The dags directory containing the `dbt project jaffle_shop `_ is added to the image - - The dbt_project.yml is replaced with `postgres_profile_dbt_project.yml `_ which contains the profile key pointing to postgres_profile as profile creation is not handled at the moment for K8s operators like in local mode. - -**Setup Airflow Connections** -Now you have the required Azure infrastructure, you still need to add configuration to Airflow to ensure the infrastructure can be used. You'll need 3 connections: - -1. ``aci_db``: a Postgres connection to your Azure Postgres instance. -2. ``aci``: an Azure Container Instance connection configured with a Service Principal with sufficient permissions (i.e. ``Contributor`` on the resource group in which you will use Azure Container Instances). -3. ``acr``: an Azure Container Registry connection configured for your Azure Container Registry. - -Check out the ``airflow-settings.yml`` file `here `_ for an example. If you are using Astro CLI, filling in the right values here will be enough for this to work. - -**Setup and Trigger the DAG with Airflow** - -Copy the dags directory from cosmos-example repo to your Airflow home - -.. code-block:: bash - - cp -r dags $AIRFLOW_HOME/ - -Run Airflow - -.. code-block:: bash - - airflow standalone - -.. note:: - - You might need to run airflow standalone with ``sudo`` if your Airflow user is not able to access the docker socket URL or pull the images in the Kind cluster. 
- -Log in to Airflow through a web browser ``http://localhost:8080/``, using the user ``airflow`` and the password described in the ``standalone_admin_password.txt`` file. - -Enable and trigger a run of the `jaffle_shop_azure_container_instance `_ DAG. You will be able to see the following successful DAG run. - -.. figure:: https://github.com/astronomer/astronomer-cosmos/raw/main/docs/_static/jaffle_shop_azure_container_instance.png - :width: 800 diff --git a/docs/getting_started/dbt-airflow-concepts.rst b/docs/getting_started/dbt-airflow-concepts.rst index 70c4feae8d..ee55abe694 100644 --- a/docs/getting_started/dbt-airflow-concepts.rst +++ b/docs/getting_started/dbt-airflow-concepts.rst @@ -1,7 +1,7 @@ .. _dbt-airflow-concepts: -Similar dbt & Airflow concepts -============================== +Similar dbt and Airflow concepts +================================ While dbt is an open source tool for data transformations and analysis, using SQL, Airflow focuses on being a platform for the development, scheduling and monitoring of batch-oriented workflows, using Python. Although both tools have many diff --git a/docs/getting_started/docker.rst b/docs/getting_started/docker.rst deleted file mode 100644 index 0005914886..0000000000 --- a/docs/getting_started/docker.rst +++ /dev/null @@ -1,111 +0,0 @@ -.. _docker: - -Docker Execution Mode -======================================== - -The following tutorial illustrates how to run the Cosmos dbt Docker Operators and the required setup for them. - -Requirements -++++++++++++ - -1. Docker with docker daemon (Docker Desktop on MacOS). Follow the `Docker installation guide `_. -2. Airflow -3. Astronomer-cosmos package containing the dbt Docker operators -4. Postgres docker container -5. Docker image built with required dbt project and dbt DAG -6. dbt DAG with dbt docker operators in the Airflow DAGs directory to run in Airflow - -More information on how to achieve 2-6 is detailed below. 
- -Step-by-step instructions -+++++++++++++++++++++++++ - -**Install Airflow and Cosmos** - -Create a python virtualenv, activate it, upgrade pip to the latest version and install `Apache Airflow® `_ & astronomer-cosmos - -.. code-block:: bash - - python -m venv venv - source venv/bin/activate - pip install --upgrade pip - pip install apache-airflow - pip install "astronomer-cosmos[dbt-postgres]" - -**Setup Postgres database** - -You will need a postgres database running to be used as the database for the dbt project. Run the following command to run and expose a postgres database - -.. code-block:: bash - - docker run --name some-postgres -e POSTGRES_PASSWORD="" -e POSTGRES_USER=postgres -e POSTGRES_DB=postgres -p5432:5432 -d postgres - -**Build the dbt Docker image** - -For the Docker operators to work, you need to create a docker image that will be supplied as image parameter to the dbt docker operators used in the DAG. - -Clone the `cosmos-example `_ repo - -.. code-block:: bash - - git clone https://github.com/astronomer/cosmos-example.git - cd cosmos-example - -Create a docker image containing the dbt project files and dbt profile by using the `Dockerfile `_, which will be supplied to the Docker operators. - -.. code-block:: bash - - docker build -t dbt-jaffle-shop:1.0.0 -f Dockerfile.postgres_profile_docker_k8s . - -.. note:: - - If running on M1, you may need to set the following envvars for running the docker build command in case it fails - - .. code-block:: bash - - export DOCKER_BUILDKIT=0 - export COMPOSE_DOCKER_CLI_BUILD=0 - export DOCKER_DEFAULT_PLATFORM=linux/amd64 - -Take a read of the Dockerfile to understand what it does so that you could use it as a reference in your project.
- - - The `dbt profile `_ file is added to the image - - The dags directory containing the `dbt project jaffle_shop `_ is added to the image - - The dbt_project.yml is replaced with `postgres_profile_dbt_project.yml `_ which contains the profile key pointing to postgres_profile as profile creation is not handled at the moment for K8s operators like in local mode. - -**Setup and Trigger the DAG with Airflow** - -Copy the dags directory from cosmos-example repo to your Airflow home - -.. code-block:: bash - - cp -r dags $AIRFLOW_HOME/ - -Run Airflow - -.. code-block:: bash - - airflow standalone - -.. note:: - - You might need to run airflow standalone with ``sudo`` if your Airflow user is not able to access the docker socket URL or pull the images in the Kind cluster. - -Log in to Airflow through a web browser ``http://localhost:8080/``, using the user ``airflow`` and the password described in the ``standalone_admin_password.txt`` file. - -Enable and trigger a run of the `jaffle_shop_docker `_ DAG. You will be able to see the following successful DAG run. - -.. figure:: https://github.com/astronomer/astronomer-cosmos/raw/main/docs/_static/jaffle_shop_docker_dag_run.png - :width: 800 - - -Specifying ProfileConfig -+++++++++++++++++++++++++ - -Starting with Cosmos 1.8.0, you can use the ``profile_config`` argument in your Dbt DAG Docker operators to reference -profiles for your dbt project defined in a profiles.yml file. To do so, provide the file’s path via the -``profiles_yml_path`` parameter in ``profile_config``. - -Note that in ``ExecutionMode.DOCKER``, the ``profile_config`` is only compatible with the ``profiles_yml_path`` -approach. The ``profile_mapping`` method will not work because the required Airflow connections cannot be accessed -within the Docker container to map them to the dbt profile. 
diff --git a/docs/getting_started/execution-modes-local-conflicts.rst b/docs/getting_started/execution-modes-local-conflicts.rst deleted file mode 100644 index 9fec173751..0000000000 --- a/docs/getting_started/execution-modes-local-conflicts.rst +++ /dev/null @@ -1,133 +0,0 @@ -:orphan: - -.. _execution-modes-local-conflicts: - -Airflow and dbt dependencies conflicts -====================================== - -When using the `Local Execution Mode `__, users may face dependency conflicts between -`Apache Airflow® `_ and dbt. The conflicts may increase depending on the Airflow providers and dbt adapters being used. - -If you find errors, we recommend users isolating the installation of dbt from the Airflow installation. -With the `Local Execution Mode `__, this can be accomplished by installing dbt in a separate -Python virtualenv and setting the `ExecutionConfig.dbt_executable_path <../configuration/execution-config.html>`_ and -`RenderConfig.dbt_executable_path <../configuration/render-config.html>`_ parameters. - -The page `execution modes `__ describes many other methods that support isolating dbt from Airflow. 
- -In the following table, ``x`` represents combinations that lead to conflicts (vanilla ``apache-airflow`` and ``dbt-core`` packages): - -+---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ -| Airflow / DBT | 1.0 | 1.1 | 1.2 | 1.3 | 1.4 | 1.5 | 1.6 | 1.7 | 1.8 | 1.9 | 1.10 | -+===============+=====+=====+=====+=====+=====+=====+=====+=====+=====+=====+======+ -| 2.2 | | | | x | x | x | x | x | x | x | x | -+---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ -| 2.3 | x | x | | x | x | x | x | x | x | x | x | -+---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ -| 2.4 | x | x | x | | | | | | | | | -+---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ -| 2.5 | x | x | x | | | | | | | | | -+---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ -| 2.6 | x | x | x | x | x | | | | | | | -+---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ -| 2.7 | x | x | x | x | x | | | | | | | -+---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ -| 2.8 | x | x | x | x | x | | x | | | | x | -+---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ -| 2.9 | x | x | x | x | x | | | | | | | -+---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ -| 2.10 | x | x | x | x | x | | | | | | | -+---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ -| 2.11 | x | x | x | x | x | | | | | | | -+---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ -| 3.0 | x | x | x | x | x | x | x | x | | | x | -+---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+------+ - -Examples of errors ------------------------------------ - -.. 
code-block:: bash - - The conflict is caused by: - apache-airflow 2.8.0 depends on pydantic>=2.3.0 - dbt-semantic-interfaces 0.4.2 depends on pydantic~=1.10 - apache-airflow 2.8.0 depends on pydantic>=2.3.0 - dbt-semantic-interfaces 0.4.2.dev0 depends on pydantic~=1.10 - apache-airflow 2.8.0 depends on pydantic>=2.3.0 - dbt-semantic-interfaces 0.4.1 depends on pydantic~=1.10 - apache-airflow 2.8.0 depends on pydantic>=2.3.0 - dbt-semantic-interfaces 0.4.0 depends on pydantic~=1.10 - - -.. code-block:: bash - - ERROR: Cannot install apache-airflow==2.2.4 and dbt-core==1.5.0 because these package versions have conflicting dependencies. - The conflict is caused by: - apache-airflow 2.2.4 depends on jinja2<3.1 and >=2.10.1 - dbt-core 1.5.0 depends on Jinja2==3.1.2 - -.. code-block:: bash - - ERROR: Cannot install apache-airflow==2.6.0 and dbt-core because these package versions have conflicting dependencies. - The conflict is caused by: - apache-airflow 2.6.0 depends on importlib-metadata<5.0.0 and >=1.7; python_version < "3.9" - dbt-semantic-interfaces 0.1.0.dev7 depends on importlib-metadata==6.6.0 - -.. code-block:: bash - - ERROR: Cannot install apache-airflow, apache-airflow==2.7.0 and dbt-core==1.4.0 because these package versions have conflicting dependencies. - - The conflict is caused by: - dbt-core 1.4.0 depends on pyyaml>=6.0 - connexion 2.12.0 depends on PyYAML<6 and >=5.1 - dbt-core 1.4.0 depends on pyyaml>=6.0 - connexion 2.11.2 depends on PyYAML<6 and >=5.1 - dbt-core 1.4.0 depends on pyyaml>=6.0 - connexion 2.11.1 depends on PyYAML<6 and >=5.1 - dbt-core 1.4.0 depends on pyyaml>=6.0 - connexion 2.11.0 depends on PyYAML<6 and >=5.1 - apache-airflow 2.7.0 depends on jsonschema>=4.18.0 - flask-appbuilder 4.3.3 depends on jsonschema<5 and >=3 - connexion 2.10.0 depends on jsonschema<4 and >=2.5.1 - -.. code-block:: bash - -ERROR: Cannot install apache-airflow and dbt-core==1.10.0 because these package versions have conflicting dependencies. 
- -The conflict is caused by: - dbt-core 1.10.0 depends on pydantic<2 - apache-airflow-core 3.0.0 depends on pydantic>=2.11.0 - - - -How to reproduce ----------------- - -The table was created by running `nox `__ with the following ``noxfile.py``: - -.. code-block:: python - - import nox - - nox.options.sessions = ["compatibility"] - nox.options.reuse_existing_virtualenvs = True - - - @nox.session(python=["3.10"]) - @nox.parametrize( - "dbt_version", - ["1.0", "1.1", "1.2", "1.3", "1.4", "1.5", "1.6", "1.7", "1.8", "1.9", "1.10"], - ) - @nox.parametrize( - "airflow_version", - ["2.2.4", "2.3", "2.4", "2.5", "2.6", "2.7", "2.8", "2.9", "2.10", "2.11", "3.0"], - ) - def compatibility(session: nox.Session, airflow_version, dbt_version) -> None: - """Run both unit and integration tests.""" - session.run( - "pip3", - "install", - "--pre", - f"apache-airflow=={airflow_version}", - f"dbt-core=={dbt_version}", - ) diff --git a/docs/getting_started/execution-modes.rst b/docs/getting_started/execution-modes.rst deleted file mode 100644 index ea6a03f283..0000000000 --- a/docs/getting_started/execution-modes.rst +++ /dev/null @@ -1,374 +0,0 @@ -.. _execution-modes: - -Execution Modes -=============== - -Cosmos can run ``dbt`` commands using several different approaches, called ``execution modes``: - -1. **local**: Run ``dbt`` commands using a local ``dbt`` installation (default) -2. **virtualenv**: Run ``dbt`` commands from Python virtual environments managed by Cosmos -3. **docker**: Run ``dbt`` commands from Docker containers managed by Cosmos (requires a pre-existing Docker image) -4. **kubernetes**: Run ``dbt`` commands from Kubernetes Pods managed by Cosmos (requires a pre-existing Docker image) -5. **aws_eks**: Run ``dbt`` commands from AWS EKS Pods managed by Cosmos (requires a pre-existing Docker image) -6. **azure_container_instance**: Run ``dbt`` commands from Azure Container Instances managed by Cosmos (requires a pre-existing Docker image) -7. 
**gcp_cloud_run_job**: Run ``dbt`` commands from GCP Cloud Run Job instances managed by Cosmos (requires a pre-existing Docker image) -8. **aws_ecs**: Run ``dbt`` commands from AWS ECS instances managed by Cosmos (requires a pre-existing Docker image) -9. **airflow_async**: (stable since Cosmos 1.9.0) Run the dbt resources from your dbt project asynchronously, by submitting the corresponding compiled SQLs to Apache Airflow's `Deferrable operators `__ -10. **watcher**: (experimental since Cosmos 1.11.0) Run a single ``dbt build`` command from a producer task and have sensor tasks to watch the progress of the producer, with improved DAG run time while maintaining the tasks lineage in the Airflow UI, and ability to retry failed tasks. Check the :ref:`watcher-execution-mode` for more details. -11. **watcher_kubernetes**: (experimental since Cosmos 1.13.0) Combines the speed of the watcher execution mode with the isolation of Kubernetes. Check the :ref:`watcher-kubernetes-execution-mode` for more details. - -The choice of the ``execution mode`` can vary based on each user's needs and concerns. For more details, check each execution mode described below. - -.. _execution-modes-comparison: - -.. list-table:: Execution Modes Comparison - :widths: 25 25 25 25 - :header-rows: 1 - - * - Execution Mode - - Task Duration - - Environment Isolation - - Cosmos Profile Management - * - Local - - Fast - - None - - Yes - * - Virtualenv - - Medium - - Lightweight - - Yes - * - Docker - - Slow - - Medium - - No - * - Kubernetes - - Slow - - High - - No - * - AWS_EKS - - Slow - - High - - No - * - Azure Container Instance - - Slow - - High - - No - * - GCP Cloud Run Job Instance - - Slow - - High - - No - * - AWS ECS - - Slow - - High - - No - * - Airflow Async - - Very Fast - - Medium - - Yes - * - Watcher - - Very Fast - - None - - Yes - * - Watcher Kubernetes - - Fast - - High - - No - -Local ------ - -By default, Cosmos uses the ``local`` execution mode. 
- -The ``local`` execution mode is the fastest way to run Cosmos operators since they don't install ``dbt`` nor build docker containers. However, it may not be an option for users using managed Airflow services such as -Google Cloud Composer, since Airflow and ``dbt`` dependencies can conflict (:ref:`execution-modes-local-conflicts`), the user may not be able to install ``dbt`` in a custom path. - -The ``local`` execution mode assumes a ``dbt`` binary is reachable within the Airflow worker node. - -If ``dbt`` was not installed as part of the Cosmos packages, -users can define a custom path to ``dbt`` by declaring the argument ``dbt_executable_path``. - -.. note:: - Starting in the 1.4 version, Cosmos tries to leverage the dbt partial parsing (``partial_parse.msgpack``) to speed up task execution. - This feature is bound to `dbt partial parsing limitations `_. - Learn more: :ref:`partial-parsing`. - -When using the ``local`` execution mode, Cosmos converts Airflow Connections into a native ``dbt`` profiles file (``profiles.yml``). - -Example of how to use, for instance, when ``dbt`` was installed together with Cosmos: - -.. literalinclude:: ../../dev/dags/basic_cosmos_dag.py - :language: python - :start-after: [START local_example] - :end-before: [END local_example] - - -Virtualenv ----------- - -If you're using managed Airflow on GCP (Cloud Composer), for instance, we recommend you use the ``virtualenv`` execution mode. - -The ``virtualenv`` mode isolates the Airflow worker dependencies from ``dbt`` by managing a Python virtual environment created during task execution and deleted afterwards. - -In this case, users are responsible for declaring which version of ``dbt`` they want to use by giving the argument ``py_requirements``. This argument can be set directly in operator instances or when instantiating ``DbtDag`` and ``DbtTaskGroup`` as part of ``operator_args``. 
- -Similar to the ``local`` execution mode, Cosmos converts Airflow Connections into a way ``dbt`` understands them by creating a ``dbt`` profile file (``profiles.yml``). -Also similar to the ``local`` execution mode, Cosmos will by default attempt to use a ``partial_parse.msgpack`` if one exists to speed up parsing. - -Some drawbacks of this approach: - -- It is slower than ``local`` because it creates a new Python virtual environment for each Cosmos dbt task run. -- If dbt is unavailable in the Airflow scheduler, the default ``LoadMode.DBT_LS`` will not work. In this scenario, users must use a :ref:`parsing-methods` that does not rely on dbt, such as ``LoadMode.MANIFEST``. -- Only ``InvocationMode.SUBPROCESS`` is currently supported; attempting to use ``InvocationMode.DBT_RUNNER`` will raise an error. - -Example of how to use: - -.. literalinclude:: ../../dev/dags/example_virtualenv.py - :language: python - :start-after: [START virtualenv_example] - :end-before: [END virtualenv_example] - -Docker ------- - -The ``docker`` approach assumes users have a previously created Docker image, which should contain all the ``dbt`` pipelines and a ``profiles.yml``, managed by the user. - -The user has better environment isolation than when using ``local`` or ``virtualenv`` modes, but also more responsibility (ensuring the Docker container used has up-to-date files and managing secrets potentially in multiple places). - -The other challenge with the ``docker`` approach is if the Airflow worker is already running in Docker, which sometimes can lead to challenges running `Docker in Docker `__. - -This approach can be significantly slower than ``virtualenv`` since it may have to build the ``Docker`` container, which is slower than creating a Virtualenv with ``dbt-core``. -If dbt is unavailable in the Airflow scheduler, the default ``LoadMode.DBT_LS`` will not work. In this scenario, users must use a :ref:`parsing-methods` that does not rely on dbt, such as ``LoadMode.MANIFEST``.
- -Check the step-by-step guide on using the ``docker`` execution mode at :ref:`docker`. - -Example DAG: - -.. code-block:: python - - docker_cosmos_dag = DbtDag( - # ... - execution_config=ExecutionConfig( - execution_mode=ExecutionMode.DOCKER, - ), - operator_args={ - "image": "dbt-jaffle-shop:1.0.0", - "network_mode": "bridge", - }, - ) - - -Kubernetes ----------- - -The ``kubernetes`` approach is a very isolated way of running ``dbt`` since the ``dbt`` commands run from within a Kubernetes Pod, usually on a separate host. - -It assumes the user has a Kubernetes cluster. It also expects the user to ensure the Docker container has up-to-date ``dbt`` pipelines and profiles, potentially leading the user to declare secrets in two places (Airflow and Docker container). - -The ``Kubernetes`` deployment may be slower than ``Docker`` and ``Virtualenv`` assuming that the container image is built (which is slower than creating a Python ``virtualenv`` and installing ``dbt-core``) and the Airflow task needs to spin up a new ``Pod`` in Kubernetes. - -Check the step-by-step guide on using the ``kubernetes`` execution mode at :ref:`kubernetes`. - -Example DAG: - -.. literalinclude:: ../../dev/dags/jaffle_shop_kubernetes.py - :language: python - :start-after: [START kubernetes_seed_example] - :end-before: [END kubernetes_seed_example] - -AWS_EKS ----------- - -The ``aws_eks`` approach is very similar to the ``kubernetes`` approach, but it is specifically designed to run on AWS EKS clusters. -It uses the `EKSPodOperator `_ -to run the dbt commands. You need to provide the ``cluster_name`` in your operator_args to connect to the AWS EKS cluster. - - -Example DAG: - -.. code-block:: python - - postgres_password_secret = Secret( - deploy_type="env", - deploy_target="POSTGRES_PASSWORD", - secret="postgres-secrets", - key="password", - ) - - docker_cosmos_dag = DbtDag( - # ...
- execution_config=ExecutionConfig( - execution_mode=ExecutionMode.AWS_EKS, - ), - operator_args={ - "image": "dbt-jaffle-shop:1.0.0", - "cluster_name": CLUSTER_NAME, - "get_logs": True, - "is_delete_operator_pod": False, - "secrets": [postgres_password_secret], - }, - ) - -Azure Container Instance ------------------------- -.. versionadded:: 1.4 - -Similar to the ``kubernetes`` approach, using ``Azure Container Instances`` as the execution mode gives a very isolated way of running ``dbt``, since the ``dbt`` run itself is run within a container running in an Azure Container Instance. - -This execution mode requires the user to have an Azure environment in which Azure Container Groups can be run (see :ref:`azure-container-instance` for more details on the exact requirements). Similarly to the ``Docker`` and ``Kubernetes`` execution modes, a Docker container should be available, containing the up-to-date ``dbt`` pipelines and profiles. - -Each task will create a new container on Azure, giving full isolation. This, however, comes at the cost of speed, as this separation of tasks introduces some overhead. Please check out the step-by-step guide for using Azure Container Instance as the execution mode. - - -.. code-block:: python - - docker_cosmos_dag = DbtDag( - # ... - execution_config=ExecutionConfig( - execution_mode=ExecutionMode.AZURE_CONTAINER_INSTANCE - ), - operator_args={ - "ci_conn_id": "aci", - "registry_conn_id": "acr", - "resource_group": "my-rg", - "name": "my-aci-{{ ti.task_id.replace('.','-').replace('_','-') }}", - "region": "West Europe", - "image": "dbt-jaffle-shop:1.0.0", - }, - ) - -GCP Cloud Run Job ------------------------- -.. versionadded:: 1.7 - -The ``gcp_cloud_run_job`` execution mode is particularly useful for users who prefer to run their ``dbt`` commands on Google Cloud infrastructure, taking advantage of Cloud Run's scalability, isolation, and managed service capabilities.
- -For the ``gcp_cloud_run_job`` execution mode to work, a Cloud Run Job instance must first be created using a previously built Docker container. This container should include the latest ``dbt`` pipelines and profiles. You can find more details in the `Cloud Run Job creation guide `__ . - -This execution mode allows users to run ``dbt`` core CLI commands in a Google Cloud Run Job instance. This mode leverages the ``CloudRunExecuteJobOperator`` from the Google Cloud Airflow provider to execute commands within a Cloud Run Job instance, where ``dbt`` is already installed. Similarly to the ``Docker`` and ``Kubernetes`` execution modes, a Docker container should be available, containing the up-to-date ``dbt`` pipelines and profiles. - -Each task will create a new Cloud Run Job execution, giving full isolation. The separation of tasks adds extra overhead; however, that can be mitigated by using the ``concurrency`` parameter in ``DbtDag``, which will result in parallelized execution of ``dbt`` models. - - -.. code-block:: python - - gcp_cloud_run_job_cosmos_dag = DbtDag( - # ... - execution_config=ExecutionConfig(execution_mode=ExecutionMode.GCP_CLOUD_RUN_JOB), - operator_args={ - "project_id": "my-gcp-project-id", - "region": "europe-west1", - "job_name": "my-crj-{{ ti.task_id.replace('.','-').replace('_','-') }}", - }, - ) - - -AWS ECS ---------- -.. versionadded:: 1.9.0 - -Using ``AWS Elastic Container Service (ECS)`` as the execution mode provides an isolated and scalable way to run ``dbt`` tasks within an AWS ECS service. This execution mode ensures that each ``dbt`` run is performed inside a dedicated container running in an ECS task. - -This execution mode requires the user to have an AWS environment configured to run ECS tasks (see :ref:``aws-ecs`` for more details on the exact requirements). Similar to the ``Docker`` and ``Kubernetes`` execution modes, a Docker container should be available, containing the up-to-date ``dbt`` pipelines and profiles. 
- -Each task will create a new ECS task execution, providing full isolation. However, this separation introduces some overhead in execution time due to container startup and provisioning. For users who require faster execution times, configuring appropriate ECS task definitions and cluster optimizations can help mitigate these delays. - -Please refer to the step-by-step guide for using AWS ECS as the execution mode. - -.. code-block:: python - - aws_ecs_cosmos_dag = DbtDag( - # ... - execution_config=ExecutionConfig(execution_mode=ExecutionMode.AWS_ECS), - operator_args={ - "aws_conn_id": "aws_default", - "cluster": "my-ecs-cluster", - "task_definition": "my-dbt-task", - "container_name": "dbt-container", - "launch_type": "FARGATE", - "deferrable": True, - "network_configuration": { - "awsvpcConfiguration": { - "subnets": ["<<>>"], - "assignPublicIp": "ENABLED", - }, - }, - "environment_variables": {"DBT_PROFILE_NAME": "default"}, - }, - ) - -.. _airflow-async-execution-mode: - -Airflow Async -------------- - -.. versionadded:: 1.9.0 - -Although this execution mode was introduced in Cosmos 1.9, we strongly encourage users to use Cosmos 1.11, which has significant performance improvements. -In comparison to the ``local``, the ``airflow_async`` execution mode can reduce the execution time of a dbt project by up to 36%. - -The ``airflow_async`` execution mode is a way to run the dbt resources from your dbt project using Apache Airflow's -`Deferrable operators `__. -This execution mode could be preferred when you've long running resources and you want to run them asynchronously by -leveraging Airflow's deferrable operators. With that, you would be able to potentially observe higher throughput of tasks -as more dbt nodes will be run in parallel since they won't be blocking Airflow's worker slots. - -Example DAG: - -.. 
literalinclude:: ../../dev/dags/simple_dag_async.py - :language: python - :start-after: [START airflow_async_execution_mode_example] - :end-before: [END airflow_async_execution_mode_example] - -For a full step-by-step guide and limitations, check the :ref:`async-execution-mode` page. - - -Watcher Execution Mode (Experimental) -------------------------------------- - -.. versionadded:: 1.11.0 - -The ``watcher`` execution mode is an experimental execution mode that runs a single ``dbt build`` command from a producer task and has sensor tasks to watch the progress of the producer. -It is designed to improve DAG run time while maintaining the tasks lineage in the Airflow UI, and ability to retry failed tasks. - -Check the :ref:`watcher-execution-mode` for more details. - - -Watcher Kubernetes Execution Mode (Experimental) ------------------------------------------------- - -.. versionadded:: 1.13.0 - -The ``watcher_kubernetes`` execution mode combines the speed of the ``watcher`` execution mode with the isolation of the ``kubernetes`` execution mode. It runs a single ``dbt build`` command from a producer task inside a Kubernetes pod and has sensor tasks to watch the progress of the producer. - -Check the :ref:`watcher-kubernetes-execution-mode` for more details. - - -.. _invocation_modes: - -Invocation Modes -================ -.. versionadded:: 1.4 - -For ``ExecutionMode.LOCAL`` execution mode, Cosmos supports two invocation modes for running dbt: - -1. ``InvocationMode.SUBPROCESS``: In this mode, Cosmos runs dbt cli commands using the Python ``subprocess`` module and parses the output to capture logs and to raise exceptions. - -2. ``InvocationMode.DBT_RUNNER``: In this mode, Cosmos uses the ``dbtRunner`` available for `dbt programmatic invocations `__ to run dbt commands. \ - In order to use this mode, dbt must be installed in the same local environment. 
This mode does not have the overhead of spawning new subprocesses or parsing the output of dbt commands and is faster than ``InvocationMode.SUBPROCESS``. \ - This mode requires dbt version 1.5.0 or higher. It is up to the user to resolve :ref:`execution-modes-local-conflicts` when using this mode. - -The invocation mode can be set in the ``ExecutionConfig`` as shown below: - -.. code-block:: python - - from cosmos.constants import InvocationMode - - dag = DbtDag( - # ... - execution_config=ExecutionConfig( - execution_mode=ExecutionMode.LOCAL, - invocation_mode=InvocationMode.DBT_RUNNER, - ), - ) - -If the invocation mode is not set, Cosmos will attempt to use ``InvocationMode.DBT_RUNNER`` if dbt is installed in the same environment as the worker, otherwise it will fall back to ``InvocationMode.SUBPROCESS``. diff --git a/docs/getting_started/gcp-cloud-run-job.rst b/docs/getting_started/gcp-cloud-run-job.rst deleted file mode 100644 index fa4d0c60c4..0000000000 --- a/docs/getting_started/gcp-cloud-run-job.rst +++ /dev/null @@ -1,265 +0,0 @@ -.. _gcp-cloud-run-job: - -GCP Cloud Run Job Execution Mode -======================================= -.. versionadded:: 1.7 - -This tutorial will guide you through the steps required to use Cloud Run Job instance as the Execution Mode for your dbt code with Astronomer Cosmos. This guide will walk you through the steps required to build the following architecture: - -.. figure:: https://github.com/astronomer/astronomer-cosmos/raw/main/docs/_static/cosmos_gcp_crj_schematic.png - :width: 600 - -Prerequisites -+++++++++++++ -1. Docker with docker daemon (Docker Desktop on MacOS). Follow the `Docker installation guide `_. -2. Airflow -3. Google Cloud SDK (`install guide `_) -4. Astronomer-cosmos package containing the dbt Cloud Run Job operators -5. GCP account with: - 1. A GCP project (`setup guide `_) - 2. 
IAM roles: - * Basic Role: `Owner `_ (control over whole project) or - * Predefined Roles: `Artifact Registry Administrator `_, `Cloud Run Developer `_ (control over specific services) - 3. Enabled service APIs: - * Artifact Registry API - * Cloud Run Admin API - * BigQuery API - 4. A service account with BigQuery roles: `JobUser `_ and `DataEditor `_ -6. Docker image built with required dbt project and dbt DAG -7. dbt DAG with Cloud Run Job operators in the Airflow DAGs directory to run in Airflow - -.. note:: - - Google Cloud Platform provides free tier on many resources, as well as Free Trial with $300 in credit. Learn more `here `_. - -More information on how to achieve 2-6 is detailed below. - - -Step-by-step guide -++++++++++++++++++ - -**Install Airflow and Cosmos** - -Create a python virtualenv, activate it, upgrade pip to the latest version and install ``apache airflow`` & ``astronomer cosmos``: - -.. code-block:: bash - - python3 -m venv venv - source venv/bin/activate - python3 -m pip install --upgrade pip - pip install apache-airflow - pip install "astronomer-cosmos[dbt-bigquery,gcp-cloud-run-job]" - -**Setup gcloud and environment variables** - -Set environment variables that will be used to create cloud infrastructure. Replace placeholders with your unique GCP ``project id`` and ``region`` of the project: - -.. code-block:: bash - - export PROJECT_ID=<<>> - export REGION=<<>> - export REPO_NAME="astronomer-cosmos-dbt" - export IMAGE_NAME="$REGION-docker.pkg.dev/$PROJECT_ID/$REPO_NAME/cosmos-example" - export SERVICE_ACCOUNT_NAME="cloud-run-job-sa" - export DATASET_NAME="astronomer_cosmos_example" - export CLOUD_RUN_JOB_NAME="astronomer-cosmos-example" - -Before we do anything in the GCP project, we first need to authorize gcloud to access the Cloud Platform with Google user credentials: - -.. code-block:: bash - - gcloud auth login - -You'll receive a link to sign into Google Cloud SDK using a Google Account. 
- -Next, set default ``project id`` using below command: - -.. code-block:: bash - - gcloud config set project $PROJECT_ID - -In case BigQuery has never been used before in the project, run below command to enable BigQuery API: - -.. code-block:: bash - - gcloud services enable bigquery.googleapis.com - -**Setup Artifact Registry** - -In order to run a container in Cloud Run Job, it needs access to the container image. In our setup, we will use Artifact Registry repository that stores images. -To use Artifact Registry, you need to enable the API first: - -.. code-block:: bash - - gcloud services enable artifactregistry.googleapis.com - -To set an Artifact Registry repository up, you can use the following bash command: - -.. code-block:: bash - - gcloud artifacts repositories create $REPO_NAME \ - --repository-format=docker \ - --location=$REGION \ - --project $PROJECT_ID - -**Setup Service Account** - -In order to use dbt and make transformations in BigQuery, Cloud Run Job needs some BigQuery permissions. One way to achieve that is to set up a separate ``Service Account`` with needed permissions: - -.. code-block:: bash - - # create a service account - gcloud iam service-accounts create $SERVICE_ACCOUNT_NAME - -.. code-block:: bash - - # grant JobUser role - gcloud projects add-iam-policy-binding $PROJECT_ID \ - --member="serviceAccount:$SERVICE_ACCOUNT_NAME@$PROJECT_ID.iam.gserviceaccount.com" \ - --role="roles/bigquery.jobUser" - -.. code-block:: bash - - # grant DataEditor role - gcloud projects add-iam-policy-binding $PROJECT_ID \ - --member="serviceAccount:$SERVICE_ACCOUNT_NAME@$PROJECT_ID.iam.gserviceaccount.com" \ - --role="roles/bigquery.dataEditor" - -**Build the dbt Docker image** - -Now, we are going to download an example dbt project and build a Docker image with it. - -.. important:: - - You need to ensure Docker is using the right credentials to push images. For Artifact Registry, this can be done by running the following command: - - .. 
code-block:: bash - - gcloud auth print-access-token | docker login -u oauth2accesstoken --password-stdin https://$REGION-docker.pkg.dev - - The token will be valid for 1 hour. After that, you need to create another one, if still needed. - -Clone the `cosmos-example `_ repo: - -.. code-block:: bash - - git clone https://github.com/astronomer/cosmos-example.git - cd cosmos-example - -Open `Dockerfile `_ located in ``gcp_cloud_run_job_example`` folder and change environments variables ``GCP_PROJECT_ID`` and ``GCP_REGION`` to your GCP project id and project region. - -Build a Docker image using previously modified ``Dockerfile``, which will be used by Cloud Run Job: - -.. code-block:: bash - - docker build -t $IMAGE_NAME -f gcp_cloud_run_job_example/Dockerfile.gcp_cloud_run_job . - -.. important:: - - Make sure to stay in ``cosmos-example`` directory when running ``docker build`` command. - -After this, the image needs to be pushed to the Artifact Registry: - -.. code-block:: bash - - docker push $IMAGE_NAME - -Take a read of the Dockerfile to understand what it does so that you could use it as a reference in your project. - - - The dags directory containing the `dbt project jaffle_shop `_ is added to the image - - The `bigquery dbt profile `_ file is added to the image - - The dbt_project.yml is replaced with `bigquery_profile_dbt_project.yml `_ which contains the profile key pointing to postgres_profile as profile creation is not handled at the moment for K8s operators like in local mode. - -**Create Cloud Run Job instance** - -When the image is pushed to Artifact Registry, you can finally create Cloud Run Job with the image and previously created service account. - -First, enable Cloud Run Admin API using below command: - -.. code-block:: bash - - gcloud services enable run.googleapis.com - - -Next, set default Cloud Run region to your GCP region: - -.. 
code-block:: bash - - gcloud config set run/region $REGION - -Then, run below command to create Cloud Run Job instance: - -.. code-block:: bash - - gcloud run jobs create $CLOUD_RUN_JOB_NAME \ - --image=$IMAGE_NAME \ - --task-timeout=180s \ - --max-retries=0 \ - --cpu=1 \ - --memory=512Mi \ - --service-account=$SERVICE_ACCOUNT_NAME@$PROJECT_ID.iam.gserviceaccount.com - -**Setup Airflow Connections** - -Now, when you have the required Google Cloud infrastructure, you still need to check Airflow configuration to ensure the infrastructure can be used. You'll need a ``google_cloud_default`` connection in order to work on GCP resources. - -Check out an `example `_ of the ``airflow-settings.yml`` file. If you are using Astro CLI, filling in the right values here will be enough for this to work. - -**Setup and Trigger the DAG with Airflow** - -Open `jaffle_shop_gcp_cloud_run_job `_ DAG file and update ``GCP_PROJECT_ID`` and ``GCP_LOCATION`` constants with your GCP project id and project region. - -When the DAG is configured, copy the ``dags`` directory from ``cosmos-example`` repo to your Airflow home: - -.. code-block:: bash - - cp -r dags $AIRFLOW_HOME/ - -Run Airflow: - -.. code-block:: bash - - airflow standalone - -.. note:: - - You might need to run airflow standalone with ``sudo`` if your Airflow user is not able to access the docker socket URL or pull the images in the Kind cluster. - -Log in to Airflow through a web browser ``http://localhost:8080/``, using the user ``airflow`` and the password described in the ``standalone_admin_password.txt`` file. - -Enable and trigger a run of the `jaffle_shop_gcp_cloud_run_job `_ DAG. You will be able to see the following successful DAG run. - -.. figure:: https://github.com/astronomer/astronomer-cosmos/raw/main/docs/_static/jaffle_shop_gcp_cloud_run_job.png - :width: 800 - - -You can also verify the tables that were created using dbt in BigQuery Studio: - -.. 
figure:: https://github.com/astronomer/astronomer-cosmos/raw/main/docs/_static/jaffle_shop_big_query.png - :width: 800 - - -**Delete resources** - -After the successful tests, don't forget to delete Google Cloud resources to save up costs: - -.. code-block:: bash - - # Delete Cloud Run Job instance - - gcloud run jobs delete $CLOUD_RUN_JOB_NAME - -.. code-block:: bash - - # Delete BigQuery main and custom dataset specified in dbt schema.yml with all tables included - - bq rm -r -f -d $PROJECT_ID:$DATASET_NAME - - bq rm -r -f -d $PROJECT_ID:dbt_dev - -.. code-block:: bash - - # Delete Artifact Registry repository with all images included - - gcloud artifacts repositories delete $REPO_NAME \ - --location=$REGION diff --git a/docs/getting_started/index.rst b/docs/getting_started/index.rst index dab959c563..1dea6a496b 100644 --- a/docs/getting_started/index.rst +++ b/docs/getting_started/index.rst @@ -1,28 +1,32 @@ .. _getting-started: .. toctree:: + :maxdepth: 1 :hidden: - :caption: Contents: + :caption: Cosmos Fundamentals Astro CLI quickstart + Similar dbt and Airflow concepts + +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Get started with Cosmos + + Open-source Airflow Astro - MWAA - GCC - Open-Source - Execution Modes - Docker Execution Mode - Kubernetes Execution Mode - Azure Container Instance Execution Mode - AWS Container Run Job Execution Mode - GCP Cloud Run Job Execution Mode - Airflow Async Execution Mode - Watcher Execution Mode - Watcher Kubernetes Execution Mode - dbt and Airflow Similar Concepts + Google Cloud Composer (GCC) + Amazon Managed Workflows for Apache Airflow (MWAA) + + +.. 
toctree:: + :maxdepth: 1 + :hidden: + :caption: Operators + Operators Custom Airflow Properties - Getting Started =============== @@ -83,8 +87,15 @@ For more customization, check out the different execution modes that Cosmos supp For specific guides, see the following: -- `Executing dbt Dags with DockerOperators `__ -- `Executing dbt Dags with KubernetesPodOperators `__ -- `Executing dbt Dags with Watcher Kubernetes Mode `__ -- `Executing dbt Dags with AzureContainerInstancesOperators `__ -- `Executing dbt Dags with GcpCloudRunExecuteJobOperators `__ +- `Executing dbt DAGs with Docker Operators `__ +- `Executing dbt DAGs with KubernetesPodOperators `__ +- `Executing dbt DAGs with Watcher Kubernetes Mode `__ +- `Executing dbt DAGs with AzureContainerInstancesOperators `__ +- `Executing dbt DAGs with GcpCloudRunExecuteJobOperators `__ + + +Concepts Overview +----------------- + +How do dbt and Airflow concepts map to each other? Learn more `in this link `__. + diff --git a/docs/getting_started/kubernetes.rst b/docs/getting_started/kubernetes.rst deleted file mode 100644 index 607ba07bd7..0000000000 --- a/docs/getting_started/kubernetes.rst +++ /dev/null @@ -1,167 +0,0 @@ -.. _kubernetes: - -Kubernetes Execution Mode -============================================== - -The following tutorial illustrates how to run the Cosmos dbt Kubernetes Operator using a local Kubernetes (K8s) cluster. 
It assumes the following: - -- Postgres is run in the Kubernetes (K8s) cluster as a container -- Airflow is run locally, and it triggers a K8s Pod which runs dbt - -Requirements -++++++++++++ - -To test the DbtKubernetesOperators locally, we encourage you to install the following: - -- Local Airflow (either standalone or using Astro CLI) -- `Kind `_ to run K8s locally -- `Helm `_ to install Postgres in K8s -- `Docker `_ to create the dbt container image, which will allow Airflow to create a K8s pod which will run dbt - -At the moment, the user is expected to add to the Docker image both: - -- The dbt project files -- The dbt Profile, which contains the information for dbt to access the database while parsing the project from Apache Airflow nodes -- Handle secrets - -Additional KubernetesPodOperator parameters can be added to the ``operator_args`` parameter of the ``DbtKubernetesOperator``. - -For instance, - -.. literalinclude:: ../../dev/dags/jaffle_shop_kubernetes.py - :language: python - :start-after: [START kubernetes_tg_example] - :end-before: [END kubernetes_tg_example] - -Step-by-step instructions -+++++++++++++++++++++++++ - -Using installed `Kind `_, you can setup a local kubernetes cluster - -.. code-block:: bash - - kind create cluster - -Deploy a Postgres pod to Kind using `Helm `_ - -.. code-block:: bash - - helm repo add bitnami https://charts.bitnami.com/bitnami - helm repo update - helm install postgres bitnami/postgresql - -Retrieve the Postgres password and set it as an environment variable. - -.. code-block:: bash - - export POSTGRES_PASSWORD=$(kubectl get secret --namespace default postgres-postgresql -o jsonpath="{.data.postgres-password}" | base64 -d) - -Check that the environment variable was set and that it is not empty - -.. code-block:: bash - - echo $POSTGRES_PASSWORD - -Expose the Postgres to the host running Docker/Kind. - -.. 
code-block:: bash - - kubectl port-forward --namespace default postgres-postgresql-0 5432:5432 - -Check that you're able to connect to the exposed pod. - -.. code-block:: bash - - PGPASSWORD="$POSTGRES_PASSWORD" psql --host 127.0.0.1 -U postgres -d postgres -p 5432 - - postgres=# \dt - \q - -Create a K8s secret which contains the credentials to access Postgres. - -.. code-block:: bash - - kubectl create secret generic postgres-secrets --from-literal=host=postgres-postgresql.default.svc.cluster.local --from-literal=password=$POSTGRES_PASSWORD - -Clone the example repo that contains the Airflow DAG and dbt project files. - -.. code-block:: bash - - git clone https://github.com/astronomer/cosmos-example.git - cd cosmos-example/ - -Create a Docker image containing the dbt project files and dbt profile by using the `Dockerfile `_, which will be run in K8s. - -.. code-block:: bash - - docker build -t dbt-jaffle-shop:1.0.0 -f Dockerfile.postgres_profile_docker_k8s . - -.. note:: - - If running on M1, you may need to set the following environment variables to run the Docker build command in case it fails. - - .. code-block:: bash - - export DOCKER_BUILDKIT=0 - export COMPOSE_DOCKER_CLI_BUILD=0 - export DOCKER_DEFAULT_PLATFORM=linux/amd64 - -Take a look at the Dockerfile to understand its purpose so that you can use it as a reference in your project. - - - The `dbt profile `__ file is added to the image - - The dags directory containing the `dbt project jaffle_shop `_ is added to the image - - The dbt_project.yml is replaced with `postgres_profile_dbt_project.yml `_ which contains the profile key pointing to postgres_profile as profile creation is not handled at the moment for K8s operators like in local mode. - -Make the build image available in the Kind K8s cluster. - -.. code-block:: bash - - kind load docker-image dbt-jaffle-shop:1.0.0 - -Create a Python virtual environment and install the latest version of Astronomer Cosmos, which contains the K8s Operator. - -.. 
code-block:: bash - - python -m venv venv - source venv/bin/activate - pip install --upgrade pip - pip install "astronomer-cosmos[dbt-postgres]" apache-airflow-providers-cncf-kubernetes - -Make the `jaffle_shop_kubernetes.py `__ file at your Airflow DAG home: - -.. code-block:: bash - - cp -r dags $AIRFLOW_HOME/ - -Run Airflow - -.. code-block:: bash - - airflow standalone - -.. note:: - - You may need to run Airflow standalone with ``sudo`` if your Airflow user is unable to access the Docker socket URL or pull images in the Kind cluster. - -Log in to Airflow through a web browser ``http://localhost:8080/``, using the user ``airflow`` and the password described in the ``standalone_admin_password.txt`` file. - -Enable and trigger a run of the `jaffle_shop_k8s `_ DAG. You will be able to see the following successful DAG run. - -.. figure:: https://github.com/astronomer/astronomer-cosmos/raw/main/docs/_static/jaffle_shop_k8s_dag_run.png - :width: 800 - -.. _kubernetes-known-limitations: - -Known Limitations -+++++++++++++++++ - -The Kubernetes execution mode has the following limitations: - -- Does not emit OpenLineage events (there is an `open ticket #496 `__ to address this) -- Does not emit Airflow datasets, assets, and dataset aliases (there is an `open ticket #2329 `__ to address this) -- Does not handle installing dbt deps for users (there is an `open ticket #679 `__ to address this) -- Does not support `ProfileMapping `_ (there is an `open ticket #749 `__ to address this) -- Does not support `Callbacks `_ (there is an `open ticket #1575 `__ to address this) -- Does not expose Compiled SQL as a `templated field `_ -- Does not benefit from `Cosmos caching mechanisms `_ -- Does not support `generating dbt docs & uploading to an object store `_ (there is a `PR `_ to solve this for S3) diff --git a/docs/getting_started/mwaa.rst b/docs/getting_started/mwaa.rst index 5b7c41bde5..5b1da23439 100644 --- a/docs/getting_started/mwaa.rst +++ b/docs/getting_started/mwaa.rst 
@@ -1,7 +1,7 @@ .. _mwaa: -Getting Started on MWAA -======================= +Getting Started with Cosmos on Amazon Managed Workflows for Apache Airflow (MWAA) +================================================================================= Users can face Python dependency issues when trying to use the Cosmos `Local Execution Mode `_ in Amazon Managed Workflows for `Apache Airflow® `_ (MWAA). diff --git a/docs/getting_started/open-source.rst b/docs/getting_started/open-source.rst index ba9bbdb15c..f5d1db832b 100644 --- a/docs/getting_started/open-source.rst +++ b/docs/getting_started/open-source.rst @@ -1,7 +1,7 @@ .. _open-source: -Getting Started on Open Source Airflow -====================================== +Getting Started with Cosmos on Open-source Airflow +================================================== When running open-source Airflow, your setup may vary. This guide assumes you have access to edit the underlying image. diff --git a/docs/getting_started/watcher-execution-mode.rst b/docs/getting_started/watcher-execution-mode.rst deleted file mode 100644 index af7589650c..0000000000 --- a/docs/getting_started/watcher-execution-mode.rst +++ /dev/null @@ -1,480 +0,0 @@ -.. _watcher-execution-mode: - -Introducing ``ExecutionMode.WATCHER``: Experimental High-Performance dbt Execution in Cosmos -============================================================================================ - -With the release of **Cosmos 1.11.0**, we are introducing a powerful new experimental execution mode — ``ExecutionMode.WATCHER`` — designed to drastically reduce dbt pipeline run times in Airflow. - -Early benchmarks show that ``ExecutionMode.WATCHER`` can cut total DAG runtime **by up to 80%**, bringing performance **on par with running dbt CLI locally**.
Since this execution mode improves the performance by leveraging `dbt threading `_ and Airflow deferrable sensors, the performance gains will depend on three major factors: - -- The amount of dbt ``threads`` set either via the dbt profile configuration or the dbt ``--threads`` flag -- The topology of the dbt pipeline -- The ``poke_interval`` and ``timeout`` settings of the ``DbtConsumerWatcherSensor`` operator, which determine the frequency and duration of the sensor's polling. - -------------------------------------------------------------------------------- - -Background: The Problem with the Local Execution Mode in Cosmos ---------------------------------------------------------------- - -When running dbt via Cosmos using the default ``ExecutionMode.LOCAL``, each dbt model is executed as a separate Airflow task. - -This provides strong observability and task-level retry control — but it comes at a cost. Each model runs a new dbt process, which introduces significant overhead. - -Consider the `google/fhir-dbt-analytics `_ project: - -+-------------------------------------------------------------+-----------------------------------+------------------+ -| Run Type | Description | Total Runtime | -+=============================================================+===================================+==================+ -| Single ``dbt run`` (dbt CLI) | Runs the whole DAG in one command | ~5m 30s | -+-------------------------------------------------------------+-----------------------------------+------------------+ -| One ``dbt run`` per model, totalling 184 commands (dbt CLI) | Each model is its own task | ~32m | -+-------------------------------------------------------------+-----------------------------------+------------------+ - -This difference motivated a rethinking of how Cosmos interacts with dbt. 
- -------------------------------------------------------------------------------- - -Concept: ``ExecutionMode.WATCHER`` ----------------------------------- - -``ExecutionMode.WATCHER`` combines the **speed of a single dbt run** with the **observability and task management of Airflow**. - -It is built on two operator types: - -* ``DbtProducerWatcherOperator`` (`#1982 `_) - Runs dbt **once** across the entire pipeline, register to `dbt event callbacks `_ and sends model progress updates via Airflow **XComs**. - -* ``DbtConsumerWatcherSensor`` (`#1998 `_) - Watches those XComs and marks individual Airflow tasks as complete when their corresponding dbt models finish. - -Together, these operators let you: - -* Run dbt as a single command (for speed) -* Retain model-level observability (for clarity) -* Retry specific models (for resilience) - -------------------------------------------------------------------------------- - -Performance Gains ------------------ - -We used a dbt project developed by Google, the `google/fhir-dbt-analytics `_ project, that interfaces with BigQuery. 
It contains: -* 2 seeds -* 52 sources -* 185 models - -Initial benchmarks, using illustrate significant improvements: - -+-----------------------------------------------+-----------+--------------------+ -| Environment | Threads | Execution Time (s) | -+===============================================+===========+====================+ -| dbt build (dbt CLI) | 4 | 6–7 | -+-----------------------------------------------+-----------+--------------------+ -| dbt run per model (dbt CLI) | — | 30 | -| similar to the Cosmos ``ExecutionMode.LOCAL`` | | | -+-----------------------------------------------+-----------+--------------------+ -| Cosmos ``ExecutionMode.LOCAL`` (Astro CLI) | — | 10–15 | -+-----------------------------------------------+-----------+--------------------+ -| Cosmos ``ExecutionMode.WATCHER`` (Astro CLI) | 1 | 26 | -| | 2 | 14 | -| | 4 | 7 | -| | 8 | 4 | -| | 16 | 2 | -+-----------------------------------------------+-----------+--------------------+ -| Cosmos ``ExecutionMode.WATCHER`` (Astro Cloud | 8 | ≈5 | -| Standard Deployment with A10 workers | | | -+-----------------------------------------------+-----------+--------------------+ - -The last line represents the performance improvement in a real-world Airflow deployment, using `Astro Cloud `_. - -Depending on the dbt workflow topology, if your dbt DAG previously took 5 minutes with ``ExecutionMode.LOCAL``, you can expect it to complete in roughly **1 minute** with ``ExecutionMode.WATCHER``. - -We plan to repeat these benchmarks and share the code with the community in the future. - - -.. note:: - ``ExecutionMode.WATCHER`` relies on the ``threads`` value defined in your dbt profile. Start with a conservative value that matches the CPU capacity of your Airflow workers, then gradually increase it to find the sweet spot between faster runs and acceptable memory/CPU usage. 
- -When we ran the `astronomer/cosmos-benchmark `_ project with ``ExecutionMode.WATCHER``, that same ``threads`` setting directly affected runtime: moving from 1 to 8 threads reduced the end-to-end ``dbt build`` duration from roughly 26 seconds to about 4 seconds (see table above), while 16 threads squeezed it to around 2 seconds at the cost of higher CPU usage. Use those numbers as a reference point when evaluating how thread counts scale in your own environment. - -To increase the number of threads, edit your dbt ``profiles.yml`` (or Helm values if you manage the profile there) and update the ``threads`` key for the target you use with Cosmos: - -.. code-block:: yaml - - your_dbt_project: - target: prod - outputs: - prod: - type: postgres - host: your-host - user: your-user - password: your-password - schema: analytics - threads: 8 # increase or decrease to match available resources - - -If you prefer to manage threads through Cosmos profile mappings instead of editing ``profiles.yml`` directly, pass ``profile_args={"threads": }`` to your ``ProfileConfig``. For example, using the built-in ``PostgresUserPasswordProfileMapping``: - -.. code-block:: python - - from cosmos.config import ProfileConfig - from cosmos.profiles import PostgresUserPasswordProfileMapping - - profile_config = ProfileConfig( - profile_name="jaffle_shop", - target_name="prod", - profile_mapping=PostgresUserPasswordProfileMapping( - conn_id="postgres_connection", - profile_args={"threads": 8}, - ), - ) - - -------------------------------------------------------------------------------- - -Example Usage of ``ExecutionMode.WATCHER`` ------------------------------------------- - -There are two main ways to use the new execution mode in Cosmos — directly within a ``DbtDag``, or embedded as part of a ``DbtTaskGroup`` inside a larger DAG. 
- -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Example 1 — Using ``DbtDag`` with ``ExecutionMode.WATCHER`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You can enable WATCHER mode directly in your ``DbtDag`` configuration. -This approach is best when your Airflow DAG is fully dedicated to a dbt project. - -.. literalinclude:: ../../dev/dags/example_watcher.py - :language: python - :start-after: [START example_watcher] - :end-before: [END example_watcher] - -As it can be observed, the only difference with the default ``ExecutionMode.LOCAL`` is the addition of the ``execution_config`` parameter with the ``execution_mode`` set to ``ExecutionMode.WATCHER``. The ``ExecutionMode`` enum can be imported from ``cosmos.constants``. For more information on the ``ExecutionMode.LOCAL``, please, check the `dedicated page `__ - -**How it works:** - -* Cosmos executes your dbt project once via a producer task. -* Model-level Airflow tasks act as watchers or sensors, updating their state as dbt completes each model. -* The DAG remains fully observable and retryable, with **dramatically improved runtime performance** (often 5× faster than ``ExecutionMode.LOCAL``). - -**How it looks like:** - -.. image:: /_static/jaffle_shop_watcher_dbt_dag_dag_run.png - :alt: Cosmos DbtDag with `ExecutionMode.WATCHER` - :align: center - -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Example 2 — Using ``DbtTaskGroup`` with ``ExecutionMode.WATCHER`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If your Airflow DAG includes multiple stages or integrations (e.g., data ingestion → dbt → reporting), use ``DbtTaskGroup`` to embed your dbt project into a larger DAG — still benefiting from WATCHER performance. - -.. 
code-block:: python - :caption: example_watcher_taskgroup.py - :name: example_watcher_taskgroup - - from airflow.models import DAG - from airflow.operators.empty import EmptyOperator - from cosmos import DbtTaskGroup - - with DAG( - dag_id="example_watcher_taskgroup", - schedule="@daily", - start_date=datetime(2023, 1, 1), - catchup=False, - ): - """ - The simplest example of using Cosmos to render a dbt project as a TaskGroup. - """ - pre_dbt = EmptyOperator(task_id="pre_dbt") - - first_dbt_task_group = DbtTaskGroup( - group_id="first_dbt_task_group", - execution_config=ExecutionConfig( - execution_mode=ExecutionMode.WATCHER, - ), - project_config=ProjectConfig(DBT_PROJECT_PATH), - profile_config=profile_config, - operator_args=operator_args, - ) - - pre_dbt >> first_dbt_task_group - -**Key advantages:** - -* Integrates seamlessly into complex Airflow DAGs. -* Uses the same high-performance producer/consumer execution model. -* Each ``DbtTaskGroup`` behaves independently — allowing modular dbt runs within larger workflows. - -.. image:: /_static/jaffle_shop_watcher_dbt_taskgroup_dag_run.png - :alt: Cosmos DbtDag with `ExecutionMode.WATCHER` - :align: center - -------------------------------------------------------------------------------- - -Additional details -------------------- - -~~~~~~~~~~~~~~~~ -How retries work -~~~~~~~~~~~~~~~~ - -When the ``dbt build`` command run by ``DbtProducerWatcherOperator`` fails, it will notify all the ``DbtConsumerWatcherSensor``. - -The individual watcher tasks that subclass ``DbtConsumerWatcherSensor`` can retry the dbt command themselves, using the same behavior as ``ExecutionMode.LOCAL``. - -If a branch of the DAG fails, users can clear the status of a failed consumer task, including its downstream tasks, via the Airflow UI, and each of them will run in ``ExecutionMode.LOCAL``. - -**Producer retry behavior** - -.. 
versionadded:: 1.12.2 - -When the ``DbtProducerWatcherOperator`` is triggered for a retry (try_number > 1), it will not re-run the dbt build command and will succeed. In previous versions of Cosmos, the producer task would fail during retries. -This behavior is designed to support TaskGroup-level retries, as reported in `#2282 `_. - -**Why this matters:** - -- In earlier versions, attempting to retry the producer task would raise an ``AirflowException``, causing the retry to fail immediately. -- Now, the producer gracefully skips execution on retries, logging an informational message explaining that the retry was skipped to avoid running a second ``dbt build``. -- This allows users to retry entire TaskGroups and/or DAGs without the producer task blocking the retry flow. - -**Important considerations:** - -- The producer task should still be configured with ``retries=0`` (which Cosmos enforces by default) to avoid unintended duplicate ``dbt build`` runs. - -- By default, Cosmos sets ``retries`` to ``0`` in``DbtProducerWatcherOperator``. Users can retry manually by clearing the status of the producer task and all its downstream tasks, keeping in mind that the producer task will not re-run the ``dbt build`` command and will succeed. - -The overall retry behavior will be further improved once `#1978 `_ is implemented. - -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Watcher dbt Execution Queue -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. versionadded:: 1.14.0 - -In watcher execution mode, by default, consumer sensor tasks are lightweight sensors that wait for the producer task to complete. On their first attempt, they require minimal CPU and memory resources. However, when these tasks retry, they execute the dbt command for the node, which may require significantly more resources. - -The ``watcher_dbt_execution_queue`` configuration allows you to specify a different worker queue for retry attempts. 
This enables you to: - -- **Optimize resource allocation** — Use lightweight workers for initial sensor execution and high-resource workers for retries -- **Improve scheduling efficiency** — Prevent resource contention between initial sensor tasks and retry executions -- **Scale independently** — Scale retry queues separately based on retry workload patterns - -**Configuration:** - -Set the ``watcher_dbt_execution_queue`` in your Airflow configuration: - -.. code-block:: ini - - [cosmos] - watcher_dbt_execution_queue = high_memory_queue - -Or via environment variable: - -.. code-block:: bash - - export AIRFLOW__COSMOS__WATCHER_DBT_EXECUTION_QUEUE=high_memory_queue - -**How it works:** - -- For watcher producer tasks (``DbtProducerWatcherOperator``), the configured queue is used during their first execution -- For watcher consumer tasks (``DbtConsumerWatcherSensor``), from their first retry onwards, if ``watcher_dbt_execution_queue`` is configured, the task is automatically assigned to the specified queue -- This behavior is enforced by Cosmos via an `Airflow cluster policy `_ (``task_instance_mutation_hook``) that mutates ``task_instance.queue`` at runtime for retry attempts - -.. note:: - - For producer task execution, we encourage users to set the ``watcher_dbt_execution_queue`` configuration. If, for any reason, users prefer to use a different node pool for producer tasks without setting an Airflow Cluster Policy, they can set the ``queue`` argument via ``setup_operator_args``. This, however, would not solve the problem of assigning consumer retries to nodes that may have more memory and CPU available. - - The effective precedence is: - - ``watcher_dbt_execution_queue`` > explicit ``queue`` on the producer (from ``setup_operator_args``) > ``operator_args`` > your Airflow deployment’s default queue. 
- -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Installation of Airflow and dbt -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Since Cosmos 1.12.0, ``ExecutionMode.WATCHER`` works well regardless of whether dbt and Airflow are installed in the same Python virtual environment. - -When dbt and Airflow are installed in the same Python virtual environment, the ``ExecutionMode.WATCHER`` uses dbt `callback features `_. - -When dbt and Airflow are not installed in the same Python virtual environment, the ``ExecutionMode.WATCHER`` consumes the dbt `structured logging `_ to update the consumer tasks. - -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Synchronous versus Asynchronous sensor execution -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -In Cosmos 1.11.0, the ``DbtConsumerWatcherSensor`` operator is implemented as a synchronous XCom sensor, which continuously occupies the worker slot - even if they're just sleeping and checking periodically. - -Starting with Cosmos 1.12.0, the ``DbtConsumerWatcherSensor`` supports -`deferrable (asynchronous) execution `_. Deferrable execution frees up the Airflow worker slot, while task status monitoring is handled by the Airflow triggerer component, -which increases overall task throughput. By default, the sensor now runs in deferrable mode. - -------------------------------------------------------------------------------- - -Known Limitations -------------------- - -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Producer task implementation -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The producer task is implemented as a ``DbtProducerWatcherOperator`` and currently relies on dbt being installed alongside the Airflow deployment, as in the ``ExecutionMode.LOCAL`` implementation. - -The alternative to this implementation is to use ``ExecutionMode.WATCHER_KUBERNETES``, which is built on top of ``ExecutionMode.KUBERNETES``. Check :ref:`watcher-kubernetes-execution-mode` for more information. 
- -~~~~~~~~~~~~~~~~~~~~~~~~ -Individual dbt Operators -~~~~~~~~~~~~~~~~~~~~~~~~ - -The ``ExecutionMode.WATCHER`` efficiently implements the following operators: -* ``DbtSeedWatcherOperator`` -* ``DbtSnapshotWatcherOperator`` -* ``DbtRunWatcherOperator`` - -However, other operators that are available in the ``ExecutionMode.LOCAL`` mode are not implemented. - -The ``DbtBuildWatcherOperator`` is not implemented, since the build command is executed by the producer ``DbtProducerWatcherOperator`` operator. - -Additionally, since the ``dbt build`` command does not run ``source`` nodes, the operator ``DbtSourceWatcherOperator`` is equivalent to the ``DbtSourceLocalOperator`` operator, from ``ExecutionMode.LOCAL``. - -Finally, the following features are not implemented as operators under ``ExecutionMode.WATCHER``: - -* ``dbt ls`` -* ``dbt run-operation`` -* ``dbt docs`` -* ``dbt clone`` - -You can still invoke these operators using the default ``ExecutionMode.LOCAL`` mode. - -~~~~~~~~~~~~~ -Test behavior -~~~~~~~~~~~~~ - -By default, the watcher mode runs tests alongside models via the ``dbt build`` command being executed by the producer ``DbtProducerWatcherOperator`` operator. - -As a starting point, this execution mode does not support the ``TestBehavior.AFTER_EACH`` behavior, since the tests are not run as individual tasks. Since this is the default ``TestBehavior`` in Cosmos, we are injecting ``EmptyOperator`` as a starting point to ensure a seamless transition to the new mode. - -The ``TestBehavior.BUILD`` behavior is embedded in the producer ``DbtProducerWatcherOperator`` operator. - -The ``TestBehavior.NONE`` and ``TestBehavior.AFTER_ALL`` behave similarly to ``ExecutionMode.LOCAL``. 
- -~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Airflow Datasets and Assets -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -While the ``ExecutionMode.WATCHER`` supports the ``emit_datasets`` parameter, the Airflow Datasets and Assets are emitted from the ``DbtProducerWatcherOperator`` task instead of the consumer tasks, as done for other Cosmos' execution modes. - -~~~~~~~~~~~~~~~~~~~~~~ -Source freshness nodes -~~~~~~~~~~~~~~~~~~~~~~ - -Since Cosmos 1.6, it `supports the rendering of source nodes `_. - -We noticed some Cosmos users use this feature alongside `overriding Cosmos source nodes `_ as sensors or another operator that allows them to skip the following branch of the DAG if the source is not fresh. - -This use case is not currently supported by the ``ExecutionMode.WATCHER``, since the ``dbt build`` command does not run `source freshness checks `_. - -We have a follow-up ticket to `further investigate this use case `_. - - -Advanced config -------------------- - -~~~~~~~~~~~~~~~~ -Callback support -~~~~~~~~~~~~~~~~ - -The ``DbtProducerWatcherOperator`` and ``DbtConsumerWatcherSensor`` will use the user-defined callback function similar to ``ExecutionMode.LOCAL`` mode. - -You can define different ``callback`` behaviors for producer and consumer nodes by using ``operator_args`` to configure the consumer callback and ``setup_operator_args`` to override the callback for the producer, as described below. - -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Overriding ``operator_args`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The ``DbtProducerWatcherOperator`` and ``DbtConsumerWatcherSensor`` operators handle ``operator_args`` similar to the ``ExecutionMode.LOCAL`` mode. - -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Using Custom Args for the Producer and Watcher -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 1.12.0 - -If you need to override ``operator_args`` for the ``DbtProducerWatcherOperator``, you can do so using ``setup_operator_args``. 
- -When using ``ExecutionMode.WATCHER``, you may want to configure specific properties, such as ``retries`` specifically for the ``DbtProducerWatcherOperator`` task. This can be useful for several reasons: -- Improved resilience - transient issues (e.g., temporary database or network failures) can be automatically retried. -- Reduced manual intervention - failed producer runs can recover without requiring operator restarts. -- Better reliability - retry behavior can be tuned independently from sensor tasks. - -Example: Configure the producer task with custom retry settings. - -.. code-block:: python - - from datetime import timedelta - from cosmos.config import ExecutionConfig - from cosmos.constants import ExecutionMode - - execution_config = ExecutionConfig( - execution_mode=ExecutionMode.WATCHER, - setup_operator_args={ - "retries": 0, - "retry_delay": timedelta(minutes=5), - }, - ) - -This allows you to customize ``DbtProducerWatcherOperator`` retry behavior without affecting the arguments used by the other sensor tasks. - -If configuring queues, we suggest using the previously mentioned ``watcher_dbt_execution_queue`` configuration instead of the ``setup_operator_args``. - -.. note:: - Please note that ``setup_operator_args`` is specific to Cosmos and is not related to Airflow setup or teardown task. - - -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Sensor slot allocation and polling -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Each ``DbtDag`` or ``DbtTaskGroup`` root node will startup during DAG runs at - potentially - the same time as the DAG Run. This may not happen, since it is dependent on the -concurrency settings and available task slots in the Airflow deployment. - -The consequence is that tasks may take longer to be updated if they are not sensing at the moment that the transformation happens. - -We plan to review this behaviour and alternative approaches in the future. 
- - -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Asynchronous sensor execution -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -- Deferrable execution is currently supported only for dbt models, seeds and snapshots. -- Deferrable execution applies only to the first task attempt (try number 1). For subsequent retries, the sensor falls back to synchronous execution. - -To disable asynchronous execution, set the ``deferrable`` flag to ``False`` in the ``operator_args``. - -.. literalinclude:: ../../dev/dags/example_watcher.py - :language: python - :start-after: [START example_watcher_synchronous] - :end-before: [END example_watcher_synchronous] - -------------------------------------------------------------------------------- - -Troubleshooting ---------------- - -Problem: "I changed from ``ExecutionMode.LOCAL`` to ``ExecutionMode.WATCHER``, but my DAG is running slower." -Answer: Please, check the number of threads that are being used by searching the producer task logs for a message similar to ``Concurrency: 1 threads (target='DEV')``. To leverage the Watcher mode, you should have a high number of threads, at least dbt's default of 4. Check the `dbt threading docs `_ for more information on how to set the number of threads. - - -Summary -------- - -``ExecutionMode.WATCHER`` represents a significant leap forward for running dbt in Airflow via Cosmos: - -* ✅ Up to **5× faster** dbt DAG runs -* ✅ Maintains **model-level visibility** in Airflow -* ✅ Enables **smarter resource allocation** -* ✅ Built on proven Cosmos rendering techniques - -This is an experimental feature, and we are looking for feedback from the community. - -Stay tuned for further documentation and base image support for the ``ExecutionMode.WATCHER`` in upcoming releases. 
diff --git a/docs/getting_started/watcher-kubernetes-execution-mode.rst b/docs/getting_started/watcher-kubernetes-execution-mode.rst deleted file mode 100644 index 16dbbffd0a..0000000000 --- a/docs/getting_started/watcher-kubernetes-execution-mode.rst +++ /dev/null @@ -1,214 +0,0 @@ -.. _watcher-kubernetes-execution-mode: - -``ExecutionMode.WATCHER_KUBERNETES``: High-Performance dbt Execution in Kubernetes -=================================================================================== - -.. versionadded:: 1.13.0 - -The ``ExecutionMode.WATCHER_KUBERNETES`` combines the **speed of the** :ref:`watcher-execution-mode` **with the isolation of** :ref:`kubernetes`. - -This execution mode is ideal for users who: - -* Want to leverage the performance benefits of the watcher execution mode -* Need to run dbt in isolated Kubernetes pods -* Prefer not to install dbt in their Airflow deployment - -------------------------------------------------------------------------------- - -Background ----------- - -The :ref:`watcher-execution-mode` introduced in Cosmos 1.11.0 significantly reduces dbt pipeline run times by running dbt as a single command while maintaining model-level observability in Airflow. - -However, the original ``ExecutionMode.WATCHER`` requires dbt to be installed alongside Airflow. The ``ExecutionMode.WATCHER_KUBERNETES`` removes this limitation by running the dbt command inside Kubernetes pods, similar to ``ExecutionMode.KUBERNETES``. - -For more details on the watcher concept and how it works, please refer to the :ref:`watcher-execution-mode` documentation. - -------------------------------------------------------------------------------- - -How to Use ----------- - -Users previously using ``ExecutionMode.KUBERNETES`` can simply replace the ``execution_mode`` to use ``ExecutionMode.WATCHER_KUBERNETES``. - -The following example shows how to configure a ``DbtDag`` with ``ExecutionMode.WATCHER_KUBERNETES``: - -.. 
code-block:: python - - from cosmos import DbtDag - from cosmos.config import ExecutionConfig - from cosmos.constants import ExecutionMode - - dag = DbtDag( - dag_id="jaffle_shop_watcher_kubernetes", - # ... other DAG parameters ... - execution_config=ExecutionConfig( - execution_mode=ExecutionMode.WATCHER_KUBERNETES, - dbt_project_path=K8S_PROJECT_DIR, - ), - operator_args={ - "image": DBT_IMAGE, - "get_logs": True, - "log_events_on_failure": True, - }, - ) - -**Key differences from** ``ExecutionMode.KUBERNETES``: - -* The ``execution_mode`` is set to ``ExecutionMode.WATCHER_KUBERNETES`` instead of ``ExecutionMode.KUBERNETES`` -* The producer task runs the entire ``dbt build`` command in a single Kubernetes pod -* Consumer tasks (sensors) watch for the completion of their corresponding dbt models - -For the complete setup including Kubernetes secrets, Docker image configuration, and profile setup, refer to the :ref:`kubernetes` documentation. - -------------------------------------------------------------------------------- - -Performance Gains ------------------ - -Early benchmarks using the ``jaffle_shop_watcher_kubernetes`` DAG show significant improvements: - -+-----------------------------------------------+------------------+ -| Execution Mode | Total Runtime | -+===============================================+==================+ -| ``ExecutionMode.KUBERNETES`` | 00:00:32.155 | -+-----------------------------------------------+------------------+ -| ``ExecutionMode.WATCHER_KUBERNETES`` | 00:00:11.783 | -+-----------------------------------------------+------------------+ - -This represents approximately a **63% reduction** in total DAG runtime. 
- -The performance improvement comes from: - -* Running dbt as a single command (reducing Kubernetes pod startup overhead) -* Leveraging dbt's native threading capabilities -* Eliminating repeated dbt initialization for each model - -------------------------------------------------------------------------------- - -Known Limitations ------------------ - -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Kubernetes Provider Version Compatibility -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -``ExecutionMode.WATCHER_KUBERNETES`` does not work with older versions of the ``apache-airflow-providers-cncf-kubernetes`` provider (<=10.7.0). - -Please ensure you have a compatible version installed: - -.. code-block:: bash - - pip install "apache-airflow-providers-cncf-kubernetes>10.7.0" - -We successfully tested against the most recent release of the provider (`10.12.2 `_). - -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Support for KPO deferrable mode -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The producer node created by the ``ExecutionMode.WATCHER_KUBERNETES`` producer task can be set to deferrable mode as long as: - -- The correct version of Airflow Kubernetes is installed (``>=10.12.2``). This version fixed a bug (`PR `_) that prevented setting callbacks and parsing the logs when the Kubernetes Operator run using ``deferrable``. The experience should be further improved once `this other PR is merged `_. - -.. code-block:: bash - - pip install "apache-airflow-providers-cncf-kubernetes>=10.12.2" - -- The arguments ``deferrable=True`` and ``is_delete_operator_pod=True`` are set: - -.. code-block:: python - - dag = DbtDag( - dag_id="jaffle_shop_watcher_kubernetes", - # ... other DAG parameters ... 
- execution_config=ExecutionConfig( - execution_mode=ExecutionMode.WATCHER_KUBERNETES, - dbt_project_path=K8S_PROJECT_DIR, - ), - operator_args={ - "deferrable": True, - "is_delete_operator_pod": True, - "image": DBT_IMAGE, - "get_logs": True, - "log_events_on_failure": True, - }, - ) - -Conversely, the consumer tasks that subclass ``DbtConsumerWatcherKubernetesSensor`` run in deferrable mode by default when operating as a sensor. They can also operate in deferrable mode if they are running dbt themselves upon retry. - -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Mandatory ``operator_args`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The ``operator_args`` must define ``get_logs`` and ``log_events_on_failure``: - -.. code-block: python - - dag = DbtDag( - dag_id="jaffle_shop_watcher_kubernetes", - # ... other DAG parameters ... - execution_config=ExecutionConfig( - execution_mode=ExecutionMode.WATCHER_KUBERNETES, - dbt_project_path=K8S_PROJECT_DIR, - ), - operator_args={ - # ... other KPO mandatory args ... - "get_logs": True, - "log_events_on_failure": True, - }, - ) - - -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Other Inherited Limitations -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The following limitations from ``ExecutionMode.WATCHER`` also apply to ``ExecutionMode.WATCHER_KUBERNETES``: - -* **Individual dbt Operators**: Only ``DbtSeedWatcherKubernetesOperator``, ``DbtSnapshotWatcherKubernetesOperator``, and ``DbtRunWatcherKubernetesOperator`` are implemented. The ``DbtTestWatcherKubernetesOperator`` is currently a placeholder. - -* **Test behavior**: The ``TestBehavior.AFTER_EACH`` is not supported. Tests are run as part of the ``dbt build`` command by the producer task. - -* **Source freshness nodes**: The ``dbt build`` command does not run source freshness checks. - -For more details on these limitations, refer to the :ref:`watcher-execution-mode` documentation. - -Additionally, the limitations from ``ExecutionMode.KUBERNETES`` also apply to ``ExecutionMode.WATCHER_KUBERNETES``. 
For details, refer to the :ref:`kubernetes-known-limitations` documentation. - -------------------------------------------------------------------------------- - -Example DAG ------------ - -Below is a complete example of a DAG using ``ExecutionMode.WATCHER_KUBERNETES``: - -.. literalinclude:: ../../dev/dags/jaffle_shop_watcher_kubernetes.py - :language: python - -------------------------------------------------------------------------------- - -Prerequisites -------------- - -Before using ``ExecutionMode.WATCHER_KUBERNETES``, ensure you have: - -1. A Kubernetes cluster configured and accessible from your Airflow deployment -2. A Docker image containing your dbt project and profile -3. The ``apache-airflow-providers-cncf-kubernetes`` provider installed (version >10.7.0) - -For detailed setup instructions, refer to the :ref:`kubernetes` documentation. - -------------------------------------------------------------------------------- - -Summary -------- - -``ExecutionMode.WATCHER_KUBERNETES`` provides: - -* ✅ **~63% faster** dbt DAG runs compared to ``ExecutionMode.KUBERNETES`` -* ✅ **Isolation** between dbt and Airflow dependencies -* ✅ **Model-level visibility** in Airflow -* ✅ **Easy migration** from ``ExecutionMode.KUBERNETES`` - -This execution mode is ideal for teams who want the performance benefits of the watcher mode while maintaining the isolation provided by Kubernetes execution. 
From e085c2a23a0eb47b470d0755f3067c06b49a8267 Mon Sep 17 00:00:00 2001 From: L Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Fri, 27 Feb 2026 16:57:44 -0500 Subject: [PATCH 03/29] rename to guides --- docs/{configuration => guides}/caching.rst | 0 docs/{configuration => guides}/callbacks/callbacks.rst | 0 docs/{configuration => guides}/compiled-sql.rst | 0 .../configure-tests/testing-behavior.rst | 0 docs/{configuration => guides}/cosmos-conf.rst | 0 docs/{configuration => guides}/dag-customization.rst | 0 docs/{configuration => guides}/dbt-docs/generating-docs.rst | 0 docs/{configuration => guides}/dbt-docs/hosting-docs.rst | 0 docs/{configuration => guides}/dbt-fusion.rst | 0 docs/{configuration => guides}/execution-config.rst | 0 .../{configuration => guides}/execution-modes-local-conflicts.rst | 0 docs/{configuration => guides}/index.rst | 0 docs/{configuration => guides}/lineage.rst | 0 docs/{configuration => guides}/logging.rst | 0 docs/{configuration => guides}/memory_optimization.rst | 0 docs/{configuration => guides}/multi-project.rst | 0 docs/{configuration => guides}/operator-args.rst | 0 docs/{configuration => guides}/parsing-methods.rst | 0 docs/{configuration => guides}/partial-parsing.rst | 0 docs/{configuration => guides}/profile-config.rst | 0 docs/{configuration => guides}/project-config.rst | 0 docs/{configuration => guides}/render-config.rst | 0 .../run-dbt/airflow-worker/async-execution-mode.rst | 0 docs/{configuration => guides}/run-dbt/airflow-worker/index.rst | 0 .../run-dbt/airflow-worker/watcher-execution-mode.rst | 0 .../run-dbt/container/aws-container-run-job.rst | 0 .../run-dbt/container/azure-container-instance.rst | 0 docs/{configuration => guides}/run-dbt/container/docker.rst | 0 .../run-dbt/container/gcp-cloud-run-job.rst | 0 docs/{configuration => guides}/run-dbt/container/index.rst | 0 docs/{configuration => guides}/run-dbt/container/kubernetes.rst | 0 .../run-dbt/container/watcher-kubernetes-execution-mode.rst | 0 
docs/{configuration => guides}/run-dbt/execution-modes.rst | 0 docs/{configuration => guides}/scheduling.rst | 0 docs/{configuration => guides}/selecting-excluding.rst | 0 docs/{configuration => guides}/source-nodes-rendering.rst | 0 docs/{configuration => guides}/task-display-name.rst | 0 37 files changed, 0 insertions(+), 0 deletions(-) rename docs/{configuration => guides}/caching.rst (100%) rename docs/{configuration => guides}/callbacks/callbacks.rst (100%) rename docs/{configuration => guides}/compiled-sql.rst (100%) rename docs/{configuration => guides}/configure-tests/testing-behavior.rst (100%) rename docs/{configuration => guides}/cosmos-conf.rst (100%) rename docs/{configuration => guides}/dag-customization.rst (100%) rename docs/{configuration => guides}/dbt-docs/generating-docs.rst (100%) rename docs/{configuration => guides}/dbt-docs/hosting-docs.rst (100%) rename docs/{configuration => guides}/dbt-fusion.rst (100%) rename docs/{configuration => guides}/execution-config.rst (100%) rename docs/{configuration => guides}/execution-modes-local-conflicts.rst (100%) rename docs/{configuration => guides}/index.rst (100%) rename docs/{configuration => guides}/lineage.rst (100%) rename docs/{configuration => guides}/logging.rst (100%) rename docs/{configuration => guides}/memory_optimization.rst (100%) rename docs/{configuration => guides}/multi-project.rst (100%) rename docs/{configuration => guides}/operator-args.rst (100%) rename docs/{configuration => guides}/parsing-methods.rst (100%) rename docs/{configuration => guides}/partial-parsing.rst (100%) rename docs/{configuration => guides}/profile-config.rst (100%) rename docs/{configuration => guides}/project-config.rst (100%) rename docs/{configuration => guides}/render-config.rst (100%) rename docs/{configuration => guides}/run-dbt/airflow-worker/async-execution-mode.rst (100%) rename docs/{configuration => guides}/run-dbt/airflow-worker/index.rst (100%) rename docs/{configuration => 
guides}/run-dbt/airflow-worker/watcher-execution-mode.rst (100%) rename docs/{configuration => guides}/run-dbt/container/aws-container-run-job.rst (100%) rename docs/{configuration => guides}/run-dbt/container/azure-container-instance.rst (100%) rename docs/{configuration => guides}/run-dbt/container/docker.rst (100%) rename docs/{configuration => guides}/run-dbt/container/gcp-cloud-run-job.rst (100%) rename docs/{configuration => guides}/run-dbt/container/index.rst (100%) rename docs/{configuration => guides}/run-dbt/container/kubernetes.rst (100%) rename docs/{configuration => guides}/run-dbt/container/watcher-kubernetes-execution-mode.rst (100%) rename docs/{configuration => guides}/run-dbt/execution-modes.rst (100%) rename docs/{configuration => guides}/scheduling.rst (100%) rename docs/{configuration => guides}/selecting-excluding.rst (100%) rename docs/{configuration => guides}/source-nodes-rendering.rst (100%) rename docs/{configuration => guides}/task-display-name.rst (100%) diff --git a/docs/configuration/caching.rst b/docs/guides/caching.rst similarity index 100% rename from docs/configuration/caching.rst rename to docs/guides/caching.rst diff --git a/docs/configuration/callbacks/callbacks.rst b/docs/guides/callbacks/callbacks.rst similarity index 100% rename from docs/configuration/callbacks/callbacks.rst rename to docs/guides/callbacks/callbacks.rst diff --git a/docs/configuration/compiled-sql.rst b/docs/guides/compiled-sql.rst similarity index 100% rename from docs/configuration/compiled-sql.rst rename to docs/guides/compiled-sql.rst diff --git a/docs/configuration/configure-tests/testing-behavior.rst b/docs/guides/configure-tests/testing-behavior.rst similarity index 100% rename from docs/configuration/configure-tests/testing-behavior.rst rename to docs/guides/configure-tests/testing-behavior.rst diff --git a/docs/configuration/cosmos-conf.rst b/docs/guides/cosmos-conf.rst similarity index 100% rename from docs/configuration/cosmos-conf.rst rename to 
docs/guides/cosmos-conf.rst diff --git a/docs/configuration/dag-customization.rst b/docs/guides/dag-customization.rst similarity index 100% rename from docs/configuration/dag-customization.rst rename to docs/guides/dag-customization.rst diff --git a/docs/configuration/dbt-docs/generating-docs.rst b/docs/guides/dbt-docs/generating-docs.rst similarity index 100% rename from docs/configuration/dbt-docs/generating-docs.rst rename to docs/guides/dbt-docs/generating-docs.rst diff --git a/docs/configuration/dbt-docs/hosting-docs.rst b/docs/guides/dbt-docs/hosting-docs.rst similarity index 100% rename from docs/configuration/dbt-docs/hosting-docs.rst rename to docs/guides/dbt-docs/hosting-docs.rst diff --git a/docs/configuration/dbt-fusion.rst b/docs/guides/dbt-fusion.rst similarity index 100% rename from docs/configuration/dbt-fusion.rst rename to docs/guides/dbt-fusion.rst diff --git a/docs/configuration/execution-config.rst b/docs/guides/execution-config.rst similarity index 100% rename from docs/configuration/execution-config.rst rename to docs/guides/execution-config.rst diff --git a/docs/configuration/execution-modes-local-conflicts.rst b/docs/guides/execution-modes-local-conflicts.rst similarity index 100% rename from docs/configuration/execution-modes-local-conflicts.rst rename to docs/guides/execution-modes-local-conflicts.rst diff --git a/docs/configuration/index.rst b/docs/guides/index.rst similarity index 100% rename from docs/configuration/index.rst rename to docs/guides/index.rst diff --git a/docs/configuration/lineage.rst b/docs/guides/lineage.rst similarity index 100% rename from docs/configuration/lineage.rst rename to docs/guides/lineage.rst diff --git a/docs/configuration/logging.rst b/docs/guides/logging.rst similarity index 100% rename from docs/configuration/logging.rst rename to docs/guides/logging.rst diff --git a/docs/configuration/memory_optimization.rst b/docs/guides/memory_optimization.rst similarity index 100% rename from 
docs/configuration/memory_optimization.rst rename to docs/guides/memory_optimization.rst diff --git a/docs/configuration/multi-project.rst b/docs/guides/multi-project.rst similarity index 100% rename from docs/configuration/multi-project.rst rename to docs/guides/multi-project.rst diff --git a/docs/configuration/operator-args.rst b/docs/guides/operator-args.rst similarity index 100% rename from docs/configuration/operator-args.rst rename to docs/guides/operator-args.rst diff --git a/docs/configuration/parsing-methods.rst b/docs/guides/parsing-methods.rst similarity index 100% rename from docs/configuration/parsing-methods.rst rename to docs/guides/parsing-methods.rst diff --git a/docs/configuration/partial-parsing.rst b/docs/guides/partial-parsing.rst similarity index 100% rename from docs/configuration/partial-parsing.rst rename to docs/guides/partial-parsing.rst diff --git a/docs/configuration/profile-config.rst b/docs/guides/profile-config.rst similarity index 100% rename from docs/configuration/profile-config.rst rename to docs/guides/profile-config.rst diff --git a/docs/configuration/project-config.rst b/docs/guides/project-config.rst similarity index 100% rename from docs/configuration/project-config.rst rename to docs/guides/project-config.rst diff --git a/docs/configuration/render-config.rst b/docs/guides/render-config.rst similarity index 100% rename from docs/configuration/render-config.rst rename to docs/guides/render-config.rst diff --git a/docs/configuration/run-dbt/airflow-worker/async-execution-mode.rst b/docs/guides/run-dbt/airflow-worker/async-execution-mode.rst similarity index 100% rename from docs/configuration/run-dbt/airflow-worker/async-execution-mode.rst rename to docs/guides/run-dbt/airflow-worker/async-execution-mode.rst diff --git a/docs/configuration/run-dbt/airflow-worker/index.rst b/docs/guides/run-dbt/airflow-worker/index.rst similarity index 100% rename from docs/configuration/run-dbt/airflow-worker/index.rst rename to 
docs/guides/run-dbt/airflow-worker/index.rst diff --git a/docs/configuration/run-dbt/airflow-worker/watcher-execution-mode.rst b/docs/guides/run-dbt/airflow-worker/watcher-execution-mode.rst similarity index 100% rename from docs/configuration/run-dbt/airflow-worker/watcher-execution-mode.rst rename to docs/guides/run-dbt/airflow-worker/watcher-execution-mode.rst diff --git a/docs/configuration/run-dbt/container/aws-container-run-job.rst b/docs/guides/run-dbt/container/aws-container-run-job.rst similarity index 100% rename from docs/configuration/run-dbt/container/aws-container-run-job.rst rename to docs/guides/run-dbt/container/aws-container-run-job.rst diff --git a/docs/configuration/run-dbt/container/azure-container-instance.rst b/docs/guides/run-dbt/container/azure-container-instance.rst similarity index 100% rename from docs/configuration/run-dbt/container/azure-container-instance.rst rename to docs/guides/run-dbt/container/azure-container-instance.rst diff --git a/docs/configuration/run-dbt/container/docker.rst b/docs/guides/run-dbt/container/docker.rst similarity index 100% rename from docs/configuration/run-dbt/container/docker.rst rename to docs/guides/run-dbt/container/docker.rst diff --git a/docs/configuration/run-dbt/container/gcp-cloud-run-job.rst b/docs/guides/run-dbt/container/gcp-cloud-run-job.rst similarity index 100% rename from docs/configuration/run-dbt/container/gcp-cloud-run-job.rst rename to docs/guides/run-dbt/container/gcp-cloud-run-job.rst diff --git a/docs/configuration/run-dbt/container/index.rst b/docs/guides/run-dbt/container/index.rst similarity index 100% rename from docs/configuration/run-dbt/container/index.rst rename to docs/guides/run-dbt/container/index.rst diff --git a/docs/configuration/run-dbt/container/kubernetes.rst b/docs/guides/run-dbt/container/kubernetes.rst similarity index 100% rename from docs/configuration/run-dbt/container/kubernetes.rst rename to docs/guides/run-dbt/container/kubernetes.rst diff --git 
a/docs/configuration/run-dbt/container/watcher-kubernetes-execution-mode.rst b/docs/guides/run-dbt/container/watcher-kubernetes-execution-mode.rst similarity index 100% rename from docs/configuration/run-dbt/container/watcher-kubernetes-execution-mode.rst rename to docs/guides/run-dbt/container/watcher-kubernetes-execution-mode.rst diff --git a/docs/configuration/run-dbt/execution-modes.rst b/docs/guides/run-dbt/execution-modes.rst similarity index 100% rename from docs/configuration/run-dbt/execution-modes.rst rename to docs/guides/run-dbt/execution-modes.rst diff --git a/docs/configuration/scheduling.rst b/docs/guides/scheduling.rst similarity index 100% rename from docs/configuration/scheduling.rst rename to docs/guides/scheduling.rst diff --git a/docs/configuration/selecting-excluding.rst b/docs/guides/selecting-excluding.rst similarity index 100% rename from docs/configuration/selecting-excluding.rst rename to docs/guides/selecting-excluding.rst diff --git a/docs/configuration/source-nodes-rendering.rst b/docs/guides/source-nodes-rendering.rst similarity index 100% rename from docs/configuration/source-nodes-rendering.rst rename to docs/guides/source-nodes-rendering.rst diff --git a/docs/configuration/task-display-name.rst b/docs/guides/task-display-name.rst similarity index 100% rename from docs/configuration/task-display-name.rst rename to docs/guides/task-display-name.rst From febd2f4326764f59ef413314c878b44d5fd51b0a Mon Sep 17 00:00:00 2001 From: L Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Fri, 27 Feb 2026 17:26:26 -0500 Subject: [PATCH 04/29] update hardcoded URLs --- docs/getting_started/index.rst | 10 +++++----- docs/guides/execution-modes-local-conflicts.rst | 4 ++-- docs/guides/index.rst | 6 +++--- .../run-dbt/airflow-worker/watcher-execution-mode.rst | 2 +- docs/guides/run-dbt/container/kubernetes.rst | 8 ++++---- docs/index.rst | 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/getting_started/index.rst
b/docs/getting_started/index.rst index 1dea6a496b..762adb2c1c 100644 --- a/docs/getting_started/index.rst +++ b/docs/getting_started/index.rst @@ -87,11 +87,11 @@ For more customization, check out the different execution modes that Cosmos supp For specific guides, see the following: -- `Executing dbt DAGs with Docker Operators `__ -- `Executing dbt DAGs with KubernetesPodOperators `__ -- `Executing dbt DAGs with Watcher Kubernetes Mode `__ -- `Executing dbt DAGs with AzureContainerInstancesOperators `__ -- `Executing dbt DAGs with GcpCloudRunExecuteJobOperators `__ +- `Executing dbt DAGs with Docker Operators `__ +- `Executing dbt DAGs with KubernetesPodOperators `__ +- `Executing dbt DAGs with Watcher Kubernetes Mode `__ +- `Executing dbt DAGs with AzureContainerInstancesOperators `__ +- `Executing dbt DAGs with GcpCloudRunExecuteJobOperators `__ Concepts Overview diff --git a/docs/guides/execution-modes-local-conflicts.rst b/docs/guides/execution-modes-local-conflicts.rst index 9fec173751..0f9120127c 100644 --- a/docs/guides/execution-modes-local-conflicts.rst +++ b/docs/guides/execution-modes-local-conflicts.rst @@ -10,8 +10,8 @@ When using the `Local Execution Mode `__, users may If you find errors, we recommend users isolating the installation of dbt from the Airflow installation. With the `Local Execution Mode `__, this can be accomplished by installing dbt in a separate -Python virtualenv and setting the `ExecutionConfig.dbt_executable_path <../configuration/execution-config.html>`_ and -`RenderConfig.dbt_executable_path <../configuration/render-config.html>`_ parameters. +Python virtualenv and setting the `ExecutionConfig.dbt_executable_path <../guides/execution-config.html>`_ and +`RenderConfig.dbt_executable_path <../guides/render-config.html>`_ parameters. The page `execution modes `__ describes many other methods that support isolating dbt from Airflow. 
diff --git a/docs/guides/index.rst b/docs/guides/index.rst index d699e6189e..df227dea90 100644 --- a/docs/guides/index.rst +++ b/docs/guides/index.rst @@ -1,7 +1,7 @@ -.. _configuration: +.. _guides: -Configuration -============= +Guides +====== Cosmos offers a number of configuration options to customize its behavior. For more info, check out the links on the left or the table of contents below. diff --git a/docs/guides/run-dbt/airflow-worker/watcher-execution-mode.rst b/docs/guides/run-dbt/airflow-worker/watcher-execution-mode.rst index af7589650c..f33ef15900 100644 --- a/docs/guides/run-dbt/airflow-worker/watcher-execution-mode.rst +++ b/docs/guides/run-dbt/airflow-worker/watcher-execution-mode.rst @@ -370,7 +370,7 @@ Source freshness nodes Since Cosmos 1.6, it `supports the rendering of source nodes `_. -We noticed some Cosmos users use this feature alongside `overriding Cosmos source nodes `_ as sensors or another operator that allows them to skip the following branch of the DAG if the source is not fresh. +We noticed some Cosmos users use this feature alongside `overriding Cosmos source nodes `_ as sensors or another operator that allows them to skip the following branch of the DAG if the source is not fresh. This use case is not currently supported by the ``ExecutionMode.WATCHER``, since the ``dbt build`` command does not run `source freshness checks `_. 
diff --git a/docs/guides/run-dbt/container/kubernetes.rst b/docs/guides/run-dbt/container/kubernetes.rst index 607ba07bd7..4ea8ccd4b9 100644 --- a/docs/guides/run-dbt/container/kubernetes.rst +++ b/docs/guides/run-dbt/container/kubernetes.rst @@ -161,7 +161,7 @@ The Kubernetes execution mode has the following limitations: - Does not emit Airflow datasets, assets, and dataset aliases (there is an `open ticket #2329 `__ to address this) - Does not handle installing dbt deps for users (there is an `open ticket #679 `__ to address this) - Does not support `ProfileMapping `_ (there is an `open ticket #749 `__ to address this) -- Does not support `Callbacks `_ (there is an `open ticket #1575 `__ to address this) -- Does not expose Compiled SQL as a `templated field `_ -- Does not benefit from `Cosmos caching mechanisms `_ -- Does not support `generating dbt docs & uploading to an object store `_ (there is a `PR `_ to solve this for S3) +- Does not support `Callbacks `_ (there is an `open ticket #1575 `__ to address this) +- Does not expose Compiled SQL as a `templated field `_ +- Does not benefit from `Cosmos caching mechanisms `_ +- Does not support `generating dbt docs & uploading to an object store `_ (there is a `PR `_ to solve this for S3) diff --git a/docs/index.rst b/docs/index.rst index e2c88b0c83..83b2030f15 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -7,7 +7,7 @@ Home Getting Started - Configuration + Guides Profiles Project Policies From 6baf8da99616030dc9869d5a6dff96ca9d72a1b6 Mon Sep 17 00:00:00 2001 From: L Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Fri, 27 Feb 2026 18:08:47 -0500 Subject: [PATCH 05/29] redistribute files --- .../{ => cosmos_devex}/compiled-sql.rst | 0 docs/guides/{ => cosmos_devex}/lineage.rst | 0 docs/guides/{ => cosmos_devex}/logging.rst | 0 .../generating-docs.rst | 0 .../{dbt-docs => dbt_docs}/hosting-docs.rst | 0 docs/guides/{ => dbt_setup}/dbt-fusion.rst | 0 .../{ => multi_project}/multi-project.rst | 0 .../{ 
=> optimize_performance}/caching.rst | 0 .../memory_optimization.rst | 0 .../partial-parsing.rst | 0 .../selecting-excluding.rst | 0 .../airflow-worker/async-execution-mode.rst | 0 .../execution-modes-local-conflicts.rst | 0 .../airflow-worker/index.rst | 0 .../airflow-worker/watcher-execution-mode.rst | 0 .../{ => run_dbt}/callbacks/callbacks.rst | 0 .../container/aws-container-run-job.rst | 0 .../container/azure-container-instance.rst | 0 .../{run-dbt => run_dbt}/container/docker.rst | 0 .../container/gcp-cloud-run-job.rst | 0 .../{run-dbt => run_dbt}/container/index.rst | 0 .../container/kubernetes.rst | 0 .../watcher-kubernetes-execution-mode.rst | 0 docs/guides/run_dbt/customization/index.rst | 9 ++++ .../customization}/operator-args.rst | 0 .../customization}/scheduling.rst | 0 .../customization}/task-display-name.rst | 0 .../execution-modes.rst => run_dbt/index.rst} | 3 ++ .../run_dbt/operators}/operators.rst | 0 .../configure-tests/testing-behavior.rst | 0 .../custom-airflow-properties.rst | 0 .../dag-customization.rst | 0 .../parsing-methods.rst | 0 .../render-config.rst | 0 .../source-nodes-rendering.rst | 0 docs/index.rst | 2 +- docs/policy/contributing.rst | 45 +++++++++++++++++++ 37 files changed, 58 insertions(+), 1 deletion(-) rename docs/guides/{ => cosmos_devex}/compiled-sql.rst (100%) rename docs/guides/{ => cosmos_devex}/lineage.rst (100%) rename docs/guides/{ => cosmos_devex}/logging.rst (100%) rename docs/guides/{dbt-docs => dbt_docs}/generating-docs.rst (100%) rename docs/guides/{dbt-docs => dbt_docs}/hosting-docs.rst (100%) rename docs/guides/{ => dbt_setup}/dbt-fusion.rst (100%) rename docs/guides/{ => multi_project}/multi-project.rst (100%) rename docs/guides/{ => optimize_performance}/caching.rst (100%) rename docs/guides/{ => optimize_performance}/memory_optimization.rst (100%) rename docs/guides/{ => optimize_performance}/partial-parsing.rst (100%) rename docs/guides/{ => optimize_performance}/selecting-excluding.rst (100%) rename 
docs/guides/{run-dbt => run_dbt}/airflow-worker/async-execution-mode.rst (100%) rename docs/guides/{ => run_dbt/airflow-worker}/execution-modes-local-conflicts.rst (100%) rename docs/guides/{run-dbt => run_dbt}/airflow-worker/index.rst (100%) rename docs/guides/{run-dbt => run_dbt}/airflow-worker/watcher-execution-mode.rst (100%) rename docs/guides/{ => run_dbt}/callbacks/callbacks.rst (100%) rename docs/guides/{run-dbt => run_dbt}/container/aws-container-run-job.rst (100%) rename docs/guides/{run-dbt => run_dbt}/container/azure-container-instance.rst (100%) rename docs/guides/{run-dbt => run_dbt}/container/docker.rst (100%) rename docs/guides/{run-dbt => run_dbt}/container/gcp-cloud-run-job.rst (100%) rename docs/guides/{run-dbt => run_dbt}/container/index.rst (100%) rename docs/guides/{run-dbt => run_dbt}/container/kubernetes.rst (100%) rename docs/guides/{run-dbt => run_dbt}/container/watcher-kubernetes-execution-mode.rst (100%) create mode 100644 docs/guides/run_dbt/customization/index.rst rename docs/guides/{ => run_dbt/customization}/operator-args.rst (100%) rename docs/guides/{ => run_dbt/customization}/scheduling.rst (100%) rename docs/guides/{ => run_dbt/customization}/task-display-name.rst (100%) rename docs/guides/{run-dbt/execution-modes.rst => run_dbt/index.rst} (99%) rename docs/{getting_started => guides/run_dbt/operators}/operators.rst (100%) rename docs/guides/{ => translate_dbt_to_airflow}/configure-tests/testing-behavior.rst (100%) rename docs/{getting_started => guides/translate_dbt_to_airflow}/custom-airflow-properties.rst (100%) rename docs/guides/{ => translate_dbt_to_airflow}/dag-customization.rst (100%) rename docs/guides/{ => translate_dbt_to_airflow}/parsing-methods.rst (100%) rename docs/guides/{ => translate_dbt_to_airflow}/render-config.rst (100%) rename docs/guides/{ => translate_dbt_to_airflow}/source-nodes-rendering.rst (100%) diff --git a/docs/guides/compiled-sql.rst b/docs/guides/cosmos_devex/compiled-sql.rst similarity index 100% 
rename from docs/guides/compiled-sql.rst rename to docs/guides/cosmos_devex/compiled-sql.rst diff --git a/docs/guides/lineage.rst b/docs/guides/cosmos_devex/lineage.rst similarity index 100% rename from docs/guides/lineage.rst rename to docs/guides/cosmos_devex/lineage.rst diff --git a/docs/guides/logging.rst b/docs/guides/cosmos_devex/logging.rst similarity index 100% rename from docs/guides/logging.rst rename to docs/guides/cosmos_devex/logging.rst diff --git a/docs/guides/dbt-docs/generating-docs.rst b/docs/guides/dbt_docs/generating-docs.rst similarity index 100% rename from docs/guides/dbt-docs/generating-docs.rst rename to docs/guides/dbt_docs/generating-docs.rst diff --git a/docs/guides/dbt-docs/hosting-docs.rst b/docs/guides/dbt_docs/hosting-docs.rst similarity index 100% rename from docs/guides/dbt-docs/hosting-docs.rst rename to docs/guides/dbt_docs/hosting-docs.rst diff --git a/docs/guides/dbt-fusion.rst b/docs/guides/dbt_setup/dbt-fusion.rst similarity index 100% rename from docs/guides/dbt-fusion.rst rename to docs/guides/dbt_setup/dbt-fusion.rst diff --git a/docs/guides/multi-project.rst b/docs/guides/multi_project/multi-project.rst similarity index 100% rename from docs/guides/multi-project.rst rename to docs/guides/multi_project/multi-project.rst diff --git a/docs/guides/caching.rst b/docs/guides/optimize_performance/caching.rst similarity index 100% rename from docs/guides/caching.rst rename to docs/guides/optimize_performance/caching.rst diff --git a/docs/guides/memory_optimization.rst b/docs/guides/optimize_performance/memory_optimization.rst similarity index 100% rename from docs/guides/memory_optimization.rst rename to docs/guides/optimize_performance/memory_optimization.rst diff --git a/docs/guides/partial-parsing.rst b/docs/guides/optimize_performance/partial-parsing.rst similarity index 100% rename from docs/guides/partial-parsing.rst rename to docs/guides/optimize_performance/partial-parsing.rst diff --git 
a/docs/guides/selecting-excluding.rst b/docs/guides/optimize_performance/selecting-excluding.rst similarity index 100% rename from docs/guides/selecting-excluding.rst rename to docs/guides/optimize_performance/selecting-excluding.rst diff --git a/docs/guides/run-dbt/airflow-worker/async-execution-mode.rst b/docs/guides/run_dbt/airflow-worker/async-execution-mode.rst similarity index 100% rename from docs/guides/run-dbt/airflow-worker/async-execution-mode.rst rename to docs/guides/run_dbt/airflow-worker/async-execution-mode.rst diff --git a/docs/guides/execution-modes-local-conflicts.rst b/docs/guides/run_dbt/airflow-worker/execution-modes-local-conflicts.rst similarity index 100% rename from docs/guides/execution-modes-local-conflicts.rst rename to docs/guides/run_dbt/airflow-worker/execution-modes-local-conflicts.rst diff --git a/docs/guides/run-dbt/airflow-worker/index.rst b/docs/guides/run_dbt/airflow-worker/index.rst similarity index 100% rename from docs/guides/run-dbt/airflow-worker/index.rst rename to docs/guides/run_dbt/airflow-worker/index.rst diff --git a/docs/guides/run-dbt/airflow-worker/watcher-execution-mode.rst b/docs/guides/run_dbt/airflow-worker/watcher-execution-mode.rst similarity index 100% rename from docs/guides/run-dbt/airflow-worker/watcher-execution-mode.rst rename to docs/guides/run_dbt/airflow-worker/watcher-execution-mode.rst diff --git a/docs/guides/callbacks/callbacks.rst b/docs/guides/run_dbt/callbacks/callbacks.rst similarity index 100% rename from docs/guides/callbacks/callbacks.rst rename to docs/guides/run_dbt/callbacks/callbacks.rst diff --git a/docs/guides/run-dbt/container/aws-container-run-job.rst b/docs/guides/run_dbt/container/aws-container-run-job.rst similarity index 100% rename from docs/guides/run-dbt/container/aws-container-run-job.rst rename to docs/guides/run_dbt/container/aws-container-run-job.rst diff --git a/docs/guides/run-dbt/container/azure-container-instance.rst 
b/docs/guides/run_dbt/container/azure-container-instance.rst similarity index 100% rename from docs/guides/run-dbt/container/azure-container-instance.rst rename to docs/guides/run_dbt/container/azure-container-instance.rst diff --git a/docs/guides/run-dbt/container/docker.rst b/docs/guides/run_dbt/container/docker.rst similarity index 100% rename from docs/guides/run-dbt/container/docker.rst rename to docs/guides/run_dbt/container/docker.rst diff --git a/docs/guides/run-dbt/container/gcp-cloud-run-job.rst b/docs/guides/run_dbt/container/gcp-cloud-run-job.rst similarity index 100% rename from docs/guides/run-dbt/container/gcp-cloud-run-job.rst rename to docs/guides/run_dbt/container/gcp-cloud-run-job.rst diff --git a/docs/guides/run-dbt/container/index.rst b/docs/guides/run_dbt/container/index.rst similarity index 100% rename from docs/guides/run-dbt/container/index.rst rename to docs/guides/run_dbt/container/index.rst diff --git a/docs/guides/run-dbt/container/kubernetes.rst b/docs/guides/run_dbt/container/kubernetes.rst similarity index 100% rename from docs/guides/run-dbt/container/kubernetes.rst rename to docs/guides/run_dbt/container/kubernetes.rst diff --git a/docs/guides/run-dbt/container/watcher-kubernetes-execution-mode.rst b/docs/guides/run_dbt/container/watcher-kubernetes-execution-mode.rst similarity index 100% rename from docs/guides/run-dbt/container/watcher-kubernetes-execution-mode.rst rename to docs/guides/run_dbt/container/watcher-kubernetes-execution-mode.rst diff --git a/docs/guides/run_dbt/customization/index.rst b/docs/guides/run_dbt/customization/index.rst new file mode 100644 index 0000000000..44021154dc --- /dev/null +++ b/docs/guides/run_dbt/customization/index.rst @@ -0,0 +1,9 @@ +Additional Customization +======================== + +.. 
toctree:: + :maxdepth: 1 + :caption: Additional Customization + + operator-args + scheduling diff --git a/docs/guides/operator-args.rst b/docs/guides/run_dbt/customization/operator-args.rst similarity index 100% rename from docs/guides/operator-args.rst rename to docs/guides/run_dbt/customization/operator-args.rst diff --git a/docs/guides/scheduling.rst b/docs/guides/run_dbt/customization/scheduling.rst similarity index 100% rename from docs/guides/scheduling.rst rename to docs/guides/run_dbt/customization/scheduling.rst diff --git a/docs/guides/task-display-name.rst b/docs/guides/run_dbt/customization/task-display-name.rst similarity index 100% rename from docs/guides/task-display-name.rst rename to docs/guides/run_dbt/customization/task-display-name.rst diff --git a/docs/guides/run-dbt/execution-modes.rst b/docs/guides/run_dbt/index.rst similarity index 99% rename from docs/guides/run-dbt/execution-modes.rst rename to docs/guides/run_dbt/index.rst index a9bd3f1e2b..1b73d56a88 100644 --- a/docs/guides/run-dbt/execution-modes.rst +++ b/docs/guides/run_dbt/index.rst @@ -1,5 +1,8 @@ .. 
_execution-modes: + + + Execution Modes =============== diff --git a/docs/getting_started/operators.rst b/docs/guides/run_dbt/operators/operators.rst similarity index 100% rename from docs/getting_started/operators.rst rename to docs/guides/run_dbt/operators/operators.rst diff --git a/docs/guides/configure-tests/testing-behavior.rst b/docs/guides/translate_dbt_to_airflow/configure-tests/testing-behavior.rst similarity index 100% rename from docs/guides/configure-tests/testing-behavior.rst rename to docs/guides/translate_dbt_to_airflow/configure-tests/testing-behavior.rst diff --git a/docs/getting_started/custom-airflow-properties.rst b/docs/guides/translate_dbt_to_airflow/custom-airflow-properties.rst similarity index 100% rename from docs/getting_started/custom-airflow-properties.rst rename to docs/guides/translate_dbt_to_airflow/custom-airflow-properties.rst diff --git a/docs/guides/dag-customization.rst b/docs/guides/translate_dbt_to_airflow/dag-customization.rst similarity index 100% rename from docs/guides/dag-customization.rst rename to docs/guides/translate_dbt_to_airflow/dag-customization.rst diff --git a/docs/guides/parsing-methods.rst b/docs/guides/translate_dbt_to_airflow/parsing-methods.rst similarity index 100% rename from docs/guides/parsing-methods.rst rename to docs/guides/translate_dbt_to_airflow/parsing-methods.rst diff --git a/docs/guides/render-config.rst b/docs/guides/translate_dbt_to_airflow/render-config.rst similarity index 100% rename from docs/guides/render-config.rst rename to docs/guides/translate_dbt_to_airflow/render-config.rst diff --git a/docs/guides/source-nodes-rendering.rst b/docs/guides/translate_dbt_to_airflow/source-nodes-rendering.rst similarity index 100% rename from docs/guides/source-nodes-rendering.rst rename to docs/guides/translate_dbt_to_airflow/source-nodes-rendering.rst diff --git a/docs/index.rst b/docs/index.rst index 83b2030f15..9ccb67cdca 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -2,7 +2,7 @@ .. 
toctree:: :hidden: - :maxdepth: 2 + :maxdepth: 0 :caption: Contents: Home diff --git a/docs/policy/contributing.rst b/docs/policy/contributing.rst index a1e9c8ae5c..23d94453da 100644 --- a/docs/policy/contributing.rst +++ b/docs/policy/contributing.rst @@ -206,3 +206,48 @@ To run the checks manually, run: .. code-block:: bash pre-commit run --all-files + +Writing Docs +____________ + +`Hatch `_ is a unified command-line tool for managing dependencies and environment isolation for Python developers. In Cosmos, we use Hatch to declare the dependencies required for the project itself, as well as for tests and documentation builds. + +If you don’t already have Hatch installed, please `install it `_ before proceeding. As an example, on macOS, you can do so with: + +.. code-block:: bash + + brew install hatch + + +You can run the docs locally by running the following: + +.. code-block:: bash + + hatch run docs:serve + + + +Building +________ + +We use ``hatch`` to build the project. To build the project, run: + +.. code-block:: bash + + hatch build + + +Releasing +_________ + +We use GitHub Actions to create and deploy new releases. To create a new release, first create a new version using: + +.. code-block:: bash + + hatch version minor + + +``hatch`` will automatically update the version for you. Then, create a new release on GitHub with the new version. The release will be automatically deployed to PyPI. + +.. note:: + You can update the version in a few different ways. Check out the `hatch docs `_ to learn more. 
From 577b61ce6bd753cb9770cfaddf7f7779c37d6876 Mon Sep 17 00:00:00 2001 From: L Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Sun, 1 Mar 2026 19:28:24 -0500 Subject: [PATCH 06/29] improve build errors --- docs/getting_started/index.rst | 19 +++------ docs/guides/cosmos_devex/index.rst | 14 +++++++ .../task-display-name.rst | 0 docs/guides/index.rst | 42 +++++-------------- docs/guides/optimize_performance/index.rst | 13 ++++++ docs/guides/run_dbt/index.rst | 25 ++++++++--- .../guides/translate_dbt_to_airflow/index.rst | 26 ++++++++++++ ...des-rendering.rst => managing-sources.rst} | 6 +-- 8 files changed, 91 insertions(+), 54 deletions(-) create mode 100644 docs/guides/cosmos_devex/index.rst rename docs/guides/{run_dbt/customization => cosmos_devex}/task-display-name.rst (100%) create mode 100644 docs/guides/optimize_performance/index.rst create mode 100644 docs/guides/translate_dbt_to_airflow/index.rst rename docs/guides/translate_dbt_to_airflow/{source-nodes-rendering.rst => managing-sources.rst} (97%) diff --git a/docs/getting_started/index.rst b/docs/getting_started/index.rst index 762adb2c1c..6ed3154c98 100644 --- a/docs/getting_started/index.rst +++ b/docs/getting_started/index.rst @@ -18,15 +18,6 @@ Google Cloud Composer (GCC) Amazon Managed Workflows for Apache Airflow (MWAA) - -.. 
toctree:: - :maxdepth: 1 - :hidden: - :caption: Operators - - Operators - Custom Airflow Properties - Getting Started =============== @@ -87,11 +78,11 @@ For more customization, check out the different execution modes that Cosmos supp For specific guides, see the following: -- `Executing dbt DAGs with Docker Operators `__ -- `Executing dbt DAGs with KubernetesPodOperators `__ -- `Executing dbt DAGs with Watcher Kubernetes Mode `__ -- `Executing dbt DAGs with AzureContainerInstancesOperators `__ -- `Executing dbt DAGs with GcpCloudRunExecuteJobOperators `__ +- `Executing dbt DAGs with Docker Operators `__ +- `Executing dbt DAGs with KubernetesPodOperators `__ +- `Executing dbt DAGs with Watcher Kubernetes Mode `__ +- `Executing dbt DAGs with AzureContainerInstancesOperators `__ +- `Executing dbt DAGs with GcpCloudRunExecuteJobOperators `__ Concepts Overview diff --git a/docs/guides/cosmos_devex/index.rst b/docs/guides/cosmos_devex/index.rst new file mode 100644 index 0000000000..614e4c3c17 --- /dev/null +++ b/docs/guides/cosmos_devex/index.rst @@ -0,0 +1,14 @@ +.. _cosmos_devex: + + +Cosmos DevEx +============ + +.. toctree:: + :maxdepth: 1 + :caption: Cosmos DevEx + + lineage + compiled-sql + logging + task-display-name \ No newline at end of file diff --git a/docs/guides/run_dbt/customization/task-display-name.rst b/docs/guides/cosmos_devex/task-display-name.rst similarity index 100% rename from docs/guides/run_dbt/customization/task-display-name.rst rename to docs/guides/cosmos_devex/task-display-name.rst diff --git a/docs/guides/index.rst b/docs/guides/index.rst index df227dea90..6234012779 100644 --- a/docs/guides/index.rst +++ b/docs/guides/index.rst @@ -10,70 +10,48 @@ Cosmos offers a number of configuration options to customize its behavior. For m :hidden: :caption: Translating dbt into Airflow - Source Nodes Rendering - Post-rendering DAG customization + translate_dbt_to_airflow/index .. 
toctree:: :maxdepth: 3 :hidden: :caption: How Cosmos runs dbt - execution-modes-local-conflicts - run-dbt/execution-modes - -.. toctree:: - :maxdepth: 1 - :hidden: - :caption: Configure tests - - configure-tests/testing-behavior - -.. toctree:: - :maxdepth: 1 - :hidden: - :caption: Callbacks - - callbacks/callbacks + run_dbt/index .. toctree:: :maxdepth: 1 :hidden: :caption: Multi-project Setups - Multi-Project Setups + multi_project/multi-project .. toctree:: :maxdepth: 1 :hidden: - :caption: Operators + :caption: Documentation - Operator Args + dbt-docs/generating-docs + dbt-docs/hosting-docs .. toctree:: :maxdepth: 1 :hidden: - :caption: Documentation + :caption: Cosmos DevEx - dbt-docs/generating-docs - dbt-docs/hosting-docs + cosmos_devex/index .. toctree:: :maxdepth: 1 :hidden: :caption: Optimizing Performance - Memory Optimization - dbt Fusion - Selecting & Excluding - Parsing Methods - Partial Parsing - Caching - Render Config + optimize_performance/index .. toctree:: :maxdepth: 1 :hidden: - :caption: Configurations + :caption: Configuration References Project Config Profile Config diff --git a/docs/guides/optimize_performance/index.rst b/docs/guides/optimize_performance/index.rst new file mode 100644 index 0000000000..0ed84470d0 --- /dev/null +++ b/docs/guides/optimize_performance/index.rst @@ -0,0 +1,13 @@ +.. _optimize-performance: + +Optimize your Cosmos Performance +================================ + +.. toctree:: + :maxdepth: 1 + :caption: Optimize Performance + + partial-parsing + memory_optimization + selecting-excluding + caching diff --git a/docs/guides/run_dbt/index.rst b/docs/guides/run_dbt/index.rst index 1b73d56a88..2827fe39dd 100644 --- a/docs/guides/run_dbt/index.rst +++ b/docs/guides/run_dbt/index.rst @@ -1,10 +1,7 @@ .. _execution-modes: - - - -Execution Modes -=============== +How Cosmos runs dbt +=================== .. toctree:: :maxdepth: 3 @@ -18,6 +15,24 @@ Execution Modes container/index +.. 
toctree:: + :maxdepth: 3 + :caption: Callbacks + + callbacks/callbacks + +.. toctree:: + :maxdepth: 3 + :caption: Operators + + operators/operators + +.. toctree:: + :maxdepth: 3 + :caption: Customize Airflow + + customization/index + Cosmos can run ``dbt`` commands using several different approaches, called ``execution modes``: diff --git a/docs/guides/translate_dbt_to_airflow/index.rst b/docs/guides/translate_dbt_to_airflow/index.rst new file mode 100644 index 0000000000..d0f8cdbefc --- /dev/null +++ b/docs/guides/translate_dbt_to_airflow/index.rst @@ -0,0 +1,26 @@ +.. _translate-dbt-to-airflow + +Translate dbt code into Airflow +=============================== + +.. toctree:: + :maxdepth: 1 + :caption: Mapping dbt into dags + + parsing-methods + custom-airflow-properties + + +.. toctree:: + :maxdepth: 1 + :caption: Configure tests + + configure-tests/testing-behavior + +.. toctree:: + :maxdepth: 1 + :caption: Translate nodes + + source-nodes-rendering + render-config + dag-customization \ No newline at end of file diff --git a/docs/guides/translate_dbt_to_airflow/source-nodes-rendering.rst b/docs/guides/translate_dbt_to_airflow/managing-sources.rst similarity index 97% rename from docs/guides/translate_dbt_to_airflow/source-nodes-rendering.rst rename to docs/guides/translate_dbt_to_airflow/managing-sources.rst index 9bfcf0e97b..0d7a9d9644 100644 --- a/docs/guides/translate_dbt_to_airflow/source-nodes-rendering.rst +++ b/docs/guides/translate_dbt_to_airflow/managing-sources.rst @@ -1,7 +1,7 @@ -.. _source-nodes-rendering: +.. _managing-sources: -Source Nodes Rendering -====================== +Managing Sources +================ .. note:: This feature is only available for dbt-core >= 1.5 and cosmos >= 1.6.0. 
From d0e7343ac781d3d36b5a8d7e0acc6e9df81eab85 Mon Sep 17 00:00:00 2001 From: L Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Sun, 1 Mar 2026 19:36:24 -0500 Subject: [PATCH 07/29] fix index --- docs/guides/index.rst | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/docs/guides/index.rst b/docs/guides/index.rst index 6234012779..50f3059402 100644 --- a/docs/guides/index.rst +++ b/docs/guides/index.rst @@ -5,6 +5,13 @@ Guides Cosmos offers a number of configuration options to customize its behavior. For more info, check out the links on the left or the table of contents below. +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Set up dbt with Airflow + + dbt_setup/dbt-fusion + .. toctree:: :maxdepth: 1 :hidden: @@ -31,8 +38,8 @@ Cosmos offers a number of configuration options to customize its behavior. For m :hidden: :caption: Documentation - dbt-docs/generating-docs - dbt-docs/hosting-docs + dbt_docs/generating-docs + dbt_docs/hosting-docs .. toctree:: :maxdepth: 1 @@ -56,16 +63,4 @@ Cosmos offers a number of configuration options to customize its behavior. For m Project Config Profile Config Execution Config - - -.. 
toctree:: - :maxdepth: 1 - :hidden: - :caption: Customizing Airflow - - Configuring in Airflow - Configuring Lineage - Scheduling - Compiled SQL - Logging - Task display name \ No newline at end of file + Cosmos Config From 06969b1d4d7609ed6b02ecec1577af5fd51cf888 Mon Sep 17 00:00:00 2001 From: L Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Sun, 1 Mar 2026 20:04:34 -0500 Subject: [PATCH 08/29] fix rel-link build errors --- docs/guides/multi_project/multi-project.rst | 4 ++-- docs/guides/optimize_performance/caching.rst | 4 ++-- .../airflow-worker/watcher-execution-mode.rst | 4 ++-- docs/guides/run_dbt/callbacks/callbacks.rst | 4 ++-- docs/guides/run_dbt/container/kubernetes.rst | 2 +- .../container/watcher-kubernetes-execution-mode.rst | 2 +- docs/guides/run_dbt/customization/scheduling.rst | 2 +- docs/guides/run_dbt/index.rst | 8 ++++---- docs/guides/run_dbt/operators/operators.rst | 4 ++-- .../configure-tests/testing-behavior.rst | 12 ++++++------ .../custom-airflow-properties.rst | 2 +- docs/guides/translate_dbt_to_airflow/index.rst | 2 +- .../translate_dbt_to_airflow/managing-sources.rst | 2 +- .../translate_dbt_to_airflow/parsing-methods.rst | 8 ++++---- .../translate_dbt_to_airflow/render-config.rst | 2 +- 15 files changed, 31 insertions(+), 31 deletions(-) diff --git a/docs/guides/multi_project/multi-project.rst b/docs/guides/multi_project/multi-project.rst index f2868f840c..7e506e5471 100644 --- a/docs/guides/multi_project/multi-project.rst +++ b/docs/guides/multi_project/multi-project.rst @@ -169,7 +169,7 @@ You can use either separate DAGs or a combined DAG with task groups. **Option 1: Combined DAG with Task Groups using dbt ls Load Mode (Recommended)** -.. literalinclude:: ../../dev/dags/cross_project_dbt_ls_dag.py +.. 
literalinclude:: ../../../dev/dags/cross_project_dbt_ls_dag.py :language: python :start-after: [START cross_project_dbt_ls_dag] :end-before: [END cross_project_dbt_ls_dag] @@ -178,7 +178,7 @@ You can use either separate DAGs or a combined DAG with task groups. This option uses pre-generated ``manifest.json`` files for faster DAG parsing (no ``dbt ls`` execution required). -.. literalinclude:: ../../dev/dags/cross_project_manifest_dag.py +.. literalinclude:: ../../../dev/dags/cross_project_manifest_dag.py :language: python :start-after: [START cross_project_manifest_dag] :end-before: [END cross_project_manifest_dag] diff --git a/docs/guides/optimize_performance/caching.rst b/docs/guides/optimize_performance/caching.rst index 7289d00742..5bf8a6406c 100644 --- a/docs/guides/optimize_performance/caching.rst +++ b/docs/guides/optimize_performance/caching.rst @@ -84,7 +84,7 @@ The method deletes the Cosmos cache stored in Airflow Variables based on the las As an example, the following clean-up DAG will delete any cache associated with Cosmos that has not been used for the last five days: -.. literalinclude:: ../../dev/dags/example_cosmos_cleanup_dag.py +.. literalinclude:: ../../../dev/dags/example_cosmos_cleanup_dag.py :language: python :start-after: [START cache_example] :end-before: [END cache_example] @@ -161,7 +161,7 @@ The method deletes the Cosmos cache stored in Airflow Variables based on the las As an example, the following clean-up DAG will delete any cache associated with Cosmos that has not been used for the last five days: -.. literalinclude:: ../../dev/dags/example_cosmos_cleanup_dag.py +.. 
literalinclude:: ../../../dev/dags/example_cosmos_cleanup_dag.py :language: python :start-after: [START cache_example] :end-before: [END cache_example] diff --git a/docs/guides/run_dbt/airflow-worker/watcher-execution-mode.rst b/docs/guides/run_dbt/airflow-worker/watcher-execution-mode.rst index f33ef15900..05bb21c7f7 100644 --- a/docs/guides/run_dbt/airflow-worker/watcher-execution-mode.rst +++ b/docs/guides/run_dbt/airflow-worker/watcher-execution-mode.rst @@ -144,7 +144,7 @@ Example 1 — Using ``DbtDag`` with ``ExecutionMode.WATCHER`` You can enable WATCHER mode directly in your ``DbtDag`` configuration. This approach is best when your Airflow DAG is fully dedicated to a dbt project. -.. literalinclude:: ../../dev/dags/example_watcher.py +.. literalinclude:: ../../../../dev/dags/example_watcher.py :language: python :start-after: [START example_watcher] :end-before: [END example_watcher] @@ -451,7 +451,7 @@ Asynchronous sensor execution To disable asynchronous execution, set the ``deferrable`` flag to ``False`` in the ``operator_args``. -.. literalinclude:: ../../dev/dags/example_watcher.py +.. literalinclude:: ../../../../dev/dags/example_watcher.py :language: python :start-after: [START example_watcher_synchronous] :end-before: [END example_watcher_synchronous] diff --git a/docs/guides/run_dbt/callbacks/callbacks.rst b/docs/guides/run_dbt/callbacks/callbacks.rst index c754245525..4b602ece3f 100644 --- a/docs/guides/run_dbt/callbacks/callbacks.rst +++ b/docs/guides/run_dbt/callbacks/callbacks.rst @@ -34,7 +34,7 @@ Example: Using Callbacks with a Single Operator To demonstrate how to specify a callback function for uploading files from the target directory, here’s an example using a single operator in an Airflow DAG: -.. literalinclude:: ../../dev/dags/example_operators.py +.. 
literalinclude:: ../../../../dev/dags/example_operators.py :language: python :start-after: [START single_operator_callback] :end-before: [END single_operator_callback] @@ -46,7 +46,7 @@ You can leverage the :ref:`remote_target_path` configuration to upload files from the target directory to a remote storage. Below is an example of how to define a callback helper function in your ``DbtDag`` that utilizes this configuration: -.. literalinclude:: ../../dev/dags/cosmos_callback_dag.py +.. literalinclude:: ../../../../dev/dags/cosmos_callback_dag.py :language: python :start-after: [START cosmos_callback_example] :end-before: [END cosmos_callback_example] diff --git a/docs/guides/run_dbt/container/kubernetes.rst b/docs/guides/run_dbt/container/kubernetes.rst index 4ea8ccd4b9..d200589429 100644 --- a/docs/guides/run_dbt/container/kubernetes.rst +++ b/docs/guides/run_dbt/container/kubernetes.rst @@ -28,7 +28,7 @@ Additional KubernetesPodOperator parameters can be added to the ``operator_args` For instance, -.. literalinclude:: ../../dev/dags/jaffle_shop_kubernetes.py +.. literalinclude:: ../../../../dev/dags/jaffle_shop_kubernetes.py :language: python :start-after: [START kubernetes_tg_example] :end-before: [END kubernetes_tg_example] diff --git a/docs/guides/run_dbt/container/watcher-kubernetes-execution-mode.rst b/docs/guides/run_dbt/container/watcher-kubernetes-execution-mode.rst index 16dbbffd0a..d3f8a80a49 100644 --- a/docs/guides/run_dbt/container/watcher-kubernetes-execution-mode.rst +++ b/docs/guides/run_dbt/container/watcher-kubernetes-execution-mode.rst @@ -183,7 +183,7 @@ Example DAG Below is a complete example of a DAG using ``ExecutionMode.WATCHER_KUBERNETES``: -.. literalinclude:: ../../dev/dags/jaffle_shop_watcher_kubernetes.py +.. 
literalinclude:: ../../../../dev/dags/jaffle_shop_watcher_kubernetes.py :language: python ------------------------------------------------------------------------------- diff --git a/docs/guides/run_dbt/customization/scheduling.rst b/docs/guides/run_dbt/customization/scheduling.rst index 2d4e729c5b..0040135d37 100644 --- a/docs/guides/run_dbt/customization/scheduling.rst +++ b/docs/guides/run_dbt/customization/scheduling.rst @@ -77,7 +77,7 @@ This example DAG: .. The following renders in Sphinx but not Github: -.. literalinclude:: ../../dev/dags/basic_cosmos_dag.py +.. literalinclude:: ../../../../dev/dags/basic_cosmos_dag.py :language: python :start-after: [START local_example] :end-before: [END local_example] diff --git a/docs/guides/run_dbt/index.rst b/docs/guides/run_dbt/index.rst index 2827fe39dd..a8c96d1b93 100644 --- a/docs/guides/run_dbt/index.rst +++ b/docs/guides/run_dbt/index.rst @@ -127,7 +127,7 @@ When using the ``local`` execution mode, Cosmos converts Airflow Connections int Example of how to use, for instance, when ``dbt`` was installed together with Cosmos: -.. literalinclude:: ../../dev/dags/basic_cosmos_dag.py +.. literalinclude:: ../../../dev/dags/basic_cosmos_dag.py :language: python :start-after: [START local_example] :end-before: [END local_example] @@ -153,7 +153,7 @@ Some drawbacks of this approach: Example of how to use: -.. literalinclude:: ../../dev/dags/example_virtualenv.py +.. literalinclude:: ../../../dev/dags/example_virtualenv.py :language: python :start-after: [START virtualenv_example] :end-before: [END virtualenv_example] @@ -201,7 +201,7 @@ Check the step-by-step guide on using the ``kubernetes`` execution mode at :ref: Example DAG: -.. literalinclude:: ../../dev/dags/jaffle_shop_kubernetes.py +.. 
literalinclude:: ../../../dev/dags/jaffle_shop_kubernetes.py :language: python :start-after: [START kubernetes_seed_example] :end-before: [END kubernetes_seed_example] @@ -345,7 +345,7 @@ as more dbt nodes will be run in parallel since they won't be blocking Airflow's Example DAG: -.. literalinclude:: ../../dev/dags/simple_dag_async.py +.. literalinclude:: ../../../dev/dags/simple_dag_async.py :language: python :start-after: [START airflow_async_execution_mode_example] :end-before: [END airflow_async_execution_mode_example] diff --git a/docs/guides/run_dbt/operators/operators.rst b/docs/guides/run_dbt/operators/operators.rst index 9f6658b6b1..448e037e77 100644 --- a/docs/guides/run_dbt/operators/operators.rst +++ b/docs/guides/run_dbt/operators/operators.rst @@ -18,7 +18,7 @@ The ``DbtCloneLocalOperator`` implement `dbt clone Date: Sun, 1 Mar 2026 20:32:36 -0500 Subject: [PATCH 09/29] fix typo --- docs/guides/translate_dbt_to_airflow/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/translate_dbt_to_airflow/index.rst b/docs/guides/translate_dbt_to_airflow/index.rst index faaf31b950..35adec52e4 100644 --- a/docs/guides/translate_dbt_to_airflow/index.rst +++ b/docs/guides/translate_dbt_to_airflow/index.rst @@ -1,4 +1,4 @@ -.. _translate-dbt-to-airflow +.. 
_translate-dbt-to-airflow: Translate dbt code into Airflow =============================== From 32c92a95a8982ca3e2b54886ab5f8ea1786ff0d0 Mon Sep 17 00:00:00 2001 From: L Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Sun, 1 Mar 2026 20:59:08 -0500 Subject: [PATCH 10/29] reformat ref structure --- docs/guides/index.rst | 13 +++----- docs/guides/run_dbt/index.rst | 33 +------------------ docs/index.rst | 1 + .../optimize_performance/caching.rst | 4 +-- .../optimize_performance/index.rst | 0 .../memory_optimization.rst | 0 .../optimize_performance/partial-parsing.rst | 0 .../selecting-excluding.rst | 0 8 files changed, 9 insertions(+), 42 deletions(-) rename docs/{guides => }/optimize_performance/caching.rst (98%) rename docs/{guides => }/optimize_performance/index.rst (100%) rename docs/{guides => }/optimize_performance/memory_optimization.rst (100%) rename docs/{guides => }/optimize_performance/partial-parsing.rst (100%) rename docs/{guides => }/optimize_performance/selecting-excluding.rst (100%) diff --git a/docs/guides/index.rst b/docs/guides/index.rst index 50f3059402..140a9c4554 100644 --- a/docs/guides/index.rst +++ b/docs/guides/index.rst @@ -24,7 +24,11 @@ Cosmos offers a number of configuration options to customize its behavior. For m :hidden: :caption: How Cosmos runs dbt - run_dbt/index + run_dbt/airflow-worker/index + run_dbt/container/index + run_dbt/callbacks/callbacks + run_dbt/operators/operators + run_dbt/customization/index .. toctree:: :maxdepth: 1 @@ -48,13 +52,6 @@ Cosmos offers a number of configuration options to customize its behavior. For m cosmos_devex/index -.. toctree:: - :maxdepth: 1 - :hidden: - :caption: Optimizing Performance - - optimize_performance/index - .. toctree:: :maxdepth: 1 :hidden: diff --git a/docs/guides/run_dbt/index.rst b/docs/guides/run_dbt/index.rst index a8c96d1b93..71d581c25b 100644 --- a/docs/guides/run_dbt/index.rst +++ b/docs/guides/run_dbt/index.rst @@ -1,39 +1,8 @@ .. 
_execution-modes: -How Cosmos runs dbt +Execution Modes =================== -.. toctree:: - :maxdepth: 3 - :caption: Run dbt in the Airflow worker - - airflow-worker/index - -.. toctree:: - :maxdepth: 3 - :caption: Run dbt in a container - - container/index - -.. toctree:: - :maxdepth: 3 - :caption: Callbacks - - callbacks/callbacks - -.. toctree:: - :maxdepth: 3 - :caption: Operators - - operators/operators - -.. toctree:: - :maxdepth: 3 - :caption: Customize Airflow - - customization/index - - Cosmos can run ``dbt`` commands using several different approaches, called ``execution modes``: 1. **local**: Run ``dbt`` commands using a local ``dbt`` installation (default) diff --git a/docs/index.rst b/docs/index.rst index 9ccb67cdca..7bef1e1be2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,6 +8,7 @@ Home Getting Started Guides + Optimize Performance Profiles Project Policies diff --git a/docs/guides/optimize_performance/caching.rst b/docs/optimize_performance/caching.rst similarity index 98% rename from docs/guides/optimize_performance/caching.rst rename to docs/optimize_performance/caching.rst index 5bf8a6406c..7289d00742 100644 --- a/docs/guides/optimize_performance/caching.rst +++ b/docs/optimize_performance/caching.rst @@ -84,7 +84,7 @@ The method deletes the Cosmos cache stored in Airflow Variables based on the las As an example, the following clean-up DAG will delete any cache associated with Cosmos that has not been used for the last five days: -.. literalinclude:: ../../../dev/dags/example_cosmos_cleanup_dag.py +.. literalinclude:: ../../dev/dags/example_cosmos_cleanup_dag.py :language: python :start-after: [START cache_example] :end-before: [END cache_example] @@ -161,7 +161,7 @@ The method deletes the Cosmos cache stored in Airflow Variables based on the las As an example, the following clean-up DAG will delete any cache associated with Cosmos that has not been used for the last five days: -.. 
literalinclude:: ../../../dev/dags/example_cosmos_cleanup_dag.py +.. literalinclude:: ../../dev/dags/example_cosmos_cleanup_dag.py :language: python :start-after: [START cache_example] :end-before: [END cache_example] diff --git a/docs/guides/optimize_performance/index.rst b/docs/optimize_performance/index.rst similarity index 100% rename from docs/guides/optimize_performance/index.rst rename to docs/optimize_performance/index.rst diff --git a/docs/guides/optimize_performance/memory_optimization.rst b/docs/optimize_performance/memory_optimization.rst similarity index 100% rename from docs/guides/optimize_performance/memory_optimization.rst rename to docs/optimize_performance/memory_optimization.rst diff --git a/docs/guides/optimize_performance/partial-parsing.rst b/docs/optimize_performance/partial-parsing.rst similarity index 100% rename from docs/guides/optimize_performance/partial-parsing.rst rename to docs/optimize_performance/partial-parsing.rst diff --git a/docs/guides/optimize_performance/selecting-excluding.rst b/docs/optimize_performance/selecting-excluding.rst similarity index 100% rename from docs/guides/optimize_performance/selecting-excluding.rst rename to docs/optimize_performance/selecting-excluding.rst From 721f2479d070d55e99a3281f65f98054c5281bd4 Mon Sep 17 00:00:00 2001 From: L Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Sun, 1 Mar 2026 21:13:07 -0500 Subject: [PATCH 11/29] restructure --- docs/guides/index.rst | 10 +--------- .../run_dbt/{index.rst => execution-modes.rst} | 0 docs/index.rst | 1 + docs/{guides => reference/configs}/cosmos-conf.rst | 0 .../configs}/execution-config.rst | 0 docs/reference/configs/index.rst | 13 +++++++++++++ .../configs}/profile-config.rst | 0 .../configs}/project-config.rst | 0 docs/reference/index.rst | 9 +++++++++ 9 files changed, 24 insertions(+), 9 deletions(-) rename docs/guides/run_dbt/{index.rst => execution-modes.rst} (100%) rename docs/{guides => reference/configs}/cosmos-conf.rst (100%) 
rename docs/{guides => reference/configs}/execution-config.rst (100%) create mode 100644 docs/reference/configs/index.rst rename docs/{guides => reference/configs}/profile-config.rst (100%) rename docs/{guides => reference/configs}/project-config.rst (100%) create mode 100644 docs/reference/index.rst diff --git a/docs/guides/index.rst b/docs/guides/index.rst index 140a9c4554..03970c5959 100644 --- a/docs/guides/index.rst +++ b/docs/guides/index.rst @@ -24,6 +24,7 @@ Cosmos offers a number of configuration options to customize its behavior. For m :hidden: :caption: How Cosmos runs dbt + run_dbt/execution-modes run_dbt/airflow-worker/index run_dbt/container/index run_dbt/callbacks/callbacks @@ -52,12 +53,3 @@ Cosmos offers a number of configuration options to customize its behavior. For m cosmos_devex/index -.. toctree:: - :maxdepth: 1 - :hidden: - :caption: Configuration References - - Project Config - Profile Config - Execution Config - Cosmos Config diff --git a/docs/guides/run_dbt/index.rst b/docs/guides/run_dbt/execution-modes.rst similarity index 100% rename from docs/guides/run_dbt/index.rst rename to docs/guides/run_dbt/execution-modes.rst diff --git a/docs/index.rst b/docs/index.rst index 7bef1e1be2..b841351ddd 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,6 +9,7 @@ Getting Started Guides Optimize Performance + Reference Profiles Project Policies diff --git a/docs/guides/cosmos-conf.rst b/docs/reference/configs/cosmos-conf.rst similarity index 100% rename from docs/guides/cosmos-conf.rst rename to docs/reference/configs/cosmos-conf.rst diff --git a/docs/guides/execution-config.rst b/docs/reference/configs/execution-config.rst similarity index 100% rename from docs/guides/execution-config.rst rename to docs/reference/configs/execution-config.rst diff --git a/docs/reference/configs/index.rst b/docs/reference/configs/index.rst new file mode 100644 index 0000000000..d83b28614a --- /dev/null +++ b/docs/reference/configs/index.rst @@ -0,0 +1,13 @@ + +.. 
Configurations - Uncomment this section to turn the page into a dropdown navigation +.. ============== + +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Configurations + + cosmos-conf + execution-config + profile-config + project-config \ No newline at end of file diff --git a/docs/guides/profile-config.rst b/docs/reference/configs/profile-config.rst similarity index 100% rename from docs/guides/profile-config.rst rename to docs/reference/configs/profile-config.rst diff --git a/docs/guides/project-config.rst b/docs/reference/configs/project-config.rst similarity index 100% rename from docs/guides/project-config.rst rename to docs/reference/configs/project-config.rst diff --git a/docs/reference/index.rst b/docs/reference/index.rst new file mode 100644 index 0000000000..39b5e65f62 --- /dev/null +++ b/docs/reference/index.rst @@ -0,0 +1,9 @@ +Reference +========= + +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Configurations + + configs/index \ No newline at end of file From dcb01b4951e64efc8dc8e70e2f4a75fed7d9633e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 2 Mar 2026 03:11:28 +0000 Subject: [PATCH 12/29] =?UTF-8?q?=F0=9F=8E=A8=20[pre-commit.ci]=20Auto=20f?= =?UTF-8?q?ormat=20from=20pre-commit.com=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/guides/cosmos_devex/index.rst | 2 +- docs/guides/index.rst | 1 - docs/guides/run_dbt/airflow-worker/index.rst | 2 +- docs/guides/run_dbt/container/index.rst | 2 +- docs/guides/translate_dbt_to_airflow/index.rst | 2 +- docs/reference/configs/index.rst | 2 +- docs/reference/index.rst | 2 +- 7 files changed, 6 insertions(+), 7 deletions(-) diff --git a/docs/guides/cosmos_devex/index.rst b/docs/guides/cosmos_devex/index.rst index 614e4c3c17..2ad3dff71b 100644 --- a/docs/guides/cosmos_devex/index.rst +++ b/docs/guides/cosmos_devex/index.rst @@ -11,4 +11,4 @@ Cosmos DevEx lineage 
compiled-sql logging - task-display-name \ No newline at end of file + task-display-name diff --git a/docs/guides/index.rst b/docs/guides/index.rst index 03970c5959..9b7e233814 100644 --- a/docs/guides/index.rst +++ b/docs/guides/index.rst @@ -52,4 +52,3 @@ Cosmos offers a number of configuration options to customize its behavior. For m :caption: Cosmos DevEx cosmos_devex/index - diff --git a/docs/guides/run_dbt/airflow-worker/index.rst b/docs/guides/run_dbt/airflow-worker/index.rst index 00cb281bc8..eaa89c2d9f 100644 --- a/docs/guides/run_dbt/airflow-worker/index.rst +++ b/docs/guides/run_dbt/airflow-worker/index.rst @@ -6,4 +6,4 @@ Run dbt in an Airflow worker :caption: Run dbt in an Airflow worker async-execution-mode - watcher-execution-mode \ No newline at end of file + watcher-execution-mode diff --git a/docs/guides/run_dbt/container/index.rst b/docs/guides/run_dbt/container/index.rst index 634e9e8eb4..9cccdbb29a 100644 --- a/docs/guides/run_dbt/container/index.rst +++ b/docs/guides/run_dbt/container/index.rst @@ -10,4 +10,4 @@ Run dbt in a container docker gcp-cloud-run-job kubernetes - watcher-kubernetes-execution-mode \ No newline at end of file + watcher-kubernetes-execution-mode diff --git a/docs/guides/translate_dbt_to_airflow/index.rst b/docs/guides/translate_dbt_to_airflow/index.rst index 35adec52e4..5ff278003e 100644 --- a/docs/guides/translate_dbt_to_airflow/index.rst +++ b/docs/guides/translate_dbt_to_airflow/index.rst @@ -23,4 +23,4 @@ Translate dbt code into Airflow managing-sources render-config - dag-customization \ No newline at end of file + dag-customization diff --git a/docs/reference/configs/index.rst b/docs/reference/configs/index.rst index d83b28614a..741b7a76bd 100644 --- a/docs/reference/configs/index.rst +++ b/docs/reference/configs/index.rst @@ -10,4 +10,4 @@ cosmos-conf execution-config profile-config - project-config \ No newline at end of file + project-config diff --git a/docs/reference/index.rst b/docs/reference/index.rst index 
39b5e65f62..4c430eb4d8 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -6,4 +6,4 @@ Reference :hidden: :caption: Configurations - configs/index \ No newline at end of file + configs/index From f2748562c1eebe32677d1ef7896e850b9b5aae76 Mon Sep 17 00:00:00 2001 From: L Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Tue, 3 Mar 2026 17:30:13 -0500 Subject: [PATCH 13/29] fix build errors --- docs/reference/configs/cosmos-conf.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/configs/cosmos-conf.rst b/docs/reference/configs/cosmos-conf.rst index cc68c3b71f..a8928c3840 100644 --- a/docs/reference/configs/cosmos-conf.rst +++ b/docs/reference/configs/cosmos-conf.rst @@ -253,14 +253,14 @@ This page lists all available Airflow configurations that affect ``astronomer-co As an example, when this option is enabled, the following is an example of specifying the imports with full module paths: - .. literalinclude:: ../../dev/dags/basic_cosmos_dag_full_module_path_imports.py + .. literalinclude:: ../../../dev/dags/basic_cosmos_dag_full_module_path_imports.py :language: python :start-after: [START cosmos_explicit_imports] :end-before: [END cosmos_explicit_imports] as opposed to the following approach you might have when this option is disabled (default): - .. literalinclude:: ../../dev/dags/basic_cosmos_dag.py + .. 
literalinclude:: ../../../dev/dags/basic_cosmos_dag.py :language: python :start-after: [START cosmos_init_imports] :end-before: [END cosmos_init_imports] From 6b4f5b6eb0204fd8d49a3272040bfc0cba742dae Mon Sep 17 00:00:00 2001 From: L Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Tue, 3 Mar 2026 17:30:43 -0500 Subject: [PATCH 14/29] add optimize, execution modes redirects --- docs/conf.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/conf.py b/docs/conf.py index 6f5a88a543..f588fc9396 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -61,7 +61,23 @@ redirects = { "airflow3_compatibility/index": "../policy/airflow3-compatibility.html", "compatibility-policy": "../policy/compatibility-policy.html", + "configuration/caching": "../optimize_performance/caching.html", + "configuration/memory_optimization": "../optimize_performance/memory_optimization.html", + "configuration/partial-parsing": "../optimize_performance/partial-parsing.html", + "configuration/selecting-excluding": "../optimize_performance/selecting-excluding.html", "contributing": "../policy/contributing.html", "contributors": "../policy/contributors.html", "contributors-roles": "../policy/contributors-roles.html", + "getting_started/async-execution-mode": "../guides/run_dbt/airflow-worker/async-execution-mode.html", + "getting_started/aws-container-run-job": "../guides/run_dbt/airflow-worker/async-execution-mode.html", + "getting_started/azure-container-instance": "../guides/run_dbt/container/azure-container-instance.html", + "getting_started/custom-airflow-properties": "../run_dbt/airflow-worker/custom-airflow-properties.html", + "getting_started/docker": "../guides/run_dbt/container/docker.html", + "getting_started/execution-modes-local-conflicts": "../guides/run_dbt/airflow-worker/execution-modes-local-conflicts.html", + "getting_started/execution-modes": "../guides/run_dbt/execution-modes.html", + "getting_started/gcp-cloud-run-job": 
"../guides/run_dbt/container/gcp-cloud-run-job.html", + "getting_started/kubernetes": "../guides/run_dbt/container/kubernetes.html", + "getting_started/operators": "../guides/run_dbt/operators/operators.html", + "getting_started/watcher-execution-mode": "../guides/run_dbt/airflow-worker/watcher-execution-mode.html", + "getting_started/watcher-kubernetes-execution-mode": "../guides/run_dbt/container/watcher-kubernetes-execution-mode.html", } From b387f222d24a4c0f7e2f5b03aa514af29be41ab1 Mon Sep 17 00:00:00 2001 From: Laura Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Tue, 3 Mar 2026 19:57:20 -0500 Subject: [PATCH 15/29] Apply suggestions from code review Co-authored-by: Pankaj Singh <98807258+pankajastro@users.noreply.github.com> --- docs/getting_started/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting_started/index.rst b/docs/getting_started/index.rst index 6ed3154c98..5cdf2697ed 100644 --- a/docs/getting_started/index.rst +++ b/docs/getting_started/index.rst @@ -78,7 +78,7 @@ For more customization, check out the different execution modes that Cosmos supp For specific guides, see the following: -- `Executing dbt DAGs with Docker Operators `__ +- `Executing dbt DAGs with DockerOperators `__ - `Executing dbt DAGs with KubernetesPodOperators `__ - `Executing dbt DAGs with Watcher Kubernetes Mode `__ - `Executing dbt DAGs with AzureContainerInstancesOperators `__ From 1d61288af1c9011ef2de7d45c87865bcd5fe3644 Mon Sep 17 00:00:00 2001 From: L Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Wed, 4 Mar 2026 10:18:00 -0500 Subject: [PATCH 16/29] Link and directory fixes --- docs/getting_started/index.rst | 18 ++++++++++++------ docs/index.rst | 3 ++- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/docs/getting_started/index.rst b/docs/getting_started/index.rst index 5cdf2697ed..6ff50cff91 100644 --- a/docs/getting_started/index.rst +++ b/docs/getting_started/index.rst @@ -5,9 +5,15 @@ :hidden: 
:caption: Cosmos Fundamentals - Astro CLI quickstart Similar dbt and Airflow concepts +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Quickstart + + Astro CLI quickstart + .. toctree:: :maxdepth: 1 :hidden: @@ -78,11 +84,11 @@ For more customization, check out the different execution modes that Cosmos supp For specific guides, see the following: -- `Executing dbt DAGs with DockerOperators `__ -- `Executing dbt DAGs with KubernetesPodOperators `__ -- `Executing dbt DAGs with Watcher Kubernetes Mode `__ -- `Executing dbt DAGs with AzureContainerInstancesOperators `__ -- `Executing dbt DAGs with GcpCloudRunExecuteJobOperators `__ +- `Executing dbt DAGs with DockerOperators <../../guides/run_dbt/container/docker.html>`__ +- `Executing dbt DAGs with KubernetesPodOperators <../../guides/run_dbt/container/kubernetes.html>`__ +- `Executing dbt DAGs with Watcher Kubernetes Mode <../../guides/run_dbt/container/watcher-kubernetes-execution-mode.html>`__ +- `Executing dbt DAGs with AzureContainerInstancesOperators <../../guides/run_dbt/container/azure-container-instance.html>`__ +- `Executing dbt DAGs with GcpCloudRunExecuteJobOperators <../../guides/run_dbt/container/gcp-cloud-run-job.html>`__ Concepts Overview diff --git a/docs/index.rst b/docs/index.rst index b841351ddd..8c24731043 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,10 +9,11 @@ Getting Started Guides Optimize Performance - Reference Profiles + Reference Project Policies + .. 
|fury| image:: https://badge.fury.io/py/astronomer-cosmos.svg :target: https://badge.fury.io/py/astronomer-cosmos From c1598ec616eaf35f3438f7d8de1ee47c348b9597 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 5 Mar 2026 17:09:54 +0000 Subject: [PATCH 17/29] =?UTF-8?q?=F0=9F=8E=A8=20[pre-commit.ci]=20Auto=20f?= =?UTF-8?q?ormat=20from=20pre-commit.com=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/getting_started/index.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/getting_started/index.rst b/docs/getting_started/index.rst index 6ff50cff91..f2f5239980 100644 --- a/docs/getting_started/index.rst +++ b/docs/getting_started/index.rst @@ -95,4 +95,3 @@ Concepts Overview ----------------- How do dbt and Airflow concepts map to each other? Learn more `in this link `__. - From ed0b9194aa6b23e98087f36322bb0b1138686854 Mon Sep 17 00:00:00 2001 From: L Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Thu, 5 Mar 2026 14:08:50 -0500 Subject: [PATCH 18/29] update index page --- docs/getting_started/index.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/getting_started/index.rst b/docs/getting_started/index.rst index f2f5239980..036d20f984 100644 --- a/docs/getting_started/index.rst +++ b/docs/getting_started/index.rst @@ -21,8 +21,9 @@ Open-source Airflow Astro - Google Cloud Composer (GCC) Amazon Managed Workflows for Apache Airflow (MWAA) + Google Cloud Composer (GCC) + Getting Started =============== From d7b77e22d43f8207fe6edbb848f85e220f0e71b2 Mon Sep 17 00:00:00 2001 From: L Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Thu, 5 Mar 2026 14:23:56 -0500 Subject: [PATCH 19/29] move pages update redirects --- docs/conf.py | 4 ++-- docs/guides/run_dbt/customization/index.rst | 4 +++- .../run_dbt/customization}/partial-parsing.rst | 0 .../translate_dbt_to_airflow}/selecting-excluding.rst | 0 
docs/optimize_performance/index.rst | 4 ++-- 5 files changed, 7 insertions(+), 5 deletions(-) rename docs/{optimize_performance => guides/run_dbt/customization}/partial-parsing.rst (100%) rename docs/{optimize_performance => guides/translate_dbt_to_airflow}/selecting-excluding.rst (100%) diff --git a/docs/conf.py b/docs/conf.py index f588fc9396..2e68bdc085 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -63,8 +63,8 @@ "compatibility-policy": "../policy/compatibility-policy.html", "configuration/caching": "../optimize_performance/caching.html", "configuration/memory_optimization": "../optimize_performance/memory_optimization.html", - "configuration/partial-parsing": "../optimize_performance/partial-parsing.html", - "configuration/selecting-excluding": "../optimize_performance/selecting-excluding.html", + "configuration/partial-parsing": "../guides/run_dbt/customization/partial-parsing.html", + "configuration/selecting-excluding": "../guides/translate_dbt_to_airflow/selecting-excluding.html", "contributing": "../policy/contributing.html", "contributors": "../policy/contributors.html", "contributors-roles": "../policy/contributors-roles.html", diff --git a/docs/guides/run_dbt/customization/index.rst b/docs/guides/run_dbt/customization/index.rst index 44021154dc..1cc6e0852d 100644 --- a/docs/guides/run_dbt/customization/index.rst +++ b/docs/guides/run_dbt/customization/index.rst @@ -5,5 +5,7 @@ Additional Customization :maxdepth: 1 :caption: Additional Customization - operator-args scheduling + operator-args + partial-parsing + diff --git a/docs/optimize_performance/partial-parsing.rst b/docs/guides/run_dbt/customization/partial-parsing.rst similarity index 100% rename from docs/optimize_performance/partial-parsing.rst rename to docs/guides/run_dbt/customization/partial-parsing.rst diff --git a/docs/optimize_performance/selecting-excluding.rst b/docs/guides/translate_dbt_to_airflow/selecting-excluding.rst similarity index 100% rename from 
docs/optimize_performance/selecting-excluding.rst rename to docs/guides/translate_dbt_to_airflow/selecting-excluding.rst diff --git a/docs/optimize_performance/index.rst b/docs/optimize_performance/index.rst index 0ed84470d0..ecf7d665e0 100644 --- a/docs/optimize_performance/index.rst +++ b/docs/optimize_performance/index.rst @@ -1,7 +1,7 @@ .. _optimize-performance: -Optimize your Cosmos Performance -================================ +Optimize the performance of your Cosmos Dags +============================================ .. toctree:: :maxdepth: 1 From c8b6fad9b9758f78baf118140e26fd0ede6549bc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 5 Mar 2026 19:24:18 +0000 Subject: [PATCH 20/29] =?UTF-8?q?=F0=9F=8E=A8=20[pre-commit.ci]=20Auto=20f?= =?UTF-8?q?ormat=20from=20pre-commit.com=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/guides/run_dbt/customization/index.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/guides/run_dbt/customization/index.rst b/docs/guides/run_dbt/customization/index.rst index 1cc6e0852d..ba28a7f2cc 100644 --- a/docs/guides/run_dbt/customization/index.rst +++ b/docs/guides/run_dbt/customization/index.rst @@ -8,4 +8,3 @@ Additional Customization scheduling operator-args partial-parsing - From 1f4e8d8fc758a6ed353b55746567f67f941e758e Mon Sep 17 00:00:00 2001 From: Laura Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Thu, 5 Mar 2026 14:27:15 -0500 Subject: [PATCH 21/29] Apply suggestions from code review Co-authored-by: Tatiana Al-Chueyr --- docs/getting_started/index.rst | 3 ++- docs/guides/run_dbt/container/index.rst | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/getting_started/index.rst b/docs/getting_started/index.rst index 036d20f984..4e0bb5838c 100644 --- a/docs/getting_started/index.rst +++ b/docs/getting_started/index.rst @@ -18,12 +18,13 @@ :maxdepth: 1 
:hidden: :caption: Get started with Cosmos - Open-source Airflow Astro Amazon Managed Workflows for Apache Airflow (MWAA) Google Cloud Composer (GCC) + Google Cloud Composer (GCC) + Getting Started =============== diff --git a/docs/guides/run_dbt/container/index.rst b/docs/guides/run_dbt/container/index.rst index 9cccdbb29a..8e1051dc35 100644 --- a/docs/guides/run_dbt/container/index.rst +++ b/docs/guides/run_dbt/container/index.rst @@ -5,9 +5,9 @@ Run dbt in a container :maxdepth: 1 :caption: Run dbt in a container - aws-container-run-job - azure-container-instance docker - gcp-cloud-run-job kubernetes watcher-kubernetes-execution-mode + aws-container-run-job + azure-container-instance + gcp-cloud-run-job From e830d80603f48820ddf4d8d60a4a566446908586 Mon Sep 17 00:00:00 2001 From: L Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Thu, 5 Mar 2026 19:02:30 -0500 Subject: [PATCH 22/29] Add guides index feedback --- docs/guides/index.rst | 2 +- docs/guides/translate_dbt_to_airflow/index.rst | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/guides/index.rst b/docs/guides/index.rst index 9b7e233814..1eb5adb580 100644 --- a/docs/guides/index.rst +++ b/docs/guides/index.rst @@ -41,7 +41,7 @@ Cosmos offers a number of configuration options to customize its behavior. For m .. toctree:: :maxdepth: 1 :hidden: - :caption: Documentation + :caption: dbt Documentation dbt_docs/generating-docs dbt_docs/hosting-docs diff --git a/docs/guides/translate_dbt_to_airflow/index.rst b/docs/guides/translate_dbt_to_airflow/index.rst index 5ff278003e..dbdd29ec74 100644 --- a/docs/guides/translate_dbt_to_airflow/index.rst +++ b/docs/guides/translate_dbt_to_airflow/index.rst @@ -9,6 +9,7 @@ Translate dbt code into Airflow parsing-methods custom-airflow-properties + selecting-excluding .. 
toctree:: From 2f20416d4d18d632d76387779f28af53d0c3bd89 Mon Sep 17 00:00:00 2001 From: L Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Thu, 5 Mar 2026 19:29:42 -0500 Subject: [PATCH 23/29] update translate dbt to airflow --- docs/getting_started/index.rst | 3 +-- docs/guides/index.rst | 4 ++- .../guides/translate_dbt_to_airflow/index.rst | 27 ------------------- .../custom-airflow-properties.rst | 0 .../map_dbt_to_dags/index.rst | 12 +++++++++ .../{ => map_dbt_to_dags}/parsing-methods.rst | 0 .../selecting-excluding.rst | 0 .../testing-behavior.rst | 0 .../dag-customization.rst | 0 .../translate_nodes/index.rst | 12 +++++++++ .../managing-sources.rst | 0 .../{ => translate_nodes}/render-config.rst | 0 docs/optimize_performance/index.rst | 2 -- 13 files changed, 28 insertions(+), 32 deletions(-) delete mode 100644 docs/guides/translate_dbt_to_airflow/index.rst rename docs/guides/translate_dbt_to_airflow/{ => map_dbt_to_dags}/custom-airflow-properties.rst (100%) create mode 100644 docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst rename docs/guides/translate_dbt_to_airflow/{ => map_dbt_to_dags}/parsing-methods.rst (100%) rename docs/guides/translate_dbt_to_airflow/{ => map_dbt_to_dags}/selecting-excluding.rst (100%) rename docs/guides/translate_dbt_to_airflow/{configure-tests => }/testing-behavior.rst (100%) rename docs/guides/translate_dbt_to_airflow/{ => translate_nodes}/dag-customization.rst (100%) create mode 100644 docs/guides/translate_dbt_to_airflow/translate_nodes/index.rst rename docs/guides/translate_dbt_to_airflow/{ => translate_nodes}/managing-sources.rst (100%) rename docs/guides/translate_dbt_to_airflow/{ => translate_nodes}/render-config.rst (100%) diff --git a/docs/getting_started/index.rst b/docs/getting_started/index.rst index 4e0bb5838c..036d20f984 100644 --- a/docs/getting_started/index.rst +++ b/docs/getting_started/index.rst @@ -18,13 +18,12 @@ :maxdepth: 1 :hidden: :caption: Get started with Cosmos + Open-source 
Airflow Astro Amazon Managed Workflows for Apache Airflow (MWAA) Google Cloud Composer (GCC) - Google Cloud Composer (GCC) - Getting Started =============== diff --git a/docs/guides/index.rst b/docs/guides/index.rst index 1eb5adb580..fe577f18f1 100644 --- a/docs/guides/index.rst +++ b/docs/guides/index.rst @@ -17,7 +17,9 @@ Cosmos offers a number of configuration options to customize its behavior. For m :hidden: :caption: Translating dbt into Airflow - translate_dbt_to_airflow/index + translate_dbt_to_airflow/map_dbt_to_dags/index + translate_dbt_to_airflow/testing-behavior + translate_dbt_to_airflow/translate_nodes/index .. toctree:: :maxdepth: 3 diff --git a/docs/guides/translate_dbt_to_airflow/index.rst b/docs/guides/translate_dbt_to_airflow/index.rst deleted file mode 100644 index dbdd29ec74..0000000000 --- a/docs/guides/translate_dbt_to_airflow/index.rst +++ /dev/null @@ -1,27 +0,0 @@ -.. _translate-dbt-to-airflow: - -Translate dbt code into Airflow -=============================== - -.. toctree:: - :maxdepth: 1 - :caption: Mapping dbt into dags - - parsing-methods - custom-airflow-properties - selecting-excluding - - -.. toctree:: - :maxdepth: 1 - :caption: Configure tests - - configure-tests/testing-behavior - -.. 
toctree:: - :maxdepth: 1 - :caption: Translate nodes - - managing-sources - render-config - dag-customization diff --git a/docs/guides/translate_dbt_to_airflow/custom-airflow-properties.rst b/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/custom-airflow-properties.rst similarity index 100% rename from docs/guides/translate_dbt_to_airflow/custom-airflow-properties.rst rename to docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/custom-airflow-properties.rst diff --git a/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst b/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst new file mode 100644 index 0000000000..70848bcf4f --- /dev/null +++ b/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst @@ -0,0 +1,12 @@ +.. _map-dbt-to-dags: + +Map dbt into Dags +================== + +.. toctree:: + :maxdepth: 1 + :caption: Map dbt into dags + + parsing-methods + custom-airflow-properties + selecting-excluding \ No newline at end of file diff --git a/docs/guides/translate_dbt_to_airflow/parsing-methods.rst b/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/parsing-methods.rst similarity index 100% rename from docs/guides/translate_dbt_to_airflow/parsing-methods.rst rename to docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/parsing-methods.rst diff --git a/docs/guides/translate_dbt_to_airflow/selecting-excluding.rst b/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/selecting-excluding.rst similarity index 100% rename from docs/guides/translate_dbt_to_airflow/selecting-excluding.rst rename to docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/selecting-excluding.rst diff --git a/docs/guides/translate_dbt_to_airflow/configure-tests/testing-behavior.rst b/docs/guides/translate_dbt_to_airflow/testing-behavior.rst similarity index 100% rename from docs/guides/translate_dbt_to_airflow/configure-tests/testing-behavior.rst rename to docs/guides/translate_dbt_to_airflow/testing-behavior.rst diff --git 
a/docs/guides/translate_dbt_to_airflow/dag-customization.rst b/docs/guides/translate_dbt_to_airflow/translate_nodes/dag-customization.rst similarity index 100% rename from docs/guides/translate_dbt_to_airflow/dag-customization.rst rename to docs/guides/translate_dbt_to_airflow/translate_nodes/dag-customization.rst diff --git a/docs/guides/translate_dbt_to_airflow/translate_nodes/index.rst b/docs/guides/translate_dbt_to_airflow/translate_nodes/index.rst new file mode 100644 index 0000000000..0fa92713ca --- /dev/null +++ b/docs/guides/translate_dbt_to_airflow/translate_nodes/index.rst @@ -0,0 +1,12 @@ +.. _translate-nodes: + +Translate dbt nodes to Dags +=========================== + +.. toctree:: + :maxdepth: 1 + :caption: Translate nodes + + managing-sources + render-config + dag-customization \ No newline at end of file diff --git a/docs/guides/translate_dbt_to_airflow/managing-sources.rst b/docs/guides/translate_dbt_to_airflow/translate_nodes/managing-sources.rst similarity index 100% rename from docs/guides/translate_dbt_to_airflow/managing-sources.rst rename to docs/guides/translate_dbt_to_airflow/translate_nodes/managing-sources.rst diff --git a/docs/guides/translate_dbt_to_airflow/render-config.rst b/docs/guides/translate_dbt_to_airflow/translate_nodes/render-config.rst similarity index 100% rename from docs/guides/translate_dbt_to_airflow/render-config.rst rename to docs/guides/translate_dbt_to_airflow/translate_nodes/render-config.rst diff --git a/docs/optimize_performance/index.rst b/docs/optimize_performance/index.rst index ecf7d665e0..6ce9e2da20 100644 --- a/docs/optimize_performance/index.rst +++ b/docs/optimize_performance/index.rst @@ -7,7 +7,5 @@ Optimize the performance of your Cosmos Dags :maxdepth: 1 :caption: Optimize Performance - partial-parsing memory_optimization - selecting-excluding caching From 8bbf203c127b23008a4a720d921bcbb5efdfec0e Mon Sep 17 00:00:00 2001 From: L Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Thu, 5 Mar 
2026 19:34:38 -0500 Subject: [PATCH 24/29] update sidebar structure --- docs/conf.py | 9 +++++++-- docs/guides/cosmos_devex/index.rst | 14 -------------- docs/guides/index.rst | 5 ++++- 3 files changed, 11 insertions(+), 17 deletions(-) delete mode 100644 docs/guides/cosmos_devex/index.rst diff --git a/docs/conf.py b/docs/conf.py index 2e68bdc085..332db955b0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -62,16 +62,21 @@ "airflow3_compatibility/index": "../policy/airflow3-compatibility.html", "compatibility-policy": "../policy/compatibility-policy.html", "configuration/caching": "../optimize_performance/caching.html", + "configuration/dag-customization": "../guides/translate_dbt_to_airflow/translate_nodes/dag-customization.html", "configuration/memory_optimization": "../optimize_performance/memory_optimization.html", + "configuration/parsing-methods": "../guides/translate_dbt_to_airflow/map_dbt_to_dags/parsing-methods.html", "configuration/partial-parsing": "../guides/run_dbt/customization/partial-parsing.html", - "configuration/selecting-excluding": "../guides/translate_dbt_to_airflow/selecting-excluding.html", + "configuration/render-config": "../guides/translate_dbt_to_airflow/translate_nodes/render-config.html", + "configuration/selecting-excluding": "../guides/translate_dbt_to_airflow/map_dbt_to_dags/selecting-excluding.html", + "configuration/source-nodes-rendering": "../guides/translate_dbt_to_airflow/translate_nodes/managing-sources.html", + "configuration/testing-behavior": "../guides/translate_dbt_to_airflow/testing-behavior.html", "contributing": "../policy/contributing.html", "contributors": "../policy/contributors.html", "contributors-roles": "../policy/contributors-roles.html", "getting_started/async-execution-mode": "../guides/run_dbt/airflow-worker/async-execution-mode.html", "getting_started/aws-container-run-job": "../guides/run_dbt/airflow-worker/async-execution-mode.html", "getting_started/azure-container-instance": 
"../guides/run_dbt/container/azure-container-instance.html", - "getting_started/custom-airflow-properties": "../run_dbt/airflow-worker/custom-airflow-properties.html", + "getting_started/custom-airflow-properties": "../guides/translate_dbt_to_airflow/custom-airflow-properties.html", "getting_started/docker": "../guides/run_dbt/container/docker.html", "getting_started/execution-modes-local-conflicts": "../guides/run_dbt/airflow-worker/execution-modes-local-conflicts.html", "getting_started/execution-modes": "../guides/run_dbt/execution-modes.html", diff --git a/docs/guides/cosmos_devex/index.rst b/docs/guides/cosmos_devex/index.rst deleted file mode 100644 index 2ad3dff71b..0000000000 --- a/docs/guides/cosmos_devex/index.rst +++ /dev/null @@ -1,14 +0,0 @@ -.. _cosmos_devex: - - -Cosmos DevEx -============ - -.. toctree:: - :maxdepth: 1 - :caption: Cosmos DevEx - - lineage - compiled-sql - logging - task-display-name diff --git a/docs/guides/index.rst b/docs/guides/index.rst index fe577f18f1..edbf4161c2 100644 --- a/docs/guides/index.rst +++ b/docs/guides/index.rst @@ -53,4 +53,7 @@ Cosmos offers a number of configuration options to customize its behavior. 
For m :hidden: :caption: Cosmos DevEx - cosmos_devex/index + cosmos_devex/lineage + cosmos_devex/compiled-sql + cosmos_devex/logging + cosmos_devex/task-display-name \ No newline at end of file From a8ca80db8428317bede0f972b85db6088a758a0a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 6 Mar 2026 00:37:02 +0000 Subject: [PATCH 25/29] =?UTF-8?q?=F0=9F=8E=A8=20[pre-commit.ci]=20Auto=20f?= =?UTF-8?q?ormat=20from=20pre-commit.com=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/guides/index.rst | 2 +- docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst | 2 +- docs/guides/translate_dbt_to_airflow/translate_nodes/index.rst | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/guides/index.rst b/docs/guides/index.rst index edbf4161c2..a920b4fe2c 100644 --- a/docs/guides/index.rst +++ b/docs/guides/index.rst @@ -56,4 +56,4 @@ Cosmos offers a number of configuration options to customize its behavior. 
For m cosmos_devex/lineage cosmos_devex/compiled-sql cosmos_devex/logging - cosmos_devex/task-display-name \ No newline at end of file + cosmos_devex/task-display-name diff --git a/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst b/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst index 70848bcf4f..5a8dabff3e 100644 --- a/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst +++ b/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst @@ -9,4 +9,4 @@ Map dbt into Dags parsing-methods custom-airflow-properties - selecting-excluding \ No newline at end of file + selecting-excluding diff --git a/docs/guides/translate_dbt_to_airflow/translate_nodes/index.rst b/docs/guides/translate_dbt_to_airflow/translate_nodes/index.rst index 0fa92713ca..14ff3ae818 100644 --- a/docs/guides/translate_dbt_to_airflow/translate_nodes/index.rst +++ b/docs/guides/translate_dbt_to_airflow/translate_nodes/index.rst @@ -9,4 +9,4 @@ Translate dbt nodes to Dags managing-sources render-config - dag-customization \ No newline at end of file + dag-customization From aae514fb100a29742d25090010a6e6df3db349bb Mon Sep 17 00:00:00 2001 From: L Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Thu, 5 Mar 2026 19:44:09 -0500 Subject: [PATCH 26/29] update rel-links --- .../map_dbt_to_dags/custom-airflow-properties.rst | 2 +- .../map_dbt_to_dags/index.rst | 2 +- .../map_dbt_to_dags/parsing-methods.rst | 8 ++++---- .../translate_dbt_to_airflow/testing-behavior.rst | 12 ++++++------ .../translate_nodes/index.rst | 2 +- .../translate_nodes/managing-sources.rst | 2 +- .../translate_nodes/render-config.rst | 2 +- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/custom-airflow-properties.rst b/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/custom-airflow-properties.rst index fd72bba686..3f82bf825d 100644 --- 
a/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/custom-airflow-properties.rst +++ b/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/custom-airflow-properties.rst @@ -29,5 +29,5 @@ By adding Airflow configurations under **cosmos** in the **meta** field, you can For example, in the YAML above, the **pool** setting is applied to the specific dbt task. This approach allows for more granular control over Airflow settings per task within your dbt model definitions. -.. image:: ../../_static/custom_airflow_pool.png +.. image:: ../../../_static/custom_airflow_pool.png :alt: Result of applying Custom Airflow Pool diff --git a/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst b/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst index 5a8dabff3e..70848bcf4f 100644 --- a/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst +++ b/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst @@ -9,4 +9,4 @@ Map dbt into Dags parsing-methods custom-airflow-properties - selecting-excluding + selecting-excluding \ No newline at end of file diff --git a/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/parsing-methods.rst b/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/parsing-methods.rst index 567fc4c137..dfbc3f0da4 100644 --- a/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/parsing-methods.rst +++ b/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/parsing-methods.rst @@ -56,7 +56,7 @@ Examples of how to supply ``manifest.json`` using ``manifest_path`` argument: - Local path: -.. literalinclude:: ../../../dev/dags/cosmos_manifest_example.py +.. literalinclude:: ../../../../dev/dags/cosmos_manifest_example.py :language: python :start-after: [START local_example] :end-before: [END local_example] @@ -66,7 +66,7 @@ Examples of how to supply ``manifest.json`` using ``manifest_path`` argument: Ensure that you have the required dependencies installed to use the S3 URL. 
You can install the required dependencies using the following command: ``pip install "astronomer-cosmos[amazon]"`` -.. literalinclude:: ../../../dev/dags/cosmos_manifest_example.py +.. literalinclude:: ../../../../dev/dags/cosmos_manifest_example.py :language: python :start-after: [START aws_s3_example] :end-before: [END aws_s3_example] @@ -76,7 +76,7 @@ using the following command: ``pip install "astronomer-cosmos[amazon]"`` Ensure that you have the required dependencies installed to use the GCS URL. You can install the required dependencies using the following command: ``pip install "astronomer-cosmos[google]"`` -.. literalinclude:: ../../../dev/dags/cosmos_manifest_example.py +.. literalinclude:: ../../../../dev/dags/cosmos_manifest_example.py :language: python :start-after: [START gcp_gs_example] :end-before: [END gcp_gs_example] @@ -86,7 +86,7 @@ using the following command: ``pip install "astronomer-cosmos[google]"`` Ensure that you have the required dependencies installed to use the Azure blob URL. You can install the required dependencies using the following command: ``pip install "astronomer-cosmos[microsoft]"`` -.. literalinclude:: ../../../dev/dags/cosmos_manifest_example.py +.. literalinclude:: ../../../../dev/dags/cosmos_manifest_example.py :language: python :start-after: [START azure_abfs_example] :end-before: [END azure_abfs_example] diff --git a/docs/guides/translate_dbt_to_airflow/testing-behavior.rst b/docs/guides/translate_dbt_to_airflow/testing-behavior.rst index aa4535b514..68ebdb69d5 100644 --- a/docs/guides/translate_dbt_to_airflow/testing-behavior.rst +++ b/docs/guides/translate_dbt_to_airflow/testing-behavior.rst @@ -22,7 +22,7 @@ Cosmos supports the following test behaviors: Example of the standard behavior of ``TestBehavior.AFTER_EACH``, when using the example DAG available in ``dev/dags/basic_cosmos_dag.py``: -.. image:: ../../../_static/test_behavior_after_each.png +.. 
image:: ../../_static/test_behavior_after_each.png Example when changing the behavior to use ``TestBehavior.AFTER_ALL``: @@ -37,17 +37,17 @@ Example when changing the behavior to use ``TestBehavior.AFTER_ALL``: ) ) -.. image:: ../../../_static/test_behavior_after_all.png +.. image:: ../../_static/test_behavior_after_all.png Finally, an example DAG and how it is rendered in the Airflow UI when using ``TestBehavior.BUILD`` (available since Cosmos 1.8): -.. literalinclude:: ../../../../dev/dags/example_cosmos_dbt_build.py +.. literalinclude:: ../../../dev/dags/example_cosmos_dbt_build.py :language: python :start-after: [START build_example] :end-before: [END build_example] -.. image:: ../../../_static/test_behavior_build.png +.. image:: ../../_static/test_behavior_build.png Warning Behavior ---------------- @@ -138,7 +138,7 @@ It renders a dbt project named `multiple_parents_test Date: Fri, 6 Mar 2026 00:44:32 +0000 Subject: [PATCH 27/29] =?UTF-8?q?=F0=9F=8E=A8=20[pre-commit.ci]=20Auto=20f?= =?UTF-8?q?ormat=20from=20pre-commit.com=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst | 2 +- docs/guides/translate_dbt_to_airflow/translate_nodes/index.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst b/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst index 70848bcf4f..5a8dabff3e 100644 --- a/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst +++ b/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst @@ -9,4 +9,4 @@ Map dbt into Dags parsing-methods custom-airflow-properties - selecting-excluding \ No newline at end of file + selecting-excluding diff --git a/docs/guides/translate_dbt_to_airflow/translate_nodes/index.rst b/docs/guides/translate_dbt_to_airflow/translate_nodes/index.rst index 0fa92713ca..14ff3ae818 100644 --- 
a/docs/guides/translate_dbt_to_airflow/translate_nodes/index.rst +++ b/docs/guides/translate_dbt_to_airflow/translate_nodes/index.rst @@ -9,4 +9,4 @@ Translate dbt nodes to Dags managing-sources render-config - dag-customization \ No newline at end of file + dag-customization From c81ad22c36d87e431fb2265c2fba201721c57c18 Mon Sep 17 00:00:00 2001 From: L Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Fri, 6 Mar 2026 10:05:58 -0500 Subject: [PATCH 28/29] multi-project nav fix --- docs/guides/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/index.rst b/docs/guides/index.rst index a920b4fe2c..8e77058a23 100644 --- a/docs/guides/index.rst +++ b/docs/guides/index.rst @@ -38,7 +38,7 @@ Cosmos offers a number of configuration options to customize its behavior. For m :hidden: :caption: Multi-project Setups - multi_project/multi-project + Handle cross-project references <multi_project/multi-project> .. toctree:: :maxdepth: 1 From eaa701a5f82f3afdc791862e86f143fce15fda05 Mon Sep 17 00:00:00 2001 From: L Zdanski <25642903+lzdanski@users.noreply.github.com> Date: Fri, 6 Mar 2026 10:18:46 -0500 Subject: [PATCH 29/29] Refactor translate dbt to airflow --- docs/conf.py | 12 ++++++------ docs/guides/index.rst | 9 ++++++--- .../customization}/custom-airflow-properties.rst | 0 docs/guides/run_dbt/customization/index.rst | 1 + .../{translate_nodes => }/dag-customization.rst | 0 .../{translate_nodes => }/managing-sources.rst | 2 +- .../map_dbt_to_dags/index.rst | 12 ------------ .../{map_dbt_to_dags => }/parsing-methods.rst | 8 ++++---- .../{translate_nodes => }/render-config.rst | 2 +- .../{map_dbt_to_dags => }/selecting-excluding.rst | 0 .../translate_nodes/index.rst | 12 ------------ 11 files changed, 19 insertions(+), 39 deletions(-) rename docs/guides/{translate_dbt_to_airflow/map_dbt_to_dags => run_dbt/customization}/custom-airflow-properties.rst (100%) rename docs/guides/translate_dbt_to_airflow/{translate_nodes => }/dag-customization.rst (100%) rename
docs/guides/translate_dbt_to_airflow/{translate_nodes => }/managing-sources.rst (97%) delete mode 100644 docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst rename docs/guides/translate_dbt_to_airflow/{map_dbt_to_dags => }/parsing-methods.rst (96%) rename docs/guides/translate_dbt_to_airflow/{translate_nodes => }/render-config.rst (99%) rename docs/guides/translate_dbt_to_airflow/{map_dbt_to_dags => }/selecting-excluding.rst (100%) delete mode 100644 docs/guides/translate_dbt_to_airflow/translate_nodes/index.rst diff --git a/docs/conf.py b/docs/conf.py index 332db955b0..05eef04190 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -62,13 +62,13 @@ "airflow3_compatibility/index": "../policy/airflow3-compatibility.html", "compatibility-policy": "../policy/compatibility-policy.html", "configuration/caching": "../optimize_performance/caching.html", - "configuration/dag-customization": "../guides/translate_dbt_to_airflow/translate_nodes/dag-customization.html", + "configuration/dag-customization": "../guides/translate_dbt_to_airflow/dag-customization.html", "configuration/memory_optimization": "../optimize_performance/memory_optimization.html", - "configuration/parsing-methods": "../guides/translate_dbt_to_airflow/map_dbt_to_dags/parsing-methods.html", + "configuration/parsing-methods": "../guides/translate_dbt_to_airflow/parsing-methods.html", "configuration/partial-parsing": "../guides/run_dbt/customization/partial-parsing.html", - "configuration/render-config": "../guides/translate_dbt_to_airflow/translate_nodes/render-config.html", - "configuration/selecting-excluding": "../guides/translate_dbt_to_airflow/map_dbt_to_dags/selecting-excluding.html", - "configuration/source-nodes-rendering": "../guides/translate_dbt_to_airflow/translate_nodes/managing-sources.html", + "configuration/render-config": "../guides/translate_dbt_to_airflow/render-config.html", + "configuration/selecting-excluding": "../guides/translate_dbt_to_airflow/selecting-excluding.html", + 
"configuration/source-nodes-rendering": "../guides/translate_dbt_to_airflow/managing-sources.html", "configuration/testing-behavior": "../guides/translate_dbt_to_airflow/testing-behavior.html", "contributing": "../policy/contributing.html", "contributors": "../policy/contributors.html", @@ -76,7 +76,7 @@ "getting_started/async-execution-mode": "../guides/run_dbt/airflow-worker/async-execution-mode.html", "getting_started/aws-container-run-job": "../guides/run_dbt/airflow-worker/async-execution-mode.html", "getting_started/azure-container-instance": "../guides/run_dbt/container/azure-container-instance.html", - "getting_started/custom-airflow-properties": "../guides/translate_dbt_to_airflow/custom-airflow-properties.html", + "getting_started/custom-airflow-properties": "../guides/run_dbt/customization/custom-airflow-properties.html", "getting_started/docker": "../guides/run_dbt/container/docker.html", "getting_started/execution-modes-local-conflicts": "../guides/run_dbt/airflow-worker/execution-modes-local-conflicts.html", "getting_started/execution-modes": "../guides/run_dbt/execution-modes.html", diff --git a/docs/guides/index.rst b/docs/guides/index.rst index 8e77058a23..353d37bd97 100644 --- a/docs/guides/index.rst +++ b/docs/guides/index.rst @@ -17,9 +17,12 @@ Cosmos offers a number of configuration options to customize its behavior. For m :hidden: :caption: Translating dbt into Airflow - translate_dbt_to_airflow/map_dbt_to_dags/index - translate_dbt_to_airflow/testing-behavior - translate_dbt_to_airflow/translate_nodes/index + translate_dbt_to_airflow/parsing-methods + Selecting what to run + Configure tests + translate_dbt_to_airflow/managing-sources + translate_dbt_to_airflow/render-config + Customize node conversion .. 
toctree:: :maxdepth: 3 diff --git a/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/custom-airflow-properties.rst b/docs/guides/run_dbt/customization/custom-airflow-properties.rst similarity index 100% rename from docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/custom-airflow-properties.rst rename to docs/guides/run_dbt/customization/custom-airflow-properties.rst diff --git a/docs/guides/run_dbt/customization/index.rst b/docs/guides/run_dbt/customization/index.rst index ba28a7f2cc..47c23ebe2a 100644 --- a/docs/guides/run_dbt/customization/index.rst +++ b/docs/guides/run_dbt/customization/index.rst @@ -8,3 +8,4 @@ Additional Customization scheduling operator-args partial-parsing + custom-airflow-properties diff --git a/docs/guides/translate_dbt_to_airflow/translate_nodes/dag-customization.rst b/docs/guides/translate_dbt_to_airflow/dag-customization.rst similarity index 100% rename from docs/guides/translate_dbt_to_airflow/translate_nodes/dag-customization.rst rename to docs/guides/translate_dbt_to_airflow/dag-customization.rst diff --git a/docs/guides/translate_dbt_to_airflow/translate_nodes/managing-sources.rst b/docs/guides/translate_dbt_to_airflow/managing-sources.rst similarity index 97% rename from docs/guides/translate_dbt_to_airflow/translate_nodes/managing-sources.rst rename to docs/guides/translate_dbt_to_airflow/managing-sources.rst index 677f34d57e..a119c7be60 100644 --- a/docs/guides/translate_dbt_to_airflow/translate_nodes/managing-sources.rst +++ b/docs/guides/translate_dbt_to_airflow/managing-sources.rst @@ -70,7 +70,7 @@ The ``on_warning_callback`` is a callback parameter available on the ``DbtSource Example: -.. literalinclude:: ../../../../dev/dags/example_source_rendering.py/ +.. 
literalinclude:: ../../../dev/dags/example_source_rendering.py :language: python :start-after: [START cosmos_source_node_example] :end-before: [END cosmos_source_node_example] diff --git a/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst b/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst deleted file mode 100644 index 5a8dabff3e..0000000000 --- a/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/index.rst +++ /dev/null @@ -1,12 +0,0 @@ -.. _map-dbt-to-dags: - -Map dbt into Dags -================== - -.. toctree:: - :maxdepth: 1 - :caption: Map dbt into dags - - parsing-methods - custom-airflow-properties - selecting-excluding diff --git a/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/parsing-methods.rst b/docs/guides/translate_dbt_to_airflow/parsing-methods.rst similarity index 96% rename from docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/parsing-methods.rst rename to docs/guides/translate_dbt_to_airflow/parsing-methods.rst index dfbc3f0da4..567fc4c137 100644 --- a/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/parsing-methods.rst +++ b/docs/guides/translate_dbt_to_airflow/parsing-methods.rst @@ -56,7 +56,7 @@ Examples of how to supply ``manifest.json`` using ``manifest_path`` argument: - Local path: -.. literalinclude:: ../../../../dev/dags/cosmos_manifest_example.py +.. literalinclude:: ../../../dev/dags/cosmos_manifest_example.py :language: python :start-after: [START local_example] :end-before: [END local_example] @@ -66,7 +66,7 @@ Examples of how to supply ``manifest.json`` using ``manifest_path`` argument: Ensure that you have the required dependencies installed to use the S3 URL. You can install the required dependencies using the following command: ``pip install "astronomer-cosmos[amazon]"`` -.. literalinclude:: ../../../../dev/dags/cosmos_manifest_example.py +.. 
literalinclude:: ../../../dev/dags/cosmos_manifest_example.py :language: python :start-after: [START aws_s3_example] :end-before: [END aws_s3_example] @@ -76,7 +76,7 @@ using the following command: ``pip install "astronomer-cosmos[amazon]"`` Ensure that you have the required dependencies installed to use the GCS URL. You can install the required dependencies using the following command: ``pip install "astronomer-cosmos[google]"`` -.. literalinclude:: ../../../../dev/dags/cosmos_manifest_example.py +.. literalinclude:: ../../../dev/dags/cosmos_manifest_example.py :language: python :start-after: [START gcp_gs_example] :end-before: [END gcp_gs_example] @@ -86,7 +86,7 @@ using the following command: ``pip install "astronomer-cosmos[google]"`` Ensure that you have the required dependencies installed to use the Azure blob URL. You can install the required dependencies using the following command: ``pip install "astronomer-cosmos[microsoft]"`` -.. literalinclude:: ../../../../dev/dags/cosmos_manifest_example.py +.. literalinclude:: ../../../dev/dags/cosmos_manifest_example.py :language: python :start-after: [START azure_abfs_example] :end-before: [END azure_abfs_example] diff --git a/docs/guides/translate_dbt_to_airflow/translate_nodes/render-config.rst b/docs/guides/translate_dbt_to_airflow/render-config.rst similarity index 99% rename from docs/guides/translate_dbt_to_airflow/translate_nodes/render-config.rst rename to docs/guides/translate_dbt_to_airflow/render-config.rst index 40696e0474..425e106124 100644 --- a/docs/guides/translate_dbt_to_airflow/translate_nodes/render-config.rst +++ b/docs/guides/translate_dbt_to_airflow/render-config.rst @@ -63,7 +63,7 @@ Your pipeline may even have specific node types not part of the standard dbt def The following example illustrates how it is possible to tell Cosmos how to convert two different types of nodes (``source`` and ``exposure``) into Airflow: -.. literalinclude:: ../../../../dev/dags/example_cosmos_sources.py +.. 
literalinclude:: ../../../dev/dags/example_cosmos_sources.py :language: python :start-after: [START custom_dbt_nodes] :end-before: [END custom_dbt_nodes] diff --git a/docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/selecting-excluding.rst b/docs/guides/translate_dbt_to_airflow/selecting-excluding.rst similarity index 100% rename from docs/guides/translate_dbt_to_airflow/map_dbt_to_dags/selecting-excluding.rst rename to docs/guides/translate_dbt_to_airflow/selecting-excluding.rst diff --git a/docs/guides/translate_dbt_to_airflow/translate_nodes/index.rst b/docs/guides/translate_dbt_to_airflow/translate_nodes/index.rst deleted file mode 100644 index 14ff3ae818..0000000000 --- a/docs/guides/translate_dbt_to_airflow/translate_nodes/index.rst +++ /dev/null @@ -1,12 +0,0 @@ -.. _translate-nodes: - -Translate dbt nodes to Dags -=========================== - -.. toctree:: - :maxdepth: 1 - :caption: Translate nodes - - managing-sources - render-config - dag-customization