diff --git a/docs/conf.py b/docs/conf.py index 1f20af6190..4c51b3e93e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -53,3 +53,56 @@ } generate_mapping_docs() + +# -- Begin docs redirect section +# -- To test redirects in a local build, paste the redirect source, and append .html to the end. +# -- For example, "airflow3_compatibility/index" redirect must be tested using "airflow3_compatibility/index.html" +# -- https://documatt.com/sphinx-reredirects/usage/ +redirects = { + "configuration/caching": "../optimize_performance/caching.html", + "configuration/cosmos-conf": "../reference/configs/cosmos-conf.html", + "configuration/execution-config": "../reference/configs/execution-config.html", + "configuration/memory_optimization": "../optimize_performance/memory_optimization.html", + "configuration/partial-parsing": "../optimize_performance/partial-parsing.html", + "configuration/profile-config": "../reference/profiles/index.html", + "configuration/project-config": "../reference/configs/project-config.html", + "configuration/selecting-excluding": "../optimize_performance/selecting-excluding.html", + "getting_started/async-execution-mode": "../guides/run_dbt/airflow-worker/async-execution-mode.html", + "getting_started/aws-container-run-job": "../guides/run_dbt/container/aws-container-run-job.html", + "getting_started/azure-container-instance": "../guides/run_dbt/container/azure-container-instance.html", + "getting_started/custom-airflow-properties": "../guides/run_dbt/airflow-worker/custom-airflow-properties.html", + "getting_started/docker": "../guides/run_dbt/container/docker.html", + "getting_started/execution-modes-local-conflicts": "../guides/run_dbt/airflow-worker/execution-modes-local-conflicts.html", + "getting_started/execution-modes": "../guides/run_dbt/execution-modes.html", + "getting_started/gcp-cloud-run-job": "../guides/run_dbt/container/gcp-cloud-run-job.html", + "getting_started/kubernetes": "../guides/run_dbt/container/kubernetes.html", + 
"getting_started/operators": "../guides/run_dbt/operators/operators.html", + "getting_started/watcher-execution-mode": "../guides/run_dbt/airflow-worker/watcher-execution-mode.html", + "getting_started/watcher-kubernetes-execution-mode": "../guides/run_dbt/container/watcher-kubernetes-execution-mode.html", + "profiles/AthenaAccessKey": "../reference/profiles/AthenaAccessKey.html", + "profiles/ClickhouseUserPassword": "../reference/profiles/ClickhouseUserPassword.html", + "profiles/DatabricksOauth": "../reference/profiles/DatabricksOauth.html", + "profiles/DatabricksToken": "../reference/profiles/DatabricksToken.html", + "profiles/DuckDBUserPassword": "../reference/profiles/DuckDBUserPassword.html", + "profiles/ExasolUserPassword": "../reference/profiles/ExasolUserPassword.html", + "profiles/GoogleCloudOauth": "../reference/profiles/GoogleCloudOauth.html", + "profiles/GoogleCloudServiceAccountDict": "../reference/profiles/GoogleCloudServiceAccountDict.html", + "profiles/GoogleCloudServiceAccountFile": "../reference/profiles/GoogleCloudServiceAccountFile.html", + "profiles/index": "../reference/profiles/index.html", + "profiles/MysqlUserPassword": "../reference/profiles/MysqlUserPassword.html", + "profiles/OracleUserPassword": "../reference/profiles/OracleUserPassword.html", + "profiles/PostgresUserPassword": "../reference/profiles/PostgresUserPassword.html", + "profiles/RedshiftUserPassword": "../reference/profiles/RedshiftUserPassword.html", + "profiles/SnowflakeEncryptedPrivateKeyFilePem": "../reference/profiles/SnowflakeEncryptedPrivateKeyFilePem.html", + "profiles/SnowflakeEncryptedPrivateKeyPem": "../reference/profiles/SnowflakeEncryptedPrivateKeyPem.html", + "profiles/SnowflakePrivateKeyPem": "../reference/profiles/SnowflakePrivateKeyPem.html", + "profiles/SnowflakeUserPassword": "../reference/profiles/SnowflakeUserPassword.html", + "profiles/SparkThrift": "../reference/profiles/SparkThrift.html", + "profiles/StandardSQLServerAuth": 
"../reference/profiles/StandardSQLServerAuth.html", + "profiles/StarrocksUserPassword": "../reference/profiles/StarrocksUserPassword.html", + "profiles/TeradataUserPassword": "../reference/profiles/TeradataUserPassword.html", + "profiles/TrinoCertificate": "../reference/profiles/TrinoCertificate.html", + "profiles/TrinoJWT": "../reference/profiles/TrinoJWT.html", + "profiles/TrinoLDAP": "../reference/profiles/TrinoLDAP.html", + "profiles/VerticaUserPassword": "../reference/profiles/VerticaUserPassword.html", +} diff --git a/docs/configuration/index.rst b/docs/configuration/index.rst deleted file mode 100644 index a6042327b0..0000000000 --- a/docs/configuration/index.rst +++ /dev/null @@ -1,36 +0,0 @@ -.. _configuration: - -Configuration -============= - -Cosmos offers a number of configuration options to customize its behavior. For more info, check out the links on the left or the table of contents below. - -.. toctree:: - :caption: Contents: - - dbt Fusion - Multi-Project Setups - - Project Config - Profile Config - Execution Config - Render Config - - Parsing Methods - Configuring in Airflow - Configuring Lineage - Generating Docs - Hosting Docs - Scheduling - Testing Behavior - Selecting & Excluding - Partial Parsing - Source Nodes Rendering - Post-rendering DAG customization - Operator Args - Compiled SQL - Logging - Caching - Task display name - Callbacks - Memory Optimization diff --git a/docs/configuration/profile-config.rst b/docs/configuration/profile-config.rst deleted file mode 100644 index dcc5c784ab..0000000000 --- a/docs/configuration/profile-config.rst +++ /dev/null @@ -1,4 +0,0 @@ -Profile Config -================ - -Cosmos has multiple methods for supplying profiles. For more information, click on the Profiles tab on the top navbar. 
diff --git a/docs/contributing.rst b/docs/contributing.rst index 006149faac..d50c120398 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -155,17 +155,19 @@ To run the checks manually, run: Writing Docs ____________ -`Hatch `_ is a unified command-line tool for managing dependencies and environment isolation for Python developers. In Cosmos, we use a Hatchto declare the dependencies required for the project itself, as well as for tests and documentation builds. +`Hatch `_ is a unified command-line tool for managing dependencies and environment isolation for Python developers. In Cosmos, we use Hatch to declare the dependencies required for the project itself, as well as for tests and documentation builds. If you don’t already have Hatch installed, please `install it `_ before proceeding. As an example, on macOS, you can do so with: .. code-block:: bash + brew install hatch You can run the docs locally by running the following: .. code-block:: bash + hatch run docs:serve diff --git a/docs/generate_mappings.py b/docs/generate_mappings.py index 52a7b1a787..bc825a896e 100644 --- a/docs/generate_mappings.py +++ b/docs/generate_mappings.py @@ -42,8 +42,8 @@ def get_fields_from_mapping(mapping: type[BaseProfileMapping]) -> list[Field]: def generate_mapping_docs( - templates_dir: str = "./templates", - output_dir: str = "./profiles", + templates_dir: str = "./reference/templates", + output_dir: str = "./reference/profiles", ) -> None: """ Generate a dedicated docs page per profile mapping. diff --git a/docs/getting_started/astro.rst b/docs/getting_started/astro.rst index b590575f2e..56e9fa0d53 100644 --- a/docs/getting_started/astro.rst +++ b/docs/getting_started/astro.rst @@ -1,7 +1,7 @@ .. _astro: -Getting Started on Astro -======================== +Getting Started with Cosmos on Astro +==================================== While it is possible to use Cosmos on Astro with all :ref:`Execution Modes `, we recommend using the ``local`` execution mode. 
It's the simplest to set up and use. diff --git a/docs/getting_started/dbt-airflow-concepts.rst b/docs/getting_started/dbt-airflow-concepts.rst index 70c4feae8d..ee55abe694 100644 --- a/docs/getting_started/dbt-airflow-concepts.rst +++ b/docs/getting_started/dbt-airflow-concepts.rst @@ -1,7 +1,7 @@ .. _dbt-airflow-concepts: -Similar dbt & Airflow concepts -============================== +Similar dbt and Airflow concepts +================================ While dbt is an open source tool for data transformations and analysis, using SQL, Airflow focuses on being a platform for the development, scheduling and monitoring of batch-oriented workflows, using Python. Although both tools have many diff --git a/docs/getting_started/index.rst b/docs/getting_started/index.rst index eb71d10221..59e07698a5 100644 --- a/docs/getting_started/index.rst +++ b/docs/getting_started/index.rst @@ -1,27 +1,28 @@ .. _getting-started: .. toctree:: + :maxdepth: 1 :hidden: - :caption: Contents: + :caption: Cosmos Fundamentals + + Similar dbt and Airflow concepts + +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Quickstart Astro CLI quickstart - Astro - MWAA - GCC - Open-Source - Execution Modes - Docker Execution Mode - Kubernetes Execution Mode - Azure Container Instance Execution Mode - AWS Container Run Job Execution Mode - GCP Cloud Run Job Execution Mode - Airflow Async Execution Mode - Watcher Execution Mode - Watcher Kubernetes Execution Mode - dbt and Airflow Similar Concepts - Operators - Custom Airflow Properties +.. 
toctree:: + :maxdepth: 1 + :hidden: + :caption: Get started with Cosmos + + Open-source Airflow + Astro + Google Cloud Composer (GCC) + Amazon Managed Workflows for Apache Airflow (MWAA) Getting Started =============== @@ -46,11 +47,11 @@ For more customization, check out the different execution modes that Cosmos supp For specific guides, see the following: -- `Executing dbt DAGs with Docker Operators `__ -- `Executing dbt DAGs with KubernetesPodOperators `__ -- `Executing dbt DAGs with Watcher Kubernetes Mode `__ -- `Executing dbt DAGs with AzureContainerInstancesOperators `__ -- `Executing dbt DAGs with GcpCloudRunExecuteJobOperators `__ +- `Executing dbt DAGs with DockerOperators <../guides/run_dbt/container/docker.html>`__ +- `Executing dbt DAGs with KubernetesPodOperators <../guides/run_dbt/container/kubernetes.html>`__ +- `Executing dbt DAGs with Watcher Kubernetes Mode <../guides/run_dbt/container/watcher-kubernetes-execution-mode.html>`__ +- `Executing dbt DAGs with AzureContainerInstancesOperators <../guides/run_dbt/container/azure-container-instance.html>`__ +- `Executing dbt DAGs with GcpCloudRunExecuteJobOperators <../guides/run_dbt/container/gcp-cloud-run-job.html>`__ Concepts Overview diff --git a/docs/getting_started/mwaa.rst b/docs/getting_started/mwaa.rst index 5b7c41bde5..5b1da23439 100644 --- a/docs/getting_started/mwaa.rst +++ b/docs/getting_started/mwaa.rst @@ -1,7 +1,7 @@ .. _mwaa: -Getting Started on MWAA -======================= +Getting Started with Cosmos on Amazon Managed Workflows +======================================================= Users can face Python dependency issues when trying to use the Cosmos `Local Execution Mode `_ in Amazon Managed Workflows for `Apache Airflow® `_ (MWAA). diff --git a/docs/getting_started/open-source.rst b/docs/getting_started/open-source.rst index ba9bbdb15c..f5d1db832b 100644 --- a/docs/getting_started/open-source.rst +++ b/docs/getting_started/open-source.rst @@ -1,7 +1,7 @@ .. 
_open-source: -Getting Started on Open Source Airflow -====================================== +Getting Started with Cosmos on Open-source Airflow +================================================== When running open-source Airflow, your setup may vary. This guide assumes you have access to edit the underlying image. diff --git a/docs/configuration/compiled-sql.rst b/docs/guides/cosmos_devex/compiled-sql.rst similarity index 100% rename from docs/configuration/compiled-sql.rst rename to docs/guides/cosmos_devex/compiled-sql.rst diff --git a/docs/guides/cosmos_devex/index.rst b/docs/guides/cosmos_devex/index.rst new file mode 100644 index 0000000000..2ad3dff71b --- /dev/null +++ b/docs/guides/cosmos_devex/index.rst @@ -0,0 +1,14 @@ +.. _cosmos_devex: + + +Cosmos DevEx +============ + +.. toctree:: + :maxdepth: 1 + :caption: Cosmos DevEx + + lineage + compiled-sql + logging + task-display-name diff --git a/docs/configuration/lineage.rst b/docs/guides/cosmos_devex/lineage.rst similarity index 100% rename from docs/configuration/lineage.rst rename to docs/guides/cosmos_devex/lineage.rst diff --git a/docs/configuration/logging.rst b/docs/guides/cosmos_devex/logging.rst similarity index 100% rename from docs/configuration/logging.rst rename to docs/guides/cosmos_devex/logging.rst diff --git a/docs/configuration/task-display-name.rst b/docs/guides/cosmos_devex/task-display-name.rst similarity index 100% rename from docs/configuration/task-display-name.rst rename to docs/guides/cosmos_devex/task-display-name.rst diff --git a/docs/configuration/generating-docs.rst b/docs/guides/dbt_docs/generating-docs.rst similarity index 100% rename from docs/configuration/generating-docs.rst rename to docs/guides/dbt_docs/generating-docs.rst diff --git a/docs/configuration/hosting-docs.rst b/docs/guides/dbt_docs/hosting-docs.rst similarity index 100% rename from docs/configuration/hosting-docs.rst rename to docs/guides/dbt_docs/hosting-docs.rst diff --git 
a/docs/configuration/dbt-fusion.rst b/docs/guides/dbt_setup/dbt-fusion.rst similarity index 100% rename from docs/configuration/dbt-fusion.rst rename to docs/guides/dbt_setup/dbt-fusion.rst diff --git a/docs/guides/index.rst b/docs/guides/index.rst new file mode 100644 index 0000000000..9b7e233814 --- /dev/null +++ b/docs/guides/index.rst @@ -0,0 +1,54 @@ +.. _guides: + +Guides +====== + +Cosmos offers a number of configuration options to customize its behavior. For more info, check out the links on the left or the table of contents below. + +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Set up dbt with Airflow + + dbt_setup/dbt-fusion + +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Translating dbt into Airflow + + translate_dbt_to_airflow/index + +.. toctree:: + :maxdepth: 3 + :hidden: + :caption: How Cosmos runs dbt + + run_dbt/execution-modes + run_dbt/airflow-worker/index + run_dbt/container/index + run_dbt/callbacks/callbacks + run_dbt/operators/operators + run_dbt/customization/index + +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Multi-project Setups + + multi_project/multi-project + +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Documentation + + dbt_docs/generating-docs + dbt_docs/hosting-docs + +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Cosmos DevEx + + cosmos_devex/index diff --git a/docs/configuration/multi-project.rst b/docs/guides/multi_project/multi-project.rst similarity index 99% rename from docs/configuration/multi-project.rst rename to docs/guides/multi_project/multi-project.rst index 70283bc410..5dfd79eea0 100644 --- a/docs/configuration/multi-project.rst +++ b/docs/guides/multi_project/multi-project.rst @@ -169,7 +169,7 @@ You can use either separate DAGs or a combined DAG with task groups. **Option 1: Combined DAG with Task Groups using dbt ls Load Mode (Recommended)** -.. literalinclude:: ../../dev/dags/cross_project_dbt_ls_dag.py +.. 
literalinclude:: ../../../dev/dags/cross_project_dbt_ls_dag.py :language: python :start-after: [START cross_project_dbt_ls_dag] :end-before: [END cross_project_dbt_ls_dag] @@ -178,7 +178,7 @@ You can use either separate DAGs or a combined DAG with task groups. This option uses pre-generated ``manifest.json`` files for faster DAG parsing (no ``dbt ls`` execution required). -.. literalinclude:: ../../dev/dags/cross_project_manifest_dag.py +.. literalinclude:: ../../../dev/dags/cross_project_manifest_dag.py :language: python :start-after: [START cross_project_manifest_dag] :end-before: [END cross_project_manifest_dag] diff --git a/docs/getting_started/async-execution-mode.rst b/docs/guides/run_dbt/airflow-worker/async-execution-mode.rst similarity index 99% rename from docs/getting_started/async-execution-mode.rst rename to docs/guides/run_dbt/airflow-worker/async-execution-mode.rst index 6d61bcf22b..55d6778abc 100644 --- a/docs/getting_started/async-execution-mode.rst +++ b/docs/guides/run_dbt/airflow-worker/async-execution-mode.rst @@ -1,7 +1,5 @@ .. _async-execution-mode: -.. title:: Getting Started with Deferrable Operator - Airflow Async Execution Mode ============================ diff --git a/docs/getting_started/execution-modes-local-conflicts.rst b/docs/guides/run_dbt/airflow-worker/execution-modes-local-conflicts.rst similarity index 97% rename from docs/getting_started/execution-modes-local-conflicts.rst rename to docs/guides/run_dbt/airflow-worker/execution-modes-local-conflicts.rst index 9fec173751..0f9120127c 100644 --- a/docs/getting_started/execution-modes-local-conflicts.rst +++ b/docs/guides/run_dbt/airflow-worker/execution-modes-local-conflicts.rst @@ -10,8 +10,8 @@ When using the `Local Execution Mode `__, users may If you find errors, we recommend users isolating the installation of dbt from the Airflow installation. 
With the `Local Execution Mode `__, this can be accomplished by installing dbt in a separate -Python virtualenv and setting the `ExecutionConfig.dbt_executable_path <../configuration/execution-config.html>`_ and -`RenderConfig.dbt_executable_path <../configuration/render-config.html>`_ parameters. +Python virtualenv and setting the `ExecutionConfig.dbt_executable_path <../../../reference/configs/execution-config.html>`_ and +`RenderConfig.dbt_executable_path <../../translate_dbt_to_airflow/render-config.html>`_ parameters. The page `execution modes `__ describes many other methods that support isolating dbt from Airflow. diff --git a/docs/guides/run_dbt/airflow-worker/index.rst b/docs/guides/run_dbt/airflow-worker/index.rst new file mode 100644 index 0000000000..eaa89c2d9f --- /dev/null +++ b/docs/guides/run_dbt/airflow-worker/index.rst @@ -0,0 +1,9 @@ +Run dbt in an Airflow worker +============================ + +.. toctree:: + :maxdepth: 1 + :caption: Run dbt in an Airflow worker + + async-execution-mode + watcher-execution-mode diff --git a/docs/getting_started/watcher-execution-mode.rst b/docs/guides/run_dbt/airflow-worker/watcher-execution-mode.rst similarity index 98% rename from docs/getting_started/watcher-execution-mode.rst rename to docs/guides/run_dbt/airflow-worker/watcher-execution-mode.rst index af7589650c..05bb21c7f7 100644 --- a/docs/getting_started/watcher-execution-mode.rst +++ b/docs/guides/run_dbt/airflow-worker/watcher-execution-mode.rst @@ -144,7 +144,7 @@ Example 1 — Using ``DbtDag`` with ``ExecutionMode.WATCHER`` You can enable WATCHER mode directly in your ``DbtDag`` configuration. This approach is best when your Airflow DAG is fully dedicated to a dbt project. -.. literalinclude:: ../../dev/dags/example_watcher.py +.. 
literalinclude:: ../../../../dev/dags/example_watcher.py :language: python :start-after: [START example_watcher] :end-before: [END example_watcher] @@ -370,7 +370,7 @@ Source freshness nodes Since Cosmos 1.6, it `supports the rendering of source nodes `_. 
-We noticed some Cosmos users use this feature alongside `overriding Cosmos source nodes `_ as sensors or another operator that allows them to skip the following branch of the DAG if the source is not fresh. +We noticed some Cosmos users use this feature alongside `overriding Cosmos source nodes `_ as sensors or another operator that allows them to skip the following branch of the DAG if the source is not fresh. This use case is not currently supported by the ``ExecutionMode.WATCHER``, since the ``dbt build`` command does not run `source freshness checks `_. @@ -451,7 +451,7 @@ Asynchronous sensor execution To disable asynchronous execution, set the ``deferrable`` flag to ``False`` in the ``operator_args``. -.. literalinclude:: ../../dev/dags/example_watcher.py +.. literalinclude:: ../../../../dev/dags/example_watcher.py :language: python :start-after: [START example_watcher_synchronous] :end-before: [END example_watcher_synchronous] diff --git a/docs/configuration/callbacks.rst b/docs/guides/run_dbt/callbacks/callbacks.rst similarity index 98% rename from docs/configuration/callbacks.rst rename to docs/guides/run_dbt/callbacks/callbacks.rst index c754245525..4b602ece3f 100644 --- a/docs/configuration/callbacks.rst +++ b/docs/guides/run_dbt/callbacks/callbacks.rst @@ -34,7 +34,7 @@ Example: Using Callbacks with a Single Operator To demonstrate how to specify a callback function for uploading files from the target directory, here’s an example using a single operator in an Airflow DAG: -.. literalinclude:: ../../dev/dags/example_operators.py +.. literalinclude:: ../../../../dev/dags/example_operators.py :language: python :start-after: [START single_operator_callback] :end-before: [END single_operator_callback] @@ -46,7 +46,7 @@ You can leverage the :ref:`remote_target_path` configuration to upload files from the target directory to a remote storage. 
Below is an example of how to define a callback helper function in your ``DbtDag`` that utilizes this configuration: -.. literalinclude:: ../../dev/dags/cosmos_callback_dag.py +.. literalinclude:: ../../../../dev/dags/cosmos_callback_dag.py :language: python :start-after: [START cosmos_callback_example] :end-before: [END cosmos_callback_example] diff --git a/docs/getting_started/aws-container-run-job.rst b/docs/guides/run_dbt/container/aws-container-run-job.rst similarity index 99% rename from docs/getting_started/aws-container-run-job.rst rename to docs/guides/run_dbt/container/aws-container-run-job.rst index db00fc8c3c..4321c8f346 100644 --- a/docs/getting_started/aws-container-run-job.rst +++ b/docs/guides/run_dbt/container/aws-container-run-job.rst @@ -1,7 +1,5 @@ .. _aws-container-run-job: -.. title:: Getting Started with Astronomer Cosmos on AWS ECS - Getting Started with Astronomer Cosmos on AWS ECS ================================================== diff --git a/docs/getting_started/azure-container-instance.rst b/docs/guides/run_dbt/container/azure-container-instance.rst similarity index 100% rename from docs/getting_started/azure-container-instance.rst rename to docs/guides/run_dbt/container/azure-container-instance.rst diff --git a/docs/getting_started/docker.rst b/docs/guides/run_dbt/container/docker.rst similarity index 100% rename from docs/getting_started/docker.rst rename to docs/guides/run_dbt/container/docker.rst diff --git a/docs/getting_started/gcp-cloud-run-job.rst b/docs/guides/run_dbt/container/gcp-cloud-run-job.rst similarity index 100% rename from docs/getting_started/gcp-cloud-run-job.rst rename to docs/guides/run_dbt/container/gcp-cloud-run-job.rst diff --git a/docs/guides/run_dbt/container/index.rst b/docs/guides/run_dbt/container/index.rst new file mode 100644 index 0000000000..9cccdbb29a --- /dev/null +++ b/docs/guides/run_dbt/container/index.rst @@ -0,0 +1,13 @@ +Run dbt in a container +====================== + +.. 
toctree:: + :maxdepth: 1 + :caption: Run dbt in a container + + aws-container-run-job + azure-container-instance + docker + gcp-cloud-run-job + kubernetes + watcher-kubernetes-execution-mode diff --git a/docs/getting_started/kubernetes.rst b/docs/guides/run_dbt/container/kubernetes.rst similarity index 92% rename from docs/getting_started/kubernetes.rst rename to docs/guides/run_dbt/container/kubernetes.rst index 607ba07bd7..d200589429 100644 --- a/docs/getting_started/kubernetes.rst +++ b/docs/guides/run_dbt/container/kubernetes.rst @@ -28,7 +28,7 @@ Additional KubernetesPodOperator parameters can be added to the ``operator_args` For instance, -.. literalinclude:: ../../dev/dags/jaffle_shop_kubernetes.py +.. literalinclude:: ../../../../dev/dags/jaffle_shop_kubernetes.py :language: python :start-after: [START kubernetes_tg_example] :end-before: [END kubernetes_tg_example] @@ -161,7 +161,7 @@ The Kubernetes execution mode has the following limitations: - Does not emit Airflow datasets, assets, and dataset aliases (there is an `open ticket #2329 `__ to address this) - Does not handle installing dbt deps for users (there is an `open ticket #679 `__ to address this) - Does not support `ProfileMapping `_ (there is an `open ticket #749 `__ to address this) -- Does not support `Callbacks `_ (there is an `open ticket #1575 `__ to address this) -- Does not expose Compiled SQL as a `templated field `_ -- Does not benefit from `Cosmos caching mechanisms `_ -- Does not support `generating dbt docs & uploading to an object store `_ (there is a `PR `_ to solve this for S3) +- Does not support `Callbacks `_ (there is an `open ticket #1575 `__ to address this) +- Does not expose Compiled SQL as a `templated field `_ +- Does not benefit from `Cosmos caching mechanisms `_ +- Does not support `generating dbt docs & uploading to an object store `_ (there is a `PR `_ to solve this for S3) diff --git a/docs/getting_started/watcher-kubernetes-execution-mode.rst 
b/docs/guides/run_dbt/container/watcher-kubernetes-execution-mode.rst similarity index 99% rename from docs/getting_started/watcher-kubernetes-execution-mode.rst rename to docs/guides/run_dbt/container/watcher-kubernetes-execution-mode.rst index 16dbbffd0a..d3f8a80a49 100644 --- a/docs/getting_started/watcher-kubernetes-execution-mode.rst +++ b/docs/guides/run_dbt/container/watcher-kubernetes-execution-mode.rst @@ -183,7 +183,7 @@ Example DAG Below is a complete example of a DAG using ``ExecutionMode.WATCHER_KUBERNETES``: -.. literalinclude:: ../../dev/dags/jaffle_shop_watcher_kubernetes.py +.. literalinclude:: ../../../../dev/dags/jaffle_shop_watcher_kubernetes.py :language: python ------------------------------------------------------------------------------- diff --git a/docs/guides/run_dbt/customization/index.rst b/docs/guides/run_dbt/customization/index.rst new file mode 100644 index 0000000000..44021154dc --- /dev/null +++ b/docs/guides/run_dbt/customization/index.rst @@ -0,0 +1,9 @@ +Additional Customization +======================== + +.. toctree:: + :maxdepth: 1 + :caption: Additional Customization + + operator-args + scheduling diff --git a/docs/configuration/operator-args.rst b/docs/guides/run_dbt/customization/operator-args.rst similarity index 100% rename from docs/configuration/operator-args.rst rename to docs/guides/run_dbt/customization/operator-args.rst diff --git a/docs/configuration/scheduling.rst b/docs/guides/run_dbt/customization/scheduling.rst similarity index 99% rename from docs/configuration/scheduling.rst rename to docs/guides/run_dbt/customization/scheduling.rst index 2d4e729c5b..0040135d37 100644 --- a/docs/configuration/scheduling.rst +++ b/docs/guides/run_dbt/customization/scheduling.rst @@ -77,7 +77,7 @@ This example DAG: .. The following renders in Sphinx but not Github: -.. literalinclude:: ../../dev/dags/basic_cosmos_dag.py +.. 
literalinclude:: ../../../../dev/dags/basic_cosmos_dag.py :language: python :start-after: [START local_example] :end-before: [END local_example] diff --git a/docs/getting_started/execution-modes.rst b/docs/guides/run_dbt/execution-modes.rst similarity index 98% rename from docs/getting_started/execution-modes.rst rename to docs/guides/run_dbt/execution-modes.rst index ea6a03f283..71d581c25b 100644 --- a/docs/getting_started/execution-modes.rst +++ b/docs/guides/run_dbt/execution-modes.rst @@ -1,7 +1,7 @@ .. _execution-modes: Execution Modes -=============== +=================== Cosmos can run ``dbt`` commands using several different approaches, called ``execution modes``: @@ -96,7 +96,7 @@ When using the ``local`` execution mode, Cosmos converts Airflow Connections int Example of how to use, for instance, when ``dbt`` was installed together with Cosmos: -.. literalinclude:: ../../dev/dags/basic_cosmos_dag.py +.. literalinclude:: ../../../dev/dags/basic_cosmos_dag.py :language: python :start-after: [START local_example] :end-before: [END local_example] @@ -122,7 +122,7 @@ Some drawbacks of this approach: Example of how to use: -.. literalinclude:: ../../dev/dags/example_virtualenv.py +.. literalinclude:: ../../../dev/dags/example_virtualenv.py :language: python :start-after: [START virtualenv_example] :end-before: [END virtualenv_example] @@ -170,7 +170,7 @@ Check the step-by-step guide on using the ``kubernetes`` execution mode at :ref: Example DAG: -.. literalinclude:: ../../dev/dags/jaffle_shop_kubernetes.py +.. literalinclude:: ../../../dev/dags/jaffle_shop_kubernetes.py :language: python :start-after: [START kubernetes_seed_example] :end-before: [END kubernetes_seed_example] @@ -314,7 +314,7 @@ as more dbt nodes will be run in parallel since they won't be blocking Airflow's Example DAG: -.. literalinclude:: ../../dev/dags/simple_dag_async.py +.. 
literalinclude:: ../../../dev/dags/simple_dag_async.py :language: python :start-after: [START airflow_async_execution_mode_example] :end-before: [END airflow_async_execution_mode_example] diff --git a/docs/getting_started/operators.rst b/docs/guides/run_dbt/operators/operators.rst similarity index 88% rename from docs/getting_started/operators.rst rename to docs/guides/run_dbt/operators/operators.rst index 9f6658b6b1..448e037e77 100644 --- a/docs/getting_started/operators.rst +++ b/docs/guides/run_dbt/operators/operators.rst @@ -18,7 +18,7 @@ The ``DbtCloneLocalOperator`` implement `dbt clone = 1.5 and cosmos >= 1.6.0. @@ -70,7 +70,7 @@ The ``on_warning_callback`` is a callback parameter available on the ``DbtSource Example: -.. literalinclude:: ../../dev/dags/example_source_rendering.py/ +.. literalinclude:: ../../../dev/dags/example_source_rendering.py/ :language: python :start-after: [START cosmos_source_node_example] :end-before: [END cosmos_source_node_example] diff --git a/docs/configuration/parsing-methods.rst b/docs/guides/translate_dbt_to_airflow/parsing-methods.rst similarity index 96% rename from docs/configuration/parsing-methods.rst rename to docs/guides/translate_dbt_to_airflow/parsing-methods.rst index 9eb654d04f..567fc4c137 100644 --- a/docs/configuration/parsing-methods.rst +++ b/docs/guides/translate_dbt_to_airflow/parsing-methods.rst @@ -56,7 +56,7 @@ Examples of how to supply ``manifest.json`` using ``manifest_path`` argument: - Local path: -.. literalinclude:: ../../dev/dags/cosmos_manifest_example.py +.. literalinclude:: ../../../dev/dags/cosmos_manifest_example.py :language: python :start-after: [START local_example] :end-before: [END local_example] @@ -66,7 +66,7 @@ Examples of how to supply ``manifest.json`` using ``manifest_path`` argument: Ensure that you have the required dependencies installed to use the S3 URL. You can install the required dependencies using the following command: ``pip install "astronomer-cosmos[amazon]"`` -.. 
literalinclude:: ../../dev/dags/cosmos_manifest_example.py +.. literalinclude:: ../../../dev/dags/cosmos_manifest_example.py :language: python :start-after: [START aws_s3_example] :end-before: [END aws_s3_example] @@ -76,7 +76,7 @@ using the following command: ``pip install "astronomer-cosmos[amazon]"`` Ensure that you have the required dependencies installed to use the GCS URL. You can install the required dependencies using the following command: ``pip install "astronomer-cosmos[google]"`` -.. literalinclude:: ../../dev/dags/cosmos_manifest_example.py +.. literalinclude:: ../../../dev/dags/cosmos_manifest_example.py :language: python :start-after: [START gcp_gs_example] :end-before: [END gcp_gs_example] @@ -86,7 +86,7 @@ using the following command: ``pip install "astronomer-cosmos[google]"`` Ensure that you have the required dependencies installed to use the Azure blob URL. You can install the required dependencies using the following command: ``pip install "astronomer-cosmos[microsoft]"`` -.. literalinclude:: ../../dev/dags/cosmos_manifest_example.py +.. literalinclude:: ../../../dev/dags/cosmos_manifest_example.py :language: python :start-after: [START azure_abfs_example] :end-before: [END azure_abfs_example] diff --git a/docs/configuration/render-config.rst b/docs/guides/translate_dbt_to_airflow/render-config.rst similarity index 99% rename from docs/configuration/render-config.rst rename to docs/guides/translate_dbt_to_airflow/render-config.rst index f153d3c3d1..425e106124 100644 --- a/docs/configuration/render-config.rst +++ b/docs/guides/translate_dbt_to_airflow/render-config.rst @@ -63,7 +63,7 @@ Your pipeline may even have specific node types not part of the standard dbt def The following example illustrates how it is possible to tell Cosmos how to convert two different types of nodes (``source`` and ``exposure``) into Airflow: -.. literalinclude:: ../../dev/dags/example_cosmos_sources.py +.. 
literalinclude:: ../../../dev/dags/example_cosmos_sources.py :language: python :start-after: [START custom_dbt_nodes] :end-before: [END custom_dbt_nodes] diff --git a/docs/index.rst b/docs/index.rst index beee4f40bb..e27883979e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -2,13 +2,14 @@ .. toctree:: :hidden: - :maxdepth: 2 + :maxdepth: 0 :caption: Contents: Home Getting Started - Configuration - Profiles + Guides + Optimize Performance + Reference Contributing Airflow 3 compatibility Compatibility Policy @@ -110,8 +111,7 @@ for managing and scaling your data workflows. Getting Started with Airflow Async Execution Mode ------------------------------------------------- -See our :doc:`Getting Started with Airflow Async Execution Mode ` for details. - +See our :doc:`Getting Started with Airflow Async Execution Mode ` for details. Airflow 3 compatibility ----------------------- diff --git a/docs/configuration/caching.rst b/docs/optimize_performance/caching.rst similarity index 100% rename from docs/configuration/caching.rst rename to docs/optimize_performance/caching.rst diff --git a/docs/optimize_performance/index.rst b/docs/optimize_performance/index.rst new file mode 100644 index 0000000000..0ed84470d0 --- /dev/null +++ b/docs/optimize_performance/index.rst @@ -0,0 +1,13 @@ +.. _optimize-performance: + +Optimize your Cosmos Performance +================================ + +.. 
toctree:: + :maxdepth: 1 + :caption: Optimize Performance + + partial-parsing + memory_optimization + selecting-excluding + caching diff --git a/docs/configuration/memory_optimization.rst b/docs/optimize_performance/memory_optimization.rst similarity index 100% rename from docs/configuration/memory_optimization.rst rename to docs/optimize_performance/memory_optimization.rst diff --git a/docs/configuration/partial-parsing.rst b/docs/optimize_performance/partial-parsing.rst similarity index 100% rename from docs/configuration/partial-parsing.rst rename to docs/optimize_performance/partial-parsing.rst diff --git a/docs/configuration/selecting-excluding.rst b/docs/optimize_performance/selecting-excluding.rst similarity index 100% rename from docs/configuration/selecting-excluding.rst rename to docs/optimize_performance/selecting-excluding.rst diff --git a/docs/configuration/cosmos-conf.rst b/docs/reference/configs/cosmos-conf.rst similarity index 98% rename from docs/configuration/cosmos-conf.rst rename to docs/reference/configs/cosmos-conf.rst index cc68c3b71f..a8928c3840 100644 --- a/docs/configuration/cosmos-conf.rst +++ b/docs/reference/configs/cosmos-conf.rst @@ -253,14 +253,14 @@ This page lists all available Airflow configurations that affect ``astronomer-co As an example, when this option is enabled, the following is an example of specifying the imports with full module paths: - .. literalinclude:: ../../dev/dags/basic_cosmos_dag_full_module_path_imports.py + .. literalinclude:: ../../../dev/dags/basic_cosmos_dag_full_module_path_imports.py :language: python :start-after: [START cosmos_explicit_imports] :end-before: [END cosmos_explicit_imports] as opposed to the following approach you might have when this option is disabled (default): - .. literalinclude:: ../../dev/dags/basic_cosmos_dag.py + .. 
literalinclude:: ../../../dev/dags/basic_cosmos_dag.py :language: python :start-after: [START cosmos_init_imports] :end-before: [END cosmos_init_imports] diff --git a/docs/configuration/execution-config.rst b/docs/reference/configs/execution-config.rst similarity index 100% rename from docs/configuration/execution-config.rst rename to docs/reference/configs/execution-config.rst diff --git a/docs/reference/configs/index.rst b/docs/reference/configs/index.rst new file mode 100644 index 0000000000..671840e74d --- /dev/null +++ b/docs/reference/configs/index.rst @@ -0,0 +1,13 @@ + +Configuration References +======================== + + +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Configurations + + Project Config + Execution Config + Cosmos Config diff --git a/docs/configuration/project-config.rst b/docs/reference/configs/project-config.rst similarity index 100% rename from docs/configuration/project-config.rst rename to docs/reference/configs/project-config.rst diff --git a/docs/reference/index.rst b/docs/reference/index.rst new file mode 100644 index 0000000000..bdc747600e --- /dev/null +++ b/docs/reference/index.rst @@ -0,0 +1,32 @@ + +Reference +========= + +.. toctree:: + :maxdepth: 0 + :hidden: + + self + + +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Configurations + + configs/index + + +.. toctree:: + :maxdepth: 1 + :hidden: + :caption: Profile Configs + + profiles/index + +There are different configurations and profiles that you can use to configure how Cosmos works. + +- `ProjectConfig `_: The ``ProjectConfig`` contains information about which dbt project a Cosmos dag or task group is going to execute, as well as configurations that apply to both rendering and execution. +- `ExecutionConfig `_: The ``ExecutionConfig`` determines where and how the dbt commands are run within Cosmos. +- `CosmosConfig `_: This page lists available Airflow configurations that affect ``astronomer-cosmos`` behavior.
You can set them in the ``airflow.cfg`` file or using environment variables. +- `ProfileConfig `_: The ``ProfileConfig`` class determines which data warehouse Cosmos connects to when it executes the dbt SQL. These docs include reference documentation for common data warehouses you might use in your dbt code. \ No newline at end of file diff --git a/docs/reference/profiles/AthenaAccessKey.rst b/docs/reference/profiles/AthenaAccessKey.rst new file mode 100644 index 0000000000..54efb0fd94 --- /dev/null +++ b/docs/reference/profiles/AthenaAccessKey.rst @@ -0,0 +1,168 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +AthenaAccessKey +=============== + + + +Uses the Airflow AWS Connection provided to get_credentials() to generate the profile for dbt. + + + +https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/connections/aws.html + + + + + +This behaves similarly to other provider operators such as the AWS Athena Operator. + +Where you pass the aws_conn_id and the operator will generate the credentials for you. + + + +https://registry.astronomer.io/providers/amazon/versions/latest/modules/athenaoperator + + + +Information about the dbt Athena profile that is generated can be found here: + + + +https://github.com/dbt-athena/dbt-athena?tab=readme-ov-file#configuring-your-profile + +https://docs.getdbt.com/docs/core/connect-data-platform/athena-setup + + + +This profile mapping translates Airflow connections with the type ``aws`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import AthenaAccessKeyProfileMapping + + profile = AthenaAccessKeyProfileMapping( + conn_id = 'my_aws_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. 
The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``aws_profile_name`` + - False + + - ``extra.aws_profile_name`` + + + * - ``database`` + - True + + - ``extra.database`` + + + * - ``debug_query_state`` + - False + + - ``extra.debug_query_state`` + + + * - ``lf_tags_database`` + - False + + - ``extra.lf_tags_database`` + + + * - ``num_retries`` + - False + + - ``extra.num_retries`` + + + * - ``poll_interval`` + - False + + - ``extra.poll_interval`` + + + * - ``region_name`` + - True + + - ``extra.region_name`` + + + * - ``s3_data_dir`` + - False + + - ``extra.s3_data_dir`` + + + * - ``s3_data_naming`` + - False + + - ``extra.s3_data_naming`` + + + * - ``s3_staging_dir`` + - True + + - ``extra.s3_staging_dir`` + + + * - ``schema`` + - True + + - ``extra.schema`` + + + * - ``seed_s3_upload_args`` + - False + + - ``extra.seed_s3_upload_args`` + + + * - ``work_group`` + - False + + - ``extra.work_group`` + + + * - ``aws_access_key_id`` + - True + + - + + + * - ``aws_secret_access_key`` + - True + + - + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. 
+ For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. diff --git a/docs/reference/profiles/ClickhouseUserPassword.rst b/docs/reference/profiles/ClickhouseUserPassword.rst new file mode 100644 index 0000000000..3ac5d694c3 --- /dev/null +++ b/docs/reference/profiles/ClickhouseUserPassword.rst @@ -0,0 +1,90 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +ClickhouseUserPassword +====================== + + + +Maps Airflow generic connections using user + password authentication to dbt Clickhouse profiles. + +https://docs.getdbt.com/docs/core/connect-data-platform/clickhouse-setup + + + +This profile mapping translates Airflow connections with the type ``generic`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import ClickhouseUserPasswordProfileMapping + + profile = ClickhouseUserPasswordProfileMapping( + conn_id = 'my_generic_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. 
list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``host`` + - True + + - ``host`` + + + * - ``user`` + - True + + - ``login`` + + + * - ``password`` + - False + + - ``password`` + + + * - ``port`` + - False + + - ``port`` + + + * - ``schema`` + - True + + - ``schema`` + + + * - ``clickhouse`` + - True + + - ``extra.clickhouse`` + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. diff --git a/docs/reference/profiles/DatabricksOauth.rst b/docs/reference/profiles/DatabricksOauth.rst new file mode 100644 index 0000000000..a095564224 --- /dev/null +++ b/docs/reference/profiles/DatabricksOauth.rst @@ -0,0 +1,88 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +DatabricksOauth +=============== + + + +Maps Airflow Databricks connections with the client auth to dbt profiles. + + + +https://docs.getdbt.com/reference/warehouse-setups/databricks-setup + +https://airflow.apache.org/docs/apache-airflow-providers-databricks/stable/connections/databricks.html + + + +This profile mapping translates Airflow connections with the type ``databricks`` +into dbt profiles. 
To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import DatabricksOauthProfileMapping + + profile = DatabricksOauthProfileMapping( + conn_id = 'my_databricks_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``host`` + - True + + - ``host`` + + + * - ``schema`` + - True + + - ``schema`` + + + * - ``client_id`` + - True + + - ``['login', 'extra.client_id']`` + + + * - ``client_secret`` + - True + + - ``['password', 'extra.client_secret']`` + + + * - ``http_path`` + - True + + - ``extra.http_path`` + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. 
diff --git a/docs/reference/profiles/DatabricksToken.rst b/docs/reference/profiles/DatabricksToken.rst new file mode 100644 index 0000000000..65cfbb1437 --- /dev/null +++ b/docs/reference/profiles/DatabricksToken.rst @@ -0,0 +1,82 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +DatabricksToken +=============== + + + +Maps Airflow Databricks connections with a token to dbt profiles. + + + +https://docs.getdbt.com/reference/warehouse-setups/databricks-setup + +https://airflow.apache.org/docs/apache-airflow-providers-databricks/stable/connections/databricks.html + + + +This profile mapping translates Airflow connections with the type ``databricks`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import DatabricksTokenProfileMapping + + profile = DatabricksTokenProfileMapping( + conn_id = 'my_databricks_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``host`` + - True + + - ``host`` + + + * - ``schema`` + - True + + - ``schema`` + + + * - ``token`` + - True + + - ``['password', 'extra.token']`` + + + * - ``http_path`` + - True + + - ``extra.http_path`` + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. 
+- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. diff --git a/docs/reference/profiles/DuckDBUserPassword.rst b/docs/reference/profiles/DuckDBUserPassword.rst new file mode 100644 index 0000000000..a00085bc57 --- /dev/null +++ b/docs/reference/profiles/DuckDBUserPassword.rst @@ -0,0 +1,62 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +DuckDBUserPassword +================== + + + +Maps Airflow DuckDB connections using local path mapping to dbt profiles. + +https://docs.getdbt.com/docs/core/connect-data-platform/duckdb-setup + +https://github.com/astronomer/airflow-provider-duckdb + + + +This profile mapping translates Airflow connections with the type ``duckdb`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import DuckDBUserPasswordProfileMapping + + profile = DuckDBUserPasswordProfileMapping( + conn_id = 'my_duckdb_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. 
list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``path`` + - True + + - ``host`` + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. diff --git a/docs/reference/profiles/ExasolUserPassword.rst b/docs/reference/profiles/ExasolUserPassword.rst new file mode 100644 index 0000000000..fdc6d7bb2a --- /dev/null +++ b/docs/reference/profiles/ExasolUserPassword.rst @@ -0,0 +1,120 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +ExasolUserPassword +================== + + + +Maps Airflow Exasol connections with a username and password to dbt profiles. + +https://docs.getdbt.com/reference/warehouse-setups/exasol-setup + + + +This profile mapping translates Airflow connections with the type ``exasol`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import ExasolUserPasswordProfileMapping + + profile = ExasolUserPasswordProfileMapping( + conn_id = 'my_exasol_connection', + profile_args = { ... 
}, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``dsn`` + - True + + - ``host`` + + + * - ``user`` + - True + + - ``login`` + + + * - ``password`` + - True + + - ``password`` + + + * - ``dbname`` + - True + + - ``schema`` + + + * - ``encryption`` + - False + + - ``extra.encryption`` + + + * - ``compression`` + - False + + - ``extra.compression`` + + + * - ``connection_timeout`` + - False + + - ``extra.connection_timeout`` + + + * - ``socket_timeout`` + - False + + - ``extra.socket_timeout`` + + + * - ``protocol_version`` + - False + + - ``extra.protocol_version`` + + + * - ``threads`` + - True + + - + + + * - ``schema`` + - True + + - + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. 
diff --git a/docs/reference/profiles/GoogleCloudOauth.rst b/docs/reference/profiles/GoogleCloudOauth.rst new file mode 100644 index 0000000000..b0d303f48c --- /dev/null +++ b/docs/reference/profiles/GoogleCloudOauth.rst @@ -0,0 +1,72 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +GoogleCloudOauth +================ + + + +Maps Airflow GCP connections to dbt BigQuery profiles that uses oauth via gcloud, + +if they don't use key file or JSON. + + + +https://docs.getdbt.com/docs/core/connect-data-platform/bigquery-setup#oauth-via-gcloud + +https://airflow.apache.org/docs/apache-airflow-providers-google/stable/connections/gcp.html + + + +This profile mapping translates Airflow connections with the type ``google_cloud_platform`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import GoogleCloudOauthProfileMapping + + profile = GoogleCloudOauthProfileMapping( + conn_id = 'my_google_cloud_platform_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``project`` + - True + + - ``extra.project`` + + + * - ``dataset`` + - True + + - ``extra.dataset`` + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. 
For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. diff --git a/docs/reference/profiles/GoogleCloudServiceAccountDict.rst b/docs/reference/profiles/GoogleCloudServiceAccountDict.rst new file mode 100644 index 0000000000..1a07827161 --- /dev/null +++ b/docs/reference/profiles/GoogleCloudServiceAccountDict.rst @@ -0,0 +1,76 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +GoogleCloudServiceAccountDict +============================= + + + +Maps Airflow GCP connections to dbt BigQuery profiles if they use a service account keyfile dict/json. + + + +https://docs.getdbt.com/reference/warehouse-setups/bigquery-setup#service-account-file + +https://airflow.apache.org/docs/apache-airflow-providers-google/stable/connections/gcp.html + + + +This profile mapping translates Airflow connections with the type ``google_cloud_platform`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import GoogleCloudServiceAccountDictProfileMapping + + profile = GoogleCloudServiceAccountDictProfileMapping( + conn_id = 'my_google_cloud_platform_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. 
list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``project`` + - True + + - ``extra.project`` + + + * - ``dataset`` + - True + + - ``extra.dataset`` + + + * - ``keyfile_json`` + - True + + - ``['extra.keyfile_dict', 'keyfile_dict', 'extra__google_cloud_platform__keyfile_dict']`` + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. diff --git a/docs/reference/profiles/GoogleCloudServiceAccountFile.rst b/docs/reference/profiles/GoogleCloudServiceAccountFile.rst new file mode 100644 index 0000000000..2e84cad55a --- /dev/null +++ b/docs/reference/profiles/GoogleCloudServiceAccountFile.rst @@ -0,0 +1,76 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +GoogleCloudServiceAccountFile +============================= + + + +Maps Airflow GCP connections to dbt BigQuery profiles if they use a service account file. 
+ + + +https://docs.getdbt.com/reference/warehouse-setups/bigquery-setup#service-account-file + +https://airflow.apache.org/docs/apache-airflow-providers-google/stable/connections/gcp.html + + + +This profile mapping translates Airflow connections with the type ``google_cloud_platform`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import GoogleCloudServiceAccountFileProfileMapping + + profile = GoogleCloudServiceAccountFileProfileMapping( + conn_id = 'my_google_cloud_platform_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``project`` + - True + + - ``extra.project`` + + + * - ``dataset`` + - True + + - ``extra.dataset`` + + + * - ``keyfile`` + - True + + - ``extra.key_path`` + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. 
If that field is not present, it will look for + ``extra.token``. diff --git a/docs/reference/profiles/MysqlUserPassword.rst b/docs/reference/profiles/MysqlUserPassword.rst new file mode 100644 index 0000000000..84ae205f38 --- /dev/null +++ b/docs/reference/profiles/MysqlUserPassword.rst @@ -0,0 +1,86 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +MysqlUserPassword +================= + + + +Maps Airflow MySQL connections using user + password authentication to dbt profiles. + +https://docs.getdbt.com/reference/warehouse-setups/mysql-setup + +https://airflow.apache.org/docs/apache-airflow-providers-mysql/stable/connections/mysql.html + + + +This profile mapping translates Airflow connections with the type ``mysql`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import MysqlUserPasswordProfileMapping + + profile = MysqlUserPasswordProfileMapping( + conn_id = 'my_mysql_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``server`` + - True + + - ``host`` + + + * - ``username`` + - True + + - ``login`` + + + * - ``password`` + - True + + - ``password`` + + + * - ``port`` + - False + + - ``port`` + + + * - ``schema`` + - True + + - ``schema`` + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. 
+- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. diff --git a/docs/reference/profiles/OracleUserPassword.rst b/docs/reference/profiles/OracleUserPassword.rst new file mode 100644 index 0000000000..1a2212b4e1 --- /dev/null +++ b/docs/reference/profiles/OracleUserPassword.rst @@ -0,0 +1,98 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +OracleUserPassword +================== + + + +Maps Airflow Oracle connections using user + password authentication to dbt profiles. + +https://docs.getdbt.com/reference/warehouse-setups/oracle-setup + +https://airflow.apache.org/docs/apache-airflow-providers-oracle/stable/connections/oracle.html + + + +This profile mapping translates Airflow connections with the type ``oracle`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import OracleUserPasswordProfileMapping + + profile = OracleUserPasswordProfileMapping( + conn_id = 'my_oracle_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. 
list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``host`` + - False + + - ``host`` + + + * - ``port`` + - False + + - ``port`` + + + * - ``service`` + - False + + - ``extra.service_name`` + + + * - ``user`` + - True + + - ``login`` + + + * - ``password`` + - True + + - ``password`` + + + * - ``database`` + - False + + - ``extra.service_name`` + + + * - ``connection_string`` + - False + + - ``extra.dsn`` + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. diff --git a/docs/reference/profiles/PostgresUserPassword.rst b/docs/reference/profiles/PostgresUserPassword.rst new file mode 100644 index 0000000000..363a6d2fe5 --- /dev/null +++ b/docs/reference/profiles/PostgresUserPassword.rst @@ -0,0 +1,98 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +PostgresUserPassword +==================== + + + +Maps Airflow Postgres connections using user + password authentication to dbt profiles. 
+ +https://docs.getdbt.com/reference/warehouse-setups/postgres-setup + +https://airflow.apache.org/docs/apache-airflow-providers-postgres/stable/connections/postgres.html + + + +This profile mapping translates Airflow connections with the type ``postgres`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import PostgresUserPasswordProfileMapping + + profile = PostgresUserPasswordProfileMapping( + conn_id = 'my_postgres_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``host`` + - True + + - ``host`` + + + * - ``user`` + - True + + - ``login`` + + + * - ``password`` + - True + + - ``password`` + + + * - ``port`` + - False + + - ``port`` + + + * - ``dbname`` + - True + + - ``schema`` + + + * - ``keepalives_idle`` + - False + + - ``extra.keepalives_idle`` + + + * - ``sslmode`` + - False + + - ``extra.sslmode`` + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. 
+ For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. diff --git a/docs/reference/profiles/RedshiftUserPassword.rst b/docs/reference/profiles/RedshiftUserPassword.rst new file mode 100644 index 0000000000..eb4dc4ab0d --- /dev/null +++ b/docs/reference/profiles/RedshiftUserPassword.rst @@ -0,0 +1,110 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +RedshiftUserPassword +==================== + + + +Maps Airflow Redshift connections to dbt Redshift profiles if they use a username and password. + +https://docs.getdbt.com/reference/warehouse-setups/redshift-setup + +https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/connections/redshift.html + + + +This profile mapping translates Airflow connections with the type ``redshift`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import RedshiftUserPasswordProfileMapping + + profile = RedshiftUserPasswordProfileMapping( + conn_id = 'my_redshift_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. 
list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``host`` + - True + + - ``host`` + + + * - ``user`` + - True + + - ``login`` + + + * - ``password`` + - True + + - ``password`` + + + * - ``port`` + - False + + - ``port`` + + + * - ``dbname`` + - True + + - ``schema`` + + + * - ``timeout`` + - False + + - ``extra.timeout`` + + + * - ``sslmode`` + - False + + - ``extra.sslmode`` + + + * - ``region`` + - False + + - ``extra.region`` + + + * - ``schema`` + - True + + - + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. diff --git a/docs/reference/profiles/SnowflakeEncryptedPrivateKeyFilePem.rst b/docs/reference/profiles/SnowflakeEncryptedPrivateKeyFilePem.rst new file mode 100644 index 0000000000..5b5edf804e --- /dev/null +++ b/docs/reference/profiles/SnowflakeEncryptedPrivateKeyFilePem.rst @@ -0,0 +1,122 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +SnowflakeEncryptedPrivateKeyFilePem +=================================== + + + +Maps Airflow Snowflake connections to dbt profiles if they use a user/private key path. 
+ +https://docs.getdbt.com/docs/core/connect-data-platform/snowflake-setup#key-pair-authentication + +https://airflow.apache.org/docs/apache-airflow-providers-snowflake/stable/connections/snowflake.html + + + +This profile mapping translates Airflow connections with the type ``snowflake`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import SnowflakeEncryptedPrivateKeyFilePemProfileMapping + + profile = SnowflakeEncryptedPrivateKeyFilePemProfileMapping( + conn_id = 'my_snowflake_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``account`` + - True + + - ``extra.account`` + + + * - ``user`` + - True + + - ``login`` + + + * - ``database`` + - True + + - ``extra.database`` + + + * - ``warehouse`` + - True + + - ``extra.warehouse`` + + + * - ``schema`` + - True + + - ``schema`` + + + * - ``role`` + - False + + - ``extra.role`` + + + * - ``private_key_passphrase`` + - True + + - ``password`` + + + * - ``private_key_path`` + - True + + - ``extra.private_key_file`` + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. 
For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. + + +Default Values +-------------- + +This profile mapping sets the following default values. These can be overridden by passing +them in ``profile_args``. + +.. list-table:: + :header-rows: 1 + + * - Field Name + - Default Value + + + * - ``threads`` + - ``4`` + diff --git a/docs/reference/profiles/SnowflakeEncryptedPrivateKeyPem.rst b/docs/reference/profiles/SnowflakeEncryptedPrivateKeyPem.rst new file mode 100644 index 0000000000..713e87e168 --- /dev/null +++ b/docs/reference/profiles/SnowflakeEncryptedPrivateKeyPem.rst @@ -0,0 +1,122 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +SnowflakeEncryptedPrivateKeyPem +=============================== + + + +Maps Airflow Snowflake connections to dbt profiles if they use a user/private key. + +https://docs.getdbt.com/docs/core/connect-data-platform/snowflake-setup#key-pair-authentication + +https://airflow.apache.org/docs/apache-airflow-providers-snowflake/stable/connections/snowflake.html + + + +This profile mapping translates Airflow connections with the type ``snowflake`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import SnowflakeEncryptedPrivateKeyPemProfileMapping + + profile = SnowflakeEncryptedPrivateKeyPemProfileMapping( + conn_id = 'my_snowflake_connection', + profile_args = { ... 
}, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``account`` + - True + + - ``extra.account`` + + + * - ``user`` + - True + + - ``login`` + + + * - ``database`` + - True + + - ``extra.database`` + + + * - ``warehouse`` + - True + + - ``extra.warehouse`` + + + * - ``schema`` + - True + + - ``schema`` + + + * - ``role`` + - False + + - ``extra.role`` + + + * - ``private_key`` + - True + + - ``extra.private_key_content`` + + + * - ``private_key_passphrase`` + - True + + - ``password`` + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. + + +Default Values +-------------- + +This profile mapping sets the following default values. These can be overridden by passing +them in ``profile_args``. + +.. 
list-table:: + :header-rows: 1 + + * - Field Name + - Default Value + + + * - ``threads`` + - ``4`` + diff --git a/docs/reference/profiles/SnowflakePrivateKeyPem.rst b/docs/reference/profiles/SnowflakePrivateKeyPem.rst new file mode 100644 index 0000000000..e9e8a0ee50 --- /dev/null +++ b/docs/reference/profiles/SnowflakePrivateKeyPem.rst @@ -0,0 +1,116 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +SnowflakePrivateKeyPem +====================== + + + +Maps Airflow Snowflake connections to dbt profiles if they use a user/private key. + +https://docs.getdbt.com/docs/core/connect-data-platform/snowflake-setup#key-pair-authentication + +https://airflow.apache.org/docs/apache-airflow-providers-snowflake/stable/connections/snowflake.html + + + +This profile mapping translates Airflow connections with the type ``snowflake`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import SnowflakePrivateKeyPemProfileMapping + + profile = SnowflakePrivateKeyPemProfileMapping( + conn_id = 'my_snowflake_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. 
list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``account`` + - True + + - ``extra.account`` + + + * - ``user`` + - True + + - ``login`` + + + * - ``database`` + - True + + - ``extra.database`` + + + * - ``warehouse`` + - True + + - ``extra.warehouse`` + + + * - ``schema`` + - True + + - ``schema`` + + + * - ``role`` + - False + + - ``extra.role`` + + + * - ``private_key`` + - True + + - ``extra.private_key_content`` + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. + + +Default Values +-------------- + +This profile mapping sets the following default values. These can be overridden by passing +them in ``profile_args``. + +.. list-table:: + :header-rows: 1 + + * - Field Name + - Default Value + + + * - ``threads`` + - ``4`` + diff --git a/docs/reference/profiles/SnowflakeUserPassword.rst b/docs/reference/profiles/SnowflakeUserPassword.rst new file mode 100644 index 0000000000..9fc78be629 --- /dev/null +++ b/docs/reference/profiles/SnowflakeUserPassword.rst @@ -0,0 +1,128 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. 
+ + +SnowflakeUserPassword +===================== + + + +Maps Airflow Snowflake connections to dbt profiles if they use a user/password. + +https://docs.getdbt.com/reference/warehouse-setups/snowflake-setup + +https://airflow.apache.org/docs/apache-airflow-providers-snowflake/stable/connections/snowflake.html + + + +This profile mapping translates Airflow connections with the type ``snowflake`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import SnowflakeUserPasswordProfileMapping + + profile = SnowflakeUserPasswordProfileMapping( + conn_id = 'my_snowflake_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``account`` + - True + + - ``extra.account`` + + + * - ``user`` + - True + + - ``login`` + + + * - ``password`` + - True + + - ``password`` + + + * - ``database`` + - True + + - ``extra.database`` + + + * - ``warehouse`` + - True + + - ``extra.warehouse`` + + + * - ``schema`` + - True + + - ``schema`` + + + * - ``role`` + - False + + - ``extra.role`` + + + * - ``host`` + - False + + - ``extra.host`` + + + * - ``port`` + - False + + - ``extra.port`` + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. 
For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. + + +Default Values +-------------- + +This profile mapping sets the following default values. These can be overridden by passing +them in ``profile_args``. + +.. list-table:: + :header-rows: 1 + + * - Field Name + - Default Value + + + * - ``threads`` + - ``4`` + diff --git a/docs/reference/profiles/SparkThrift.rst b/docs/reference/profiles/SparkThrift.rst new file mode 100644 index 0000000000..d19f828ea6 --- /dev/null +++ b/docs/reference/profiles/SparkThrift.rst @@ -0,0 +1,74 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +SparkThrift +=========== + + + +Maps Airflow Spark connections to dbt profiles if they use a thrift connection. + +https://docs.getdbt.com/reference/warehouse-setups/spark-setup#thrift + +https://airflow.apache.org/docs/apache-airflow-providers-apache-spark/stable/connections/spark.html + + + +This profile mapping translates Airflow connections with the type ``spark`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import SparkThriftProfileMapping + + profile = SparkThriftProfileMapping( + conn_id = 'my_spark_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. 
You can also add additional fields +to the ``profile_args`` dict. + +.. list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``host`` + - True + + - ``host`` + + + * - ``port`` + - False + + - ``port`` + + + * - ``schema`` + - True + + - + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. diff --git a/docs/reference/profiles/StandardSQLServerAuth.rst b/docs/reference/profiles/StandardSQLServerAuth.rst new file mode 100644 index 0000000000..89e72f7c81 --- /dev/null +++ b/docs/reference/profiles/StandardSQLServerAuth.rst @@ -0,0 +1,90 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +StandardSQLServerAuth +===================== + + + +This profile mapping translates Airflow connections with the type ``generic`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import StandardSQLServerAuthProfileMapping + + profile = StandardSQLServerAuthProfileMapping( + conn_id = 'my_generic_connection', + profile_args = { ... 
}, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``server`` + - True + + - ``host`` + + + * - ``user`` + - True + + - ``login`` + + + * - ``password`` + - True + + - ``password`` + + + * - ``port`` + - False + + - ``port`` + + + * - ``schema`` + - True + + - ``schema`` + + + * - ``database`` + - True + + - ``extra.database`` + + + * - ``driver`` + - True + + - ``extra.driver`` + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. diff --git a/docs/reference/profiles/StarrocksUserPassword.rst b/docs/reference/profiles/StarrocksUserPassword.rst new file mode 100644 index 0000000000..9ff740bbf3 --- /dev/null +++ b/docs/reference/profiles/StarrocksUserPassword.rst @@ -0,0 +1,84 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. 
+ + +StarrocksUserPassword +===================== + + + +Maps Airflow MySQL connections using user + password authentication to dbt StarRocks profiles. + +https://docs.getdbt.com/docs/core/connect-data-platform/starrocks-setup + + + +This profile mapping translates Airflow connections with the type ``mysql`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import StarrocksUserPasswordProfileMapping + + profile = StarrocksUserPasswordProfileMapping( + conn_id = 'my_mysql_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``host`` + - True + + - ``host`` + + + * - ``username`` + - True + + - ``login`` + + + * - ``password`` + - True + + - ``password`` + + + * - ``port`` + - True + + - ``port`` + + + * - ``schema`` + - True + + - ``schema`` + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order.
+ For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. diff --git a/docs/reference/profiles/TeradataUserPassword.rst b/docs/reference/profiles/TeradataUserPassword.rst new file mode 100644 index 0000000000..cad00003cd --- /dev/null +++ b/docs/reference/profiles/TeradataUserPassword.rst @@ -0,0 +1,86 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +TeradataUserPassword +==================== + + + +Maps Airflow Teradata connections using user + password authentication to dbt profiles. + +https://docs.getdbt.com/docs/core/connect-data-platform/teradata-setup + +https://airflow.apache.org/docs/apache-airflow-providers-teradata/stable/connections/teradata.html + + + +This profile mapping translates Airflow connections with the type ``teradata`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import TeradataUserPasswordProfileMapping + + profile = TeradataUserPasswordProfileMapping( + conn_id = 'my_teradata_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. 
list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``host`` + - True + + - ``host`` + + + * - ``user`` + - True + + - ``login`` + + + * - ``password`` + - True + + - ``password`` + + + * - ``schema`` + - False + + - ``schema`` + + + * - ``tmode`` + - False + + - ``extra.tmode`` + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. diff --git a/docs/reference/profiles/TrinoCertificate.rst b/docs/reference/profiles/TrinoCertificate.rst new file mode 100644 index 0000000000..a2e3c3fd3e --- /dev/null +++ b/docs/reference/profiles/TrinoCertificate.rst @@ -0,0 +1,104 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +TrinoCertificate +================ + + + +Maps Airflow Trino connections to Certificate Trino dbt profiles. + +https://docs.getdbt.com/reference/warehouse-setups/trino-setup#certificate + +https://airflow.apache.org/docs/apache-airflow-providers-trino/stable/connections.html + + + +This profile mapping translates Airflow connections with the type ``trino`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. 
code-block:: python + + from cosmos.profiles import TrinoCertificateProfileMapping + + profile = TrinoCertificateProfileMapping( + conn_id = 'my_trino_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``client_certificate`` + - True + + - ``extra.certs__client_cert_path`` + + + * - ``client_private_key`` + - True + + - ``extra.certs__client_key_path`` + + + * - ``host`` + - True + + - ``host`` + + + * - ``port`` + - True + + - ``port`` + + + * - ``user`` + - False + + - ``login`` + + + * - ``session_properties`` + - False + + - ``extra.session_properties`` + + + * - ``database`` + - True + + - + + + * - ``schema`` + - True + + - + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. 
diff --git a/docs/reference/profiles/TrinoJWT.rst b/docs/reference/profiles/TrinoJWT.rst new file mode 100644 index 0000000000..14c0cd9f86 --- /dev/null +++ b/docs/reference/profiles/TrinoJWT.rst @@ -0,0 +1,100 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +TrinoJWT +======== + + + +Maps Airflow Trino connections to JWT Trino dbt profiles. + + + +https://docs.getdbt.com/reference/warehouse-setups/trino-setup#jwt + +https://airflow.apache.org/docs/apache-airflow-providers-trino/stable/connections.html + + + +This profile mapping translates Airflow connections with the type ``trino`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import TrinoJWTProfileMapping + + profile = TrinoJWTProfileMapping( + conn_id = 'my_trino_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``jwt_token`` + - True + + - ``extra.jwt__token`` + + + * - ``host`` + - True + + - ``host`` + + + * - ``port`` + - True + + - ``port`` + + + * - ``user`` + - False + + - ``login`` + + + * - ``session_properties`` + - False + + - ``extra.session_properties`` + + + * - ``database`` + - True + + - + + + * - ``schema`` + - True + + - + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. 
+- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. diff --git a/docs/reference/profiles/TrinoLDAP.rst b/docs/reference/profiles/TrinoLDAP.rst new file mode 100644 index 0000000000..240f81f848 --- /dev/null +++ b/docs/reference/profiles/TrinoLDAP.rst @@ -0,0 +1,100 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +TrinoLDAP +========= + + + +Maps Airflow Trino connections to LDAP Trino dbt profiles. + + + +https://docs.getdbt.com/reference/warehouse-setups/trino-setup#ldap + +https://airflow.apache.org/docs/apache-airflow-providers-trino/stable/connections.html + + + +This profile mapping translates Airflow connections with the type ``trino`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import TrinoLDAPProfileMapping + + profile = TrinoLDAPProfileMapping( + conn_id = 'my_trino_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. 
list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``user`` + - True + + - ``login`` + + + * - ``password`` + - True + + - ``password`` + + + * - ``host`` + - True + + - ``host`` + + + * - ``port`` + - True + + - ``port`` + + + * - ``session_properties`` + - False + + - ``extra.session_properties`` + + + * - ``database`` + - True + + - + + + * - ``schema`` + - True + + - + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. diff --git a/docs/reference/profiles/VerticaUserPassword.rst b/docs/reference/profiles/VerticaUserPassword.rst new file mode 100644 index 0000000000..fc07296a8d --- /dev/null +++ b/docs/reference/profiles/VerticaUserPassword.rst @@ -0,0 +1,202 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + + +VerticaUserPassword +=================== + + + +Maps Airflow Vertica connections using username + password authentication to dbt profiles. + +.. note:: + + Use Airflow connection ``schema`` for vertica ``database`` to keep it consistent with other connection types and profiles. The Vertica Airflow provider hook `assumes this `_. 
+ + This seems to be a common approach also for `Postgres `_, Redshift and Exasol since there is no ``database`` field in Airflow connection and ``schema`` is not required for the database connection. + +.. seealso:: + + https://docs.getdbt.com/reference/warehouse-setups/vertica-setup + + https://airflow.apache.org/docs/apache-airflow-providers-vertica/stable/connections/vertica.html + + + +This profile mapping translates Airflow connections with the type ``vertica`` +into dbt profiles. To use this profile, import it from ``cosmos.profiles``: + +.. code-block:: python + + from cosmos.profiles import VerticaUserPasswordProfileMapping + + profile = VerticaUserPasswordProfileMapping( + conn_id = 'my_vertica_connection', + profile_args = { ... }, + ) + +While the profile mapping pulls fields from Airflow connections, you may need to supplement it +with additional ``profile_args``. The below table shows which fields are required, along with those +not required but pulled from the Airflow connection if present. You can also add additional fields +to the ``profile_args`` dict. + +.. 
list-table:: + :header-rows: 1 + + * - dbt Field Name + - Required + - Airflow Field Name + + + * - ``host`` + - True + + - ``host`` + + + * - ``username`` + - True + + - ``login`` + + + * - ``password`` + - True + + - ``password`` + + + * - ``port`` + - False + + - ``port`` + + + * - ``database`` + - True + + - ``schema`` + + + * - ``autocommit`` + - False + + - ``extra.autocommit`` + + + * - ``backup_server_node`` + - False + + - ``extra.backup_server_node`` + + + * - ``binary_transfer`` + - False + + - ``extra.binary_transfer`` + + + * - ``connection_load_balance`` + - False + + - ``extra.connection_load_balance`` + + + * - ``connection_timeout`` + - False + + - ``extra.connection_timeout`` + + + * - ``disable_copy_local`` + - False + + - ``extra.disable_copy_local`` + + + * - ``kerberos_host_name`` + - False + + - ``extra.kerberos_host_name`` + + + * - ``kerberos_service_name`` + - False + + - ``extra.kerberos_service_name`` + + + * - ``log_level`` + - False + + - ``extra.log_level`` + + + * - ``log_path`` + - False + + - ``extra.log_path`` + + + * - ``oauth_access_token`` + - False + + - ``extra.oauth_access_token`` + + + * - ``request_complex_types`` + - False + + - ``extra.request_complex_types`` + + + * - ``session_label`` + - False + + - ``extra.session_label`` + + + * - ``ssl`` + - False + + - ``extra.ssl`` + + + * - ``unicode_error`` + - False + + - ``extra.unicode_error`` + + + * - ``use_prepared_statements`` + - False + + - ``extra.use_prepared_statements`` + + + * - ``workload`` + - False + + - ``extra.workload`` + + + * - ``schema`` + - True + + - + + + + +Some notes about the table above: + +- This table doesn't necessarily show the full list of fields you *can* pass to the dbt profile. To + see the full list of fields, see the link to the dbt docs at the top of this page. +- If the Airflow field name starts with an ``extra.``, this means that the field is nested under + the ``extra`` field in the Airflow connection. 
For example, if the Airflow field name is + ``extra.token``, this means that the field is nested under ``extra`` in the Airflow connection, + and the field name is ``token``. +- If there are multiple Airflow field names, the profile mapping looks at those fields in order. + For example, if the Airflow field name is ``['password', 'extra.token']``, the profile mapping + will first look for a field named ``password``. If that field is not present, it will look for + ``extra.token``. diff --git a/docs/reference/profiles/index.rst b/docs/reference/profiles/index.rst new file mode 100644 index 0000000000..fa98cc9835 --- /dev/null +++ b/docs/reference/profiles/index.rst @@ -0,0 +1,251 @@ +.. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. + +.. toctree:: + :caption: Profiles + + self + AthenaAccessKey + ClickhouseUserPassword + GoogleCloudServiceAccountFile + GoogleCloudServiceAccountDict + GoogleCloudOauth + DatabricksToken + DatabricksOauth + DuckDBUserPassword + MysqlUserPassword + OracleUserPassword + PostgresUserPassword + RedshiftUserPassword + SnowflakeUserPassword + SnowflakeEncryptedPrivateKeyFilePem + SnowflakeEncryptedPrivateKeyPem + SnowflakePrivateKeyPem + StarrocksUserPassword + SparkThrift + ExasolUserPassword + TeradataUserPassword + TrinoLDAP + TrinoCertificate + TrinoJWT + VerticaUserPassword + StandardSQLServerAuth + + +Profiles Overview +================== + +Cosmos supports two methods of authenticating with your database: + +- using your own dbt profiles.yml file +- using Airflow connections via Cosmos' profile mappings + +If you're already interacting with your database from Airflow and have a connection set up, it's recommended +to use a profile mapping to translate that Airflow connection to a dbt profile. This is because it's easier to +maintain a single connection object in Airflow than it is to maintain a connection object in Airflow and a dbt profile +in your dbt project. 
+ +If you don't already have an Airflow connection, or if there's no readily-available profile mapping for your database, +you can use your own dbt profiles.yml file. + +Regardless of which method you use, you'll need to tell Cosmos which profile and target name it should use. Profile config +is set in the ``cosmos.config.ProfileConfig`` object, like so: + +.. code-block:: python + + from cosmos.config import ProfileConfig + + profile_config = ProfileConfig( + profile_name="my_profile_name", + target_name="my_target_name", + + # choose one of the following + profile_mapping=..., + profiles_yml_filepath=..., + ) + + dag = DbtDag(profile_config=profile_config, ...) + + +Using your own profiles.yml file +++++++++++++++++++++++++++++++++ + +If you don't want to use Airflow connections, or if there's no readily-available profile mapping for your database, +you can use your own dbt profiles.yml file. To do so, you'll need to pass the path to your profiles.yml file to the +``profiles_yml_filepath`` argument in ``ProfileConfig``. + +For example, the code snippet below points Cosmos at a ``profiles.yml`` file and instructs Cosmos to use the +``my_snowflake_profile`` profile and ``dev`` target: + +.. code-block:: python + + from cosmos.config import ProfileConfig + + profile_config = ProfileConfig( + profile_name="my_snowflake_profile", + target_name="dev", + profiles_yml_filepath="/path/to/profiles.yml", + ) + + dag = DbtDag(profile_config=profile_config, ...) + +Using a profile mapping ++++++++++++++++++++++++ + +Profile mappings are utilities provided by Cosmos that translate Airflow connections to dbt profiles. This means that +you can use the same connection objects you use in Airflow to authenticate with your database in dbt. To do so, there's +a class in Cosmos for each Airflow connection to dbt profile mapping. + +You can find the available profile mappings on the left-hand side of this page. 
Each profile mapping is imported from +``cosmos.profiles`` and takes two arguments: + +* ``conn_id``: the Airflow connection ID to use. +* ``profile_args``: a dictionary of additional arguments to pass to the dbt profile. This is useful for specifying + values that are not in the Airflow connection. This also acts as an override for any values that are in the Airflow + connection but should be overridden. + +Below is an example of using the Snowflake profile mapping, where we take most arguments from the Airflow connection +but override the ``database`` and ``schema`` values: + +.. code-block:: python + + from cosmos.profiles import SnowflakeUserPasswordProfileMapping + + profile_config = ProfileConfig( + profile_name="my_profile_name", + target_name="my_target_name", + profile_mapping=SnowflakeUserPasswordProfileMapping( + conn_id="my_snowflake_conn_id", + profile_args={ + "database": "my_snowflake_database", + "schema": "my_snowflake_schema", + }, + ), + ) + + dag = DbtDag(profile_config=profile_config, ...) + +Note that when using a profile mapping, the profiles.yml file gets generated with the profile name and target name +you specify in ``ProfileConfig``. + +.. _profile-customise-per-node: + +Customising the profile config per dbt node ++++++++++++++++++++++++++++++++++++++++++++ +.. versionadded:: 1.9.0 + + +Since Cosmos 1.9.0, it is possible to customise which profile is used per dbt node. This works both when using a +``profile_mapping`` class and when using ``profiles_yml_filepath``. + +Let's say the user configures the profile at a ``DbtDag`` or ``DbtTaskGroup`` level as: + +.. 
code-block:: python + + from cosmos.profiles import PostgresUserPasswordProfileMapping + + profile_config = ProfileConfig( + profile_name="default_profile", + target_name="default_target", + profile_mapping=PostgresUserPasswordProfileMapping( + conn_id="default_conn", + profile_args={"schema": "default_schema"}, + ), + ) + +But for a specific node or group of nodes, the user would like to replace: + +* ``profile_name`` to be "non_default_profile" as opposed to "default_profile" +* ``target_name`` to be "stage" as opposed to "default_target" +* ``conn_id`` to be "non_default_connection" as opposed to "default_conn" +* ``schema`` to be "non_default_schema" as opposed to "default_schema" + +They could apply this different configuration to all the project seeds by doing: + +.. code-block:: + + seeds: + my_dbt_project: + +meta: + cosmos: + profile_config: + profile_name: non_default_profile + target_name: stage + profile_mapping: + conn_id: non_default_connection + profile_args: + schema: non_default_schema + +This same mechanism works for individual dbt nodes, as discussed in :ref:`operator-args-per-node`, +or for subsets of nodes selected based on path or other criteria that dbt supports. + + +Dbt profile config variables +---------------------------- +.. versionadded:: 1.4.0 + +The parts of ``profiles.yml`` that aren't specific to a particular data platform (see the `dbt docs `_) +can be set through the ``dbt_config_vars`` argument of a profile mapping, as in the example below: + +.. 
code-block:: python + + from cosmos.profiles import SnowflakeUserPasswordProfileMapping, DbtProfileConfigVars + + profile_config = ProfileConfig( + profile_name="my_profile_name", + target_name="my_target_name", + profile_mapping=SnowflakeUserPasswordProfileMapping( + conn_id="my_snowflake_conn_id", + profile_args={ + "database": "my_snowflake_database", + "schema": "my_snowflake_schema", + }, + dbt_config_vars=DbtProfileConfigVars( + send_anonymous_usage_stats=False, + partial_parse=True, + use_experimental_parse=True, + static_parser=True, + printer_width=120, + write_json=True, + warn_error=True, + warn_error_options={"include": "all"}, + log_format='text', + debug=True, + version_check=True, + ), + ), + ) + + dag = DbtDag(profile_config=profile_config, ...) + + +Disabling dbt event tracking +++++++++++++++++++++++++++++ + +.. note:: + Deprecated in v1.4 and will be removed in v2.0.0. Use ``dbt_config_vars=DbtProfileConfigVars(send_anonymous_usage_stats=False)`` instead. + +.. versionadded:: 1.3 + +By default `dbt will track events `_ by sending anonymous usage data +when dbt commands are invoked. Users have an option to opt out of event tracking by updating their ``profiles.yml`` file. + +If you'd like to disable this behavior in the Cosmos-generated profile, you can pass ``disable_event_tracking=True`` to the profile mapping, as in +the example below: + +.. code-block:: python + + from cosmos.profiles import SnowflakeUserPasswordProfileMapping + + profile_config = ProfileConfig( + profile_name="my_profile_name", + target_name="my_target_name", + profile_mapping=SnowflakeUserPasswordProfileMapping( + conn_id="my_snowflake_conn_id", + profile_args={ + "database": "my_snowflake_database", + "schema": "my_snowflake_schema", + }, + disable_event_tracking=True, + ), + ) + + dag = DbtDag(profile_config=profile_config, ...) 
\ No newline at end of file diff --git a/docs/templates/index.rst.jinja2 b/docs/reference/templates/index.rst.jinja2 similarity index 98% rename from docs/templates/index.rst.jinja2 rename to docs/reference/templates/index.rst.jinja2 index 87285565c3..65bc676c4d 100644 --- a/docs/templates/index.rst.jinja2 +++ b/docs/reference/templates/index.rst.jinja2 @@ -1,5 +1,5 @@ .. - This file is autogenerated by `docs/scripts/generate_mappings.py`. Do not edit by hand. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. .. toctree:: :caption: Profiles diff --git a/docs/templates/profile_mapping.rst.jinja2 b/docs/reference/templates/profile_mapping.rst.jinja2 similarity index 96% rename from docs/templates/profile_mapping.rst.jinja2 rename to docs/reference/templates/profile_mapping.rst.jinja2 index c5b25b48b1..2154f778ed 100644 --- a/docs/templates/profile_mapping.rst.jinja2 +++ b/docs/reference/templates/profile_mapping.rst.jinja2 @@ -1,5 +1,5 @@ .. - This file is autogenerated by `docs/scripts/generate_mappings.py`. Do not edit by hand. + This file is autogenerated by ``docs/scripts/generate_mappings.py``. Do not edit by hand. {{ mapping_name }}