From da7fa77330046f0d4bfa3bac5638d074c6e3daa8 Mon Sep 17 00:00:00 2001 From: woogakoki Date: Thu, 30 Nov 2023 08:51:05 +0100 Subject: [PATCH 01/23] adds LoadMode dbt_ls_file to render tasks using dbt ls file path instead of running dbt ls localy --- cosmos/config.py | 10 ++++++++++ cosmos/constants.py | 1 + cosmos/dbt/graph.py | 26 ++++++++++++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/cosmos/config.py b/cosmos/config.py index 40756d2bb7..c7f133415d 100644 --- a/cosmos/config.py +++ b/cosmos/config.py @@ -269,8 +269,18 @@ class ExecutionConfig: dbt_executable_path: str | Path = field(default_factory=get_system_dbt) dbt_project_path: InitVar[str | Path | None] = None + dbt_ls_path: Path | None = None project_path: Path | None = field(init=False) def __post_init__(self, dbt_project_path: str | Path | None) -> None: self.project_path = Path(dbt_project_path) if dbt_project_path else None + + def is_dbt_ls_file_available(self) -> bool: + """ + Check if the `dbt ls` output is set and if the file exists. 
+ """ + if not self.dbt_ls_path: + return False + + return self.dbt_ls_path.exists() diff --git a/cosmos/constants.py b/cosmos/constants.py index 9aa38c34e6..96c5bdd070 100644 --- a/cosmos/constants.py +++ b/cosmos/constants.py @@ -27,6 +27,7 @@ class LoadMode(Enum): AUTOMATIC = "automatic" CUSTOM = "custom" DBT_LS = "dbt_ls" + DBT_LS_FILE = "dbt_ls_file" DBT_MANIFEST = "dbt_manifest" diff --git a/cosmos/dbt/graph.py b/cosmos/dbt/graph.py index a890c137c9..e9160b7249 100644 --- a/cosmos/dbt/graph.py +++ b/cosmos/dbt/graph.py @@ -161,6 +161,7 @@ def load( load_method = { LoadMode.CUSTOM: self.load_via_custom_parser, LoadMode.DBT_LS: self.load_via_dbt_ls, + LoadMode.DBT_LS_FILE: self.load_via_dbt_ls_file, LoadMode.DBT_MANIFEST: self.load_from_dbt_manifest, } @@ -272,6 +273,31 @@ def load_via_dbt_ls(self) -> None: logger.info("Total nodes: %i", len(self.nodes)) logger.info("Total filtered nodes: %i", len(self.nodes)) + def load_via_dbt_ls_file(self) -> None: + """ + This is between dbt ls and full manifest. It allows to use the output of the dbt ls as a file stored in the image + you run Cosmos on. The advantage is that you can use the parser from LoadMode.DBT_LS without actually running dbt ls + every time. + + This technically should increase performance and also removes the necessity to have your whole dbt project copied + to the airflow image. 
+ """ + logger.info("Trying to parse the dbt project `%s` using a dbt ls output file...", self.project.project_name) + + if not self.execution_config.is_dbt_ls_file_available(): + raise CosmosLoadDbtException(f"Unable to load dbt ls file using {self.execution_config.dbt_ls_path}") + + if not self.execution_config.project_path: + raise CosmosLoadDbtException("Unable to load dbt ls file without ExecutionConfig.dbt_project_path") + + with open(self.execution_config.dbt_ls_path) as fp: # type: ignore[arg-type] + dbt_ls_output = fp.read() + project_path = self.execution_config.project_path + nodes = parse_dbt_ls_output(project_path=project_path, ls_stdout=dbt_ls_output) + + self.nodes = nodes + self.filtered_nodes = nodes + def load_via_custom_parser(self) -> None: """ This is the least accurate way of loading `dbt` projects and filtering them out, since it uses custom Cosmos From e12eb17cb25eedd82f0d0b047b3a633f7d226b51 Mon Sep 17 00:00:00 2001 From: woogakoki Date: Fri, 1 Dec 2023 08:42:02 +0100 Subject: [PATCH 02/23] moves dbt_ls_path to render config adds some test and documentation --- cosmos/config.py | 18 +++---- cosmos/dbt/graph.py | 10 ++-- docs/configuration/parsing-methods.rst | 19 +++++++ tests/dbt/test_graph.py | 69 +++++++++++++++++++++++++- tests/sample/sample_dbt_ls.txt | 6 +++ 5 files changed, 106 insertions(+), 16 deletions(-) create mode 100644 tests/sample/sample_dbt_ls.txt diff --git a/cosmos/config.py b/cosmos/config.py index c7f133415d..f477de1a7e 100644 --- a/cosmos/config.py +++ b/cosmos/config.py @@ -56,6 +56,7 @@ class RenderConfig: dbt_executable_path: str | Path = get_system_dbt() env_vars: dict[str, str] = field(default_factory=dict) dbt_project_path: InitVar[str | Path | None] = None + dbt_ls_path: Path | None = None project_path: Path | None = field(init=False) @@ -83,6 +84,14 @@ def validate_dbt_command(self, fallback_cmd: str | Path = "") -> None: f"<{self.dbt_executable_path}>" + (f" and <{fallback_cmd}>." 
if fallback_cmd else ".") ) + def is_dbt_ls_file_available(self) -> bool: + """ + Check if the `dbt ls` output is set and if the file exists. + """ + if not self.dbt_ls_path: + return False + + return self.dbt_ls_path.exists() class ProjectConfig: """ @@ -269,18 +278,9 @@ class ExecutionConfig: dbt_executable_path: str | Path = field(default_factory=get_system_dbt) dbt_project_path: InitVar[str | Path | None] = None - dbt_ls_path: Path | None = None - project_path: Path | None = field(init=False) def __post_init__(self, dbt_project_path: str | Path | None) -> None: self.project_path = Path(dbt_project_path) if dbt_project_path else None - def is_dbt_ls_file_available(self) -> bool: - """ - Check if the `dbt ls` output is set and if the file exists. - """ - if not self.dbt_ls_path: - return False - return self.dbt_ls_path.exists() diff --git a/cosmos/dbt/graph.py b/cosmos/dbt/graph.py index e9160b7249..d7218b716b 100644 --- a/cosmos/dbt/graph.py +++ b/cosmos/dbt/graph.py @@ -275,22 +275,22 @@ def load_via_dbt_ls(self) -> None: def load_via_dbt_ls_file(self) -> None: """ - This is between dbt ls and full manifest. It allows to use the output of the dbt ls as a file stored in the image - you run Cosmos on. The advantage is that you can use the parser from LoadMode.DBT_LS without actually running dbt ls - every time. + This is between dbt ls and full manifest. It allows to use the output (needs to be json output) of the dbt ls as a + file stored in the image you run Cosmos on. The advantage is that you can use the parser from LoadMode.DBT_LS without + actually running dbt ls every time. BUT you will need one dbt ls file for each separate group. This technically should increase performance and also removes the necessity to have your whole dbt project copied to the airflow image. 
""" logger.info("Trying to parse the dbt project `%s` using a dbt ls output file...", self.project.project_name) - if not self.execution_config.is_dbt_ls_file_available(): + if not self.render_config.is_dbt_ls_file_available(): raise CosmosLoadDbtException(f"Unable to load dbt ls file using {self.execution_config.dbt_ls_path}") if not self.execution_config.project_path: raise CosmosLoadDbtException("Unable to load dbt ls file without ExecutionConfig.dbt_project_path") - with open(self.execution_config.dbt_ls_path) as fp: # type: ignore[arg-type] + with open(self.render_config.dbt_ls_path) as fp: # type: ignore[arg-type] dbt_ls_output = fp.read() project_path = self.execution_config.project_path nodes = parse_dbt_ls_output(project_path=project_path, ls_stdout=dbt_ls_output) diff --git a/docs/configuration/parsing-methods.rst b/docs/configuration/parsing-methods.rst index fbf6e43bfd..1fe535f05c 100644 --- a/docs/configuration/parsing-methods.rst +++ b/docs/configuration/parsing-methods.rst @@ -8,12 +8,14 @@ Cosmos offers several options to parse your dbt project: - ``automatic``. Tries to find a user-supplied ``manifest.json`` file. If it can't find one, it will run ``dbt ls`` to generate one. If that fails, it will use Cosmos' dbt parser. - ``dbt_manifest``. Parses a user-supplied ``manifest.json`` file. This can be generated manually with dbt commands or via a CI/CD process. - ``dbt_ls``. Parses a dbt project directory using the ``dbt ls`` command. +- ``dbt_ls_file``. Parses a dbt project directory using the output of ``dbt ls`` command from a file. - ``custom``. Uses Cosmos' custom dbt parser, which extracts dependencies from your dbt's model code. There are benefits and drawbacks to each method: - ``dbt_manifest``: You have to generate the manifest file on your own. When using the manifest, Cosmos gets a complete set of metadata about your models. 
However, Cosmos uses its own selecting & excluding logic to determine which models to run, which may not be as robust as dbt's. - ``dbt_ls``: Cosmos will generate the manifest file for you. This method uses dbt's metadata AND dbt's selecting/excluding logic. This is the most robust method. However, this requires the dbt executable to be installed on your machine (either on the host directly or in a virtual environment). +- ``dbt_ls_file``: You have to generate a file of any specific ``dbt ls``command (needs to be using ``--output json``). It ususes the same parsing logic as ``dbt_ls``, but you will need to provide one file for each group of selected models you want. - ``custom``: Cosmos will parse your project and model files for you. This means that Cosmos will not have access to dbt's metadata. However, this method does not require the dbt executable to be installed on your machine. If you're using the ``local`` mode, you should use the ``dbt_ls`` method. @@ -75,6 +77,23 @@ To use this: # ..., ) +``dbt_ls_file`` +---------------- + +If you provide the output of ``dbt ls --output json`` as a file, you can use this to parse similar to ``dbt_ls``. +You can supply a ``dbt_ls_path`` parameter on the DbtDag / DbtTaskGroup with a path to a ``dbt_ls_output.txt`` file. + +To use this: + +.. 
code-block:: python + + DbtDag( + render_config=RenderConfig( + load_method=LoadMode.DBT_MANIFEST, + dbt_ls_path="/path/to/dbt_ls_file.txt" + ) + # ..., + ) ``custom`` ---------- diff --git a/tests/dbt/test_graph.py b/tests/dbt/test_graph.py index a424976a10..093722cfa5 100644 --- a/tests/dbt/test_graph.py +++ b/tests/dbt/test_graph.py @@ -23,7 +23,7 @@ SAMPLE_MANIFEST_PY = Path(__file__).parent.parent / "sample/manifest_python.json" SAMPLE_MANIFEST_MODEL_VERSION = Path(__file__).parent.parent / "sample/manifest_model_version.json" SAMPLE_MANIFEST_SOURCE = Path(__file__).parent.parent / "sample/manifest_source.json" - +SAMPLE_DBT_LS_OUTPUT = Path(__file__).parent.parent / "sample/sample_dbt_ls.txt" @pytest.fixture def tmp_dbt_project_dir(): @@ -110,6 +110,28 @@ def test_load_automatic_manifest_is_available(mock_load_from_dbt_manifest): assert mock_load_from_dbt_manifest.called +@patch("cosmos.dbt.graph.DbtGraph.load_via_dbt_ls_file", return_value=None) +def test_load_automatic_dbt_ls_file_is_available(mock_load_via_dbt_ls_file): + project_config = ProjectConfig( + dbt_project_path=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME + ) + profile_config = ProfileConfig( + profile_name="test", + target_name="test", + profiles_yml_filepath=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME / "profiles.yml", + ) + render_config = RenderConfig( + dbt_ls_path=SAMPLE_DBT_LS_OUTPUT + ) + dbt_graph = DbtGraph( + project=project_config, + profile_config=profile_config, + render_config=render_config + ) + dbt_graph.load(method=LoadMode.DBT_LS_FILE,execution_mode=ExecutionMode.LOCAL) + assert mock_load_via_dbt_ls_file.called + + @patch("cosmos.dbt.graph.DbtGraph.load_via_custom_parser", side_effect=None) @patch("cosmos.dbt.graph.DbtGraph.load_via_dbt_ls", return_value=None) def test_load_automatic_without_manifest_with_profile_yml(mock_load_via_dbt_ls, mock_load_via_custom_parser): @@ -200,8 +222,9 @@ def test_load_manifest_with_manifest(mock_load_from_dbt_manifest): 
@patch("cosmos.dbt.graph.DbtGraph.load_via_custom_parser", return_value=None) @patch("cosmos.dbt.graph.DbtGraph.load_via_dbt_ls", return_value=None) @patch("cosmos.dbt.graph.DbtGraph.load_from_dbt_manifest", return_value=None) +@patch("cosmos.dbt.graph.DbtGraph.load_via_dbt_ls_file", return_value=None) def test_load( - mock_load_from_dbt_manifest, mock_load_via_dbt_ls, mock_load_via_custom_parser, exec_mode, method, expected_function + mock_load_from_dbt_manifest, mock_load_via_dbt_ls_file, mock_load_via_dbt_ls, mock_load_via_custom_parser, exec_mode, method, expected_function ): project_config = ProjectConfig(dbt_project_path=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME) profile_config = ProfileConfig( @@ -678,6 +701,48 @@ def test_load_dbt_ls_and_manifest_with_model_version(load_method): "model.jaffle_shop.stg_payments", } == set(dbt_graph.nodes["model.jaffle_shop.orders"].depends_on) +@pytest.mark.integration +def test_load_via_dbt_ls_file(): + project_config = ProjectConfig( + dbt_project_path=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME + ) + profile_config = ProfileConfig( + profile_name="test", + target_name="test", + profiles_yml_filepath=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME / "profiles.yml", + ) + execution_config = ExecutionConfig( + dbt_project_path=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME + ) + render_config = RenderConfig( + dbt_ls_path=SAMPLE_DBT_LS_OUTPUT + ) + dbt_graph = DbtGraph( + project=project_config, + profile_config=profile_config, + execution_config=execution_config, + render_config=render_config + ) + dbt_graph.load(method=LoadMode.DBT_LS_FILE, execution_mode=ExecutionMode.LOCAL) + + expected_dbt_nodes = { + "model.jaffle_shop.stg_customers": "stg_customers", + "model.jaffle_shop.stg_orders": "stg_orders", + "model.jaffle_shop.stg_payments": "stg_payments", + } + for unique_id, name in expected_dbt_nodes.items(): + assert unique_id in dbt_graph.nodes + assert name == dbt_graph.nodes[unique_id].name + # Test dependencies + assert { + 
"seed.jaffle_shop.raw_customers" + } == set(dbt_graph.nodes["model.jaffle_shop.stg_customers"].depends_on) + assert { + "seed.jaffle_shop.raw_orders" + } == set(dbt_graph.nodes["model.jaffle_shop.stg_orders"].depends_on) + assert { + "seed.jaffle_shop.raw_payments" + } == set(dbt_graph.nodes["model.jaffle_shop.stg_payments"].depends_on) @pytest.mark.parametrize( "stdout,returncode", diff --git a/tests/sample/sample_dbt_ls.txt b/tests/sample/sample_dbt_ls.txt new file mode 100644 index 0000000000..b356a5208c --- /dev/null +++ b/tests/sample/sample_dbt_ls.txt @@ -0,0 +1,6 @@ +14:26:04 Running with dbt=1.6.9 +14:26:04 Registered adapter: exasol=1.6.2 +14:26:04 Found 5 models, 3 seeds, 20 tests, 0 sources, 0 exposures, 0 metrics, 366 macros, 0 groups, 0 semantic models +{"name": "stg_customers", "resource_type": "model", "package_name": "jaffle_shop", "original_file_path": "models/staging/stg_customers.sql", "unique_id": "model.jaffle_shop.stg_customers", "alias": "stg_customers", "config": {"enabled": true, "alias": null, "schema": null, "database": null, "tags": [], "meta": {}, "group": null, "materialized": "view", "incremental_strategy": null, "persist_docs": {}, "quoting": {}, "column_types": {}, "full_refresh": null, "unique_key": null, "on_schema_change": "ignore", "on_configuration_change": "apply", "grants": {}, "packages": [], "docs": {"show": true, "node_color": null}, "contract": {"enforced": false}, "post-hook": [], "pre-hook": []}, "tags": [], "depends_on": {"macros": [], "nodes": ["seed.jaffle_shop.raw_customers"]}} +{"name": "stg_orders", "resource_type": "model", "package_name": "jaffle_shop", "original_file_path": "models/staging/stg_orders.sql", "unique_id": "model.jaffle_shop.stg_orders", "alias": "stg_orders", "config": {"enabled": true, "alias": null, "schema": null, "database": null, "tags": [], "meta": {}, "group": null, "materialized": "view", "incremental_strategy": null, "persist_docs": {}, "quoting": {}, "column_types": {}, "full_refresh": 
null, "unique_key": null, "on_schema_change": "ignore", "on_configuration_change": "apply", "grants": {}, "packages": [], "docs": {"show": true, "node_color": null}, "contract": {"enforced": false}, "post-hook": [], "pre-hook": []}, "tags": [], "depends_on": {"macros": [], "nodes": ["seed.jaffle_shop.raw_orders"]}} +{"name": "stg_payments", "resource_type": "model", "package_name": "jaffle_shop", "original_file_path": "models/staging/stg_payments.sql", "unique_id": "model.jaffle_shop.stg_payments", "alias": "stg_payments", "config": {"enabled": true, "alias": null, "schema": null, "database": null, "tags": [], "meta": {}, "group": null, "materialized": "view", "incremental_strategy": null, "persist_docs": {}, "quoting": {}, "column_types": {}, "full_refresh": null, "unique_key": null, "on_schema_change": "ignore", "on_configuration_change": "apply", "grants": {}, "packages": [], "docs": {"show": true, "node_color": null}, "contract": {"enforced": false}, "post-hook": [], "pre-hook": []}, "tags": [], "depends_on": {"macros": [], "nodes": ["seed.jaffle_shop.raw_payments"]}} From 93c1557dc58d9202110488470dd6079e7542b7d1 Mon Sep 17 00:00:00 2001 From: woogakoki Date: Fri, 1 Dec 2023 08:46:48 +0100 Subject: [PATCH 03/23] adds missing space in documentation --- docs/configuration/parsing-methods.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration/parsing-methods.rst b/docs/configuration/parsing-methods.rst index 1fe535f05c..1281dd3800 100644 --- a/docs/configuration/parsing-methods.rst +++ b/docs/configuration/parsing-methods.rst @@ -15,7 +15,7 @@ There are benefits and drawbacks to each method: - ``dbt_manifest``: You have to generate the manifest file on your own. When using the manifest, Cosmos gets a complete set of metadata about your models. However, Cosmos uses its own selecting & excluding logic to determine which models to run, which may not be as robust as dbt's. - ``dbt_ls``: Cosmos will generate the manifest file for you. 
This method uses dbt's metadata AND dbt's selecting/excluding logic. This is the most robust method. However, this requires the dbt executable to be installed on your machine (either on the host directly or in a virtual environment). -- ``dbt_ls_file``: You have to generate a file of any specific ``dbt ls``command (needs to be using ``--output json``). It ususes the same parsing logic as ``dbt_ls``, but you will need to provide one file for each group of selected models you want. +- ``dbt_ls_file``: You have to generate a file of any specific ``dbt ls`` command (needs to be using ``--output json``). It ususes the same parsing logic as ``dbt_ls``, but you will need to provide one file for each group of selected models you want. - ``custom``: Cosmos will parse your project and model files for you. This means that Cosmos will not have access to dbt's metadata. However, this method does not require the dbt executable to be installed on your machine. If you're using the ``local`` mode, you should use the ``dbt_ls`` method. From 4c1e37b12796b0d8f8cec0b7dcc216dd0fe28f0a Mon Sep 17 00:00:00 2001 From: woogakoki Date: Fri, 1 Dec 2023 09:46:42 +0100 Subject: [PATCH 04/23] points out the advantage of new load method more --- docs/configuration/parsing-methods.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration/parsing-methods.rst b/docs/configuration/parsing-methods.rst index 1281dd3800..64ff611ec8 100644 --- a/docs/configuration/parsing-methods.rst +++ b/docs/configuration/parsing-methods.rst @@ -15,7 +15,7 @@ There are benefits and drawbacks to each method: - ``dbt_manifest``: You have to generate the manifest file on your own. When using the manifest, Cosmos gets a complete set of metadata about your models. However, Cosmos uses its own selecting & excluding logic to determine which models to run, which may not be as robust as dbt's. - ``dbt_ls``: Cosmos will generate the manifest file for you. 
This method uses dbt's metadata AND dbt's selecting/excluding logic. This is the most robust method. However, this requires the dbt executable to be installed on your machine (either on the host directly or in a virtual environment). -- ``dbt_ls_file``: You have to generate a file of any specific ``dbt ls`` command (needs to be using ``--output json``). It ususes the same parsing logic as ``dbt_ls``, but you will need to provide one file for each group of selected models you want. +- ``dbt_ls_file``: You have to generate a file of any specific ``dbt ls`` command (needs to be using ``--output json``). Advantage of this method is that you use dbt's internal selecting/excluding logic as it uses the same parsing as ``dbt_ls``, but you will need to provide one file for each group of selected models you want. - ``custom``: Cosmos will parse your project and model files for you. This means that Cosmos will not have access to dbt's metadata. However, this method does not require the dbt executable to be installed on your machine. If you're using the ``local`` mode, you should use the ``dbt_ls`` method. 
From 20ea7b06c757b007b7e7bf418606a97c68125288 Mon Sep 17 00:00:00 2001 From: woogakoki Date: Fri, 1 Dec 2023 09:56:19 +0100 Subject: [PATCH 05/23] fix exception message --- cosmos/dbt/graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cosmos/dbt/graph.py b/cosmos/dbt/graph.py index d7218b716b..a7c5eec2c5 100644 --- a/cosmos/dbt/graph.py +++ b/cosmos/dbt/graph.py @@ -285,7 +285,7 @@ def load_via_dbt_ls_file(self) -> None: logger.info("Trying to parse the dbt project `%s` using a dbt ls output file...", self.project.project_name) if not self.render_config.is_dbt_ls_file_available(): - raise CosmosLoadDbtException(f"Unable to load dbt ls file using {self.execution_config.dbt_ls_path}") + raise CosmosLoadDbtException(f"Unable to load dbt ls file using {self.render_config.dbt_ls_path}") if not self.execution_config.project_path: raise CosmosLoadDbtException("Unable to load dbt ls file without ExecutionConfig.dbt_project_path") From 92accdbf40c6e64c83cc6420bd96091a5a1c933f Mon Sep 17 00:00:00 2001 From: woogakoki Date: Tue, 5 Dec 2023 15:49:30 +0100 Subject: [PATCH 06/23] fix pre-commit --- cosmos/config.py | 3 +-- cosmos/dbt/graph.py | 6 ++--- tests/dbt/test_graph.py | 53 ++++++++++++++++------------------------- 3 files changed, 25 insertions(+), 37 deletions(-) diff --git a/cosmos/config.py b/cosmos/config.py index f477de1a7e..d1150562c5 100644 --- a/cosmos/config.py +++ b/cosmos/config.py @@ -93,6 +93,7 @@ def is_dbt_ls_file_available(self) -> bool: return self.dbt_ls_path.exists() + class ProjectConfig: """ Class for setting project config. 
@@ -282,5 +283,3 @@ class ExecutionConfig: def __post_init__(self, dbt_project_path: str | Path | None) -> None: self.project_path = Path(dbt_project_path) if dbt_project_path else None - - diff --git a/cosmos/dbt/graph.py b/cosmos/dbt/graph.py index a7c5eec2c5..0b619c189e 100644 --- a/cosmos/dbt/graph.py +++ b/cosmos/dbt/graph.py @@ -275,8 +275,8 @@ def load_via_dbt_ls(self) -> None: def load_via_dbt_ls_file(self) -> None: """ - This is between dbt ls and full manifest. It allows to use the output (needs to be json output) of the dbt ls as a - file stored in the image you run Cosmos on. The advantage is that you can use the parser from LoadMode.DBT_LS without + This is between dbt ls and full manifest. It allows to use the output (needs to be json output) of the dbt ls as a + file stored in the image you run Cosmos on. The advantage is that you can use the parser from LoadMode.DBT_LS without actually running dbt ls every time. BUT you will need one dbt ls file for each separate group. This technically should increase performance and also removes the necessity to have your whole dbt project copied @@ -294,7 +294,7 @@ def load_via_dbt_ls_file(self) -> None: dbt_ls_output = fp.read() project_path = self.execution_config.project_path nodes = parse_dbt_ls_output(project_path=project_path, ls_stdout=dbt_ls_output) - + self.nodes = nodes self.filtered_nodes = nodes diff --git a/tests/dbt/test_graph.py b/tests/dbt/test_graph.py index 093722cfa5..c75886f10e 100644 --- a/tests/dbt/test_graph.py +++ b/tests/dbt/test_graph.py @@ -25,6 +25,7 @@ SAMPLE_MANIFEST_SOURCE = Path(__file__).parent.parent / "sample/manifest_source.json" SAMPLE_DBT_LS_OUTPUT = Path(__file__).parent.parent / "sample/sample_dbt_ls.txt" + @pytest.fixture def tmp_dbt_project_dir(): """ @@ -112,23 +113,15 @@ def test_load_automatic_manifest_is_available(mock_load_from_dbt_manifest): @patch("cosmos.dbt.graph.DbtGraph.load_via_dbt_ls_file", return_value=None) def 
test_load_automatic_dbt_ls_file_is_available(mock_load_via_dbt_ls_file): - project_config = ProjectConfig( - dbt_project_path=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME - ) + project_config = ProjectConfig(dbt_project_path=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME) profile_config = ProfileConfig( profile_name="test", target_name="test", profiles_yml_filepath=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME / "profiles.yml", ) - render_config = RenderConfig( - dbt_ls_path=SAMPLE_DBT_LS_OUTPUT - ) - dbt_graph = DbtGraph( - project=project_config, - profile_config=profile_config, - render_config=render_config - ) - dbt_graph.load(method=LoadMode.DBT_LS_FILE,execution_mode=ExecutionMode.LOCAL) + render_config = RenderConfig(dbt_ls_path=SAMPLE_DBT_LS_OUTPUT) + dbt_graph = DbtGraph(project=project_config, profile_config=profile_config, render_config=render_config) + dbt_graph.load(method=LoadMode.DBT_LS_FILE, execution_mode=ExecutionMode.LOCAL) assert mock_load_via_dbt_ls_file.called @@ -224,7 +217,13 @@ def test_load_manifest_with_manifest(mock_load_from_dbt_manifest): @patch("cosmos.dbt.graph.DbtGraph.load_from_dbt_manifest", return_value=None) @patch("cosmos.dbt.graph.DbtGraph.load_via_dbt_ls_file", return_value=None) def test_load( - mock_load_from_dbt_manifest, mock_load_via_dbt_ls_file, mock_load_via_dbt_ls, mock_load_via_custom_parser, exec_mode, method, expected_function + mock_load_from_dbt_manifest, + mock_load_via_dbt_ls_file, + mock_load_via_dbt_ls, + mock_load_via_custom_parser, + exec_mode, + method, + expected_function, ): project_config = ProjectConfig(dbt_project_path=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME) profile_config = ProfileConfig( @@ -701,27 +700,22 @@ def test_load_dbt_ls_and_manifest_with_model_version(load_method): "model.jaffle_shop.stg_payments", } == set(dbt_graph.nodes["model.jaffle_shop.orders"].depends_on) + @pytest.mark.integration def test_load_via_dbt_ls_file(): - project_config = ProjectConfig( - dbt_project_path=DBT_PROJECTS_ROOT_DIR / 
DBT_PROJECT_NAME - ) + project_config = ProjectConfig(dbt_project_path=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME) profile_config = ProfileConfig( profile_name="test", target_name="test", profiles_yml_filepath=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME / "profiles.yml", ) - execution_config = ExecutionConfig( - dbt_project_path=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME - ) - render_config = RenderConfig( - dbt_ls_path=SAMPLE_DBT_LS_OUTPUT - ) + execution_config = ExecutionConfig(dbt_project_path=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME) + render_config = RenderConfig(dbt_ls_path=SAMPLE_DBT_LS_OUTPUT) dbt_graph = DbtGraph( project=project_config, profile_config=profile_config, execution_config=execution_config, - render_config=render_config + render_config=render_config, ) dbt_graph.load(method=LoadMode.DBT_LS_FILE, execution_mode=ExecutionMode.LOCAL) @@ -734,15 +728,10 @@ def test_load_via_dbt_ls_file(): assert unique_id in dbt_graph.nodes assert name == dbt_graph.nodes[unique_id].name # Test dependencies - assert { - "seed.jaffle_shop.raw_customers" - } == set(dbt_graph.nodes["model.jaffle_shop.stg_customers"].depends_on) - assert { - "seed.jaffle_shop.raw_orders" - } == set(dbt_graph.nodes["model.jaffle_shop.stg_orders"].depends_on) - assert { - "seed.jaffle_shop.raw_payments" - } == set(dbt_graph.nodes["model.jaffle_shop.stg_payments"].depends_on) + assert {"seed.jaffle_shop.raw_customers"} == set(dbt_graph.nodes["model.jaffle_shop.stg_customers"].depends_on) + assert {"seed.jaffle_shop.raw_orders"} == set(dbt_graph.nodes["model.jaffle_shop.stg_orders"].depends_on) + assert {"seed.jaffle_shop.raw_payments"} == set(dbt_graph.nodes["model.jaffle_shop.stg_payments"].depends_on) + @pytest.mark.parametrize( "stdout,returncode", From 9f7ac1ca2e27ba072cf628b36e81e74933b2f979 Mon Sep 17 00:00:00 2001 From: woogakoki Date: Tue, 5 Dec 2023 15:51:55 +0100 Subject: [PATCH 07/23] fix pre-commit --- docs/configuration/parsing-methods.rst | 3 +-- 1 file changed, 1 insertion(+), 2 
deletions(-) diff --git a/docs/configuration/parsing-methods.rst b/docs/configuration/parsing-methods.rst index 64ff611ec8..aef3ecd433 100644 --- a/docs/configuration/parsing-methods.rst +++ b/docs/configuration/parsing-methods.rst @@ -89,8 +89,7 @@ To use this: DbtDag( render_config=RenderConfig( - load_method=LoadMode.DBT_MANIFEST, - dbt_ls_path="/path/to/dbt_ls_file.txt" + load_method=LoadMode.DBT_MANIFEST, dbt_ls_path="/path/to/dbt_ls_file.txt" ) # ..., ) From b8c4e83b55592402796b7c95717dd4ca558406df Mon Sep 17 00:00:00 2001 From: woogakoki Date: Thu, 7 Dec 2023 08:54:47 +0100 Subject: [PATCH 08/23] adds test coverage for missing files and paths --- tests/dbt/test_graph.py | 26 ++++++++++++++++++++++++++ tests/test_config.py | 15 +++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/tests/dbt/test_graph.py b/tests/dbt/test_graph.py index c75886f10e..c9a0465004 100644 --- a/tests/dbt/test_graph.py +++ b/tests/dbt/test_graph.py @@ -124,6 +124,32 @@ def test_load_automatic_dbt_ls_file_is_available(mock_load_via_dbt_ls_file): dbt_graph.load(method=LoadMode.DBT_LS_FILE, execution_mode=ExecutionMode.LOCAL) assert mock_load_via_dbt_ls_file.called +def test_load_dbt_ls_file_without_file(mock_load_via_dbt_ls_file): + project_config = ProjectConfig(dbt_project_path=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME) + profile_config = ProfileConfig( + profile_name="test", + target_name="test", + profiles_yml_filepath=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME / "profiles.yml", + ) + render_config = RenderConfig(dbt_ls_path=None) + dbt_graph = DbtGraph(project=project_config, profile_config=profile_config, render_config=render_config) + with pytest.raises(CosmosLoadDbtException) as err_info: + dbt_graph.load(execution_mode=ExecutionMode.LOCAL, method=LoadMode.DBT_LS_FILE) + assert err_info.value.args[0] == "Unable to load dbt ls file using None" + +def test_load_dbt_ls_file_without_project_path(mock_load_via_dbt_ls_file): + project_config = 
ProjectConfig(dbt_project_path=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME) + profile_config = ProfileConfig( + profile_name="test", + target_name="test", + profiles_yml_filepath=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME / "profiles.yml", + ) + render_config = RenderConfig(dbt_ls_path=SAMPLE_DBT_LS_OUTPUT) + execution_config =ExecutionConfig(dbt_project_path=None) + dbt_graph = DbtGraph(project=project_config, profile_config=profile_config, render_config=render_config, execution_config=execution_config) + with pytest.raises(CosmosLoadDbtException) as err_info: + dbt_graph.load(execution_mode=ExecutionMode.LOCAL, method=LoadMode.DBT_LS_FILE) + assert err_info.value.args[0] == "Unable to load dbt ls file without ExecutionConfig.dbt_project_path" @patch("cosmos.dbt.graph.DbtGraph.load_via_custom_parser", side_effect=None) @patch("cosmos.dbt.graph.DbtGraph.load_via_dbt_ls", return_value=None) diff --git a/tests/test_config.py b/tests/test_config.py index 578a68f760..5f7fca04d4 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -174,3 +174,18 @@ def test_render_config_uses_default_if_exists(mock_which): render_config = RenderConfig(dbt_executable_path="user-dbt") render_config.validate_dbt_command("fallback-dbt-path") assert render_config.dbt_executable_path == "user-dbt" + + +def test_is_dbt_ls_file_available_is_true(): + + render_config = RenderConfig( + dbt_ls_path=DBT_PROJECTS_ROOT_DIR / "sample_dbt_ls.txt" + ) + assert render_config.is_manifest_available() + + +def test_is_dbt_ls_file_available_is_false(): + render_config = RenderConfig( + dbt_ls_path=None + ) + assert not render_config.is_manifest_available() From 39d37dd4271ab69106609b6e9f36ad4ab8b02328 Mon Sep 17 00:00:00 2001 From: woogakoki Date: Thu, 7 Dec 2023 08:57:00 +0100 Subject: [PATCH 09/23] fix formatting --- tests/dbt/test_graph.py | 12 ++++++++++-- tests/test_config.py | 9 ++------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tests/dbt/test_graph.py 
b/tests/dbt/test_graph.py index c9a0465004..f86f3e82e5 100644 --- a/tests/dbt/test_graph.py +++ b/tests/dbt/test_graph.py @@ -124,6 +124,7 @@ def test_load_automatic_dbt_ls_file_is_available(mock_load_via_dbt_ls_file): dbt_graph.load(method=LoadMode.DBT_LS_FILE, execution_mode=ExecutionMode.LOCAL) assert mock_load_via_dbt_ls_file.called + def test_load_dbt_ls_file_without_file(mock_load_via_dbt_ls_file): project_config = ProjectConfig(dbt_project_path=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME) profile_config = ProfileConfig( @@ -137,6 +138,7 @@ def test_load_dbt_ls_file_without_file(mock_load_via_dbt_ls_file): dbt_graph.load(execution_mode=ExecutionMode.LOCAL, method=LoadMode.DBT_LS_FILE) assert err_info.value.args[0] == "Unable to load dbt ls file using None" + def test_load_dbt_ls_file_without_project_path(mock_load_via_dbt_ls_file): project_config = ProjectConfig(dbt_project_path=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME) profile_config = ProfileConfig( @@ -145,12 +147,18 @@ def test_load_dbt_ls_file_without_project_path(mock_load_via_dbt_ls_file): profiles_yml_filepath=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME / "profiles.yml", ) render_config = RenderConfig(dbt_ls_path=SAMPLE_DBT_LS_OUTPUT) - execution_config =ExecutionConfig(dbt_project_path=None) - dbt_graph = DbtGraph(project=project_config, profile_config=profile_config, render_config=render_config, execution_config=execution_config) + execution_config = ExecutionConfig(dbt_project_path=None) + dbt_graph = DbtGraph( + project=project_config, + profile_config=profile_config, + render_config=render_config, + execution_config=execution_config, + ) with pytest.raises(CosmosLoadDbtException) as err_info: dbt_graph.load(execution_mode=ExecutionMode.LOCAL, method=LoadMode.DBT_LS_FILE) assert err_info.value.args[0] == "Unable to load dbt ls file without ExecutionConfig.dbt_project_path" + @patch("cosmos.dbt.graph.DbtGraph.load_via_custom_parser", side_effect=None) @patch("cosmos.dbt.graph.DbtGraph.load_via_dbt_ls", 
return_value=None) def test_load_automatic_without_manifest_with_profile_yml(mock_load_via_dbt_ls, mock_load_via_custom_parser): diff --git a/tests/test_config.py b/tests/test_config.py index 5f7fca04d4..648348a30c 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -177,15 +177,10 @@ def test_render_config_uses_default_if_exists(mock_which): def test_is_dbt_ls_file_available_is_true(): - - render_config = RenderConfig( - dbt_ls_path=DBT_PROJECTS_ROOT_DIR / "sample_dbt_ls.txt" - ) + render_config = RenderConfig(dbt_ls_path=DBT_PROJECTS_ROOT_DIR / "sample_dbt_ls.txt") assert render_config.is_manifest_available() def test_is_dbt_ls_file_available_is_false(): - render_config = RenderConfig( - dbt_ls_path=None - ) + render_config = RenderConfig(dbt_ls_path=None) assert not render_config.is_manifest_available() From 70d4fd3486f2dfe159429b4228f4e76b6b180e5e Mon Sep 17 00:00:00 2001 From: woogakoki Date: Thu, 7 Dec 2023 08:59:50 +0100 Subject: [PATCH 10/23] fix render config test --- tests/test_config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_config.py b/tests/test_config.py index 648348a30c..73045f1e24 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -178,9 +178,9 @@ def test_render_config_uses_default_if_exists(mock_which): def test_is_dbt_ls_file_available_is_true(): render_config = RenderConfig(dbt_ls_path=DBT_PROJECTS_ROOT_DIR / "sample_dbt_ls.txt") - assert render_config.is_manifest_available() + assert render_config.is_dbt_ls_file_available() def test_is_dbt_ls_file_available_is_false(): render_config = RenderConfig(dbt_ls_path=None) - assert not render_config.is_manifest_available() + assert not render_config.is_dbt_ls_file_available() From c885f53b5779459946231ceb53dbd3515a618e15 Mon Sep 17 00:00:00 2001 From: woogakoki Date: Thu, 7 Dec 2023 09:49:59 +0100 Subject: [PATCH 11/23] fixs test by removing unnecessary fixture --- tests/dbt/test_graph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/tests/dbt/test_graph.py b/tests/dbt/test_graph.py index f86f3e82e5..bb85c88fef 100644 --- a/tests/dbt/test_graph.py +++ b/tests/dbt/test_graph.py @@ -125,7 +125,7 @@ def test_load_automatic_dbt_ls_file_is_available(mock_load_via_dbt_ls_file): assert mock_load_via_dbt_ls_file.called -def test_load_dbt_ls_file_without_file(mock_load_via_dbt_ls_file): +def test_load_dbt_ls_file_without_file(): project_config = ProjectConfig(dbt_project_path=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME) profile_config = ProfileConfig( profile_name="test", @@ -139,7 +139,7 @@ def test_load_dbt_ls_file_without_file(mock_load_via_dbt_ls_file): assert err_info.value.args[0] == "Unable to load dbt ls file using None" -def test_load_dbt_ls_file_without_project_path(mock_load_via_dbt_ls_file): +def test_load_dbt_ls_file_without_project_path(): project_config = ProjectConfig(dbt_project_path=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME) profile_config = ProfileConfig( profile_name="test", From ff10427a1e02314f1ef9a96dad820614f05f7b48 Mon Sep 17 00:00:00 2001 From: woogakoki Date: Thu, 7 Dec 2023 10:15:09 +0100 Subject: [PATCH 12/23] updates example dag to use new LoadMode, and update docs to point to example --- dev/dags/dbt/jaffle_shop/dbt_ls_models_staging.txt | 6 ++++++ dev/dags/user_defined_profile.py | 7 ++++++- docs/configuration/parsing-methods.rst | 1 + 3 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 dev/dags/dbt/jaffle_shop/dbt_ls_models_staging.txt diff --git a/dev/dags/dbt/jaffle_shop/dbt_ls_models_staging.txt b/dev/dags/dbt/jaffle_shop/dbt_ls_models_staging.txt new file mode 100644 index 0000000000..b356a5208c --- /dev/null +++ b/dev/dags/dbt/jaffle_shop/dbt_ls_models_staging.txt @@ -0,0 +1,6 @@ +14:26:04 Running with dbt=1.6.9 +14:26:04 Registered adapter: exasol=1.6.2 +14:26:04 Found 5 models, 3 seeds, 20 tests, 0 sources, 0 exposures, 0 metrics, 366 macros, 0 groups, 0 semantic models +{"name": "stg_customers", "resource_type": "model", 
"package_name": "jaffle_shop", "original_file_path": "models/staging/stg_customers.sql", "unique_id": "model.jaffle_shop.stg_customers", "alias": "stg_customers", "config": {"enabled": true, "alias": null, "schema": null, "database": null, "tags": [], "meta": {}, "group": null, "materialized": "view", "incremental_strategy": null, "persist_docs": {}, "quoting": {}, "column_types": {}, "full_refresh": null, "unique_key": null, "on_schema_change": "ignore", "on_configuration_change": "apply", "grants": {}, "packages": [], "docs": {"show": true, "node_color": null}, "contract": {"enforced": false}, "post-hook": [], "pre-hook": []}, "tags": [], "depends_on": {"macros": [], "nodes": ["seed.jaffle_shop.raw_customers"]}} +{"name": "stg_orders", "resource_type": "model", "package_name": "jaffle_shop", "original_file_path": "models/staging/stg_orders.sql", "unique_id": "model.jaffle_shop.stg_orders", "alias": "stg_orders", "config": {"enabled": true, "alias": null, "schema": null, "database": null, "tags": [], "meta": {}, "group": null, "materialized": "view", "incremental_strategy": null, "persist_docs": {}, "quoting": {}, "column_types": {}, "full_refresh": null, "unique_key": null, "on_schema_change": "ignore", "on_configuration_change": "apply", "grants": {}, "packages": [], "docs": {"show": true, "node_color": null}, "contract": {"enforced": false}, "post-hook": [], "pre-hook": []}, "tags": [], "depends_on": {"macros": [], "nodes": ["seed.jaffle_shop.raw_orders"]}} +{"name": "stg_payments", "resource_type": "model", "package_name": "jaffle_shop", "original_file_path": "models/staging/stg_payments.sql", "unique_id": "model.jaffle_shop.stg_payments", "alias": "stg_payments", "config": {"enabled": true, "alias": null, "schema": null, "database": null, "tags": [], "meta": {}, "group": null, "materialized": "view", "incremental_strategy": null, "persist_docs": {}, "quoting": {}, "column_types": {}, "full_refresh": null, "unique_key": null, "on_schema_change": "ignore", 
"on_configuration_change": "apply", "grants": {}, "packages": [], "docs": {"show": true, "node_color": null}, "contract": {"enforced": false}, "post-hook": [], "pre-hook": []}, "tags": [], "depends_on": {"macros": [], "nodes": ["seed.jaffle_shop.raw_payments"]}} diff --git a/dev/dags/user_defined_profile.py b/dev/dags/user_defined_profile.py index ab30cdb2fe..032915d0ab 100644 --- a/dev/dags/user_defined_profile.py +++ b/dev/dags/user_defined_profile.py @@ -8,11 +8,12 @@ from airflow.decorators import dag from airflow.operators.empty import EmptyOperator -from cosmos import DbtTaskGroup, ProjectConfig, ProfileConfig +from cosmos import DbtTaskGroup, ProjectConfig, ProfileConfig, RenderConfig, LoadMode DEFAULT_DBT_ROOT_PATH = Path(__file__).parent / "dbt" DBT_ROOT_PATH = Path(os.getenv("DBT_ROOT_PATH", DEFAULT_DBT_ROOT_PATH)) PROFILES_FILE_PATH = Path(DBT_ROOT_PATH, "jaffle_shop", "profiles.yml") +DBT_LS_PATH = Path(DBT_ROOT_PATH, "jaffle_shop", "dbt_ls_models_staging.txt") @dag( @@ -35,6 +36,10 @@ def user_defined_profile() -> None: target_name="dev", profiles_yml_filepath=PROFILES_FILE_PATH, ), + render_config=RenderConfig( + load_method=LoadMode.DBT_LS_FILE, + dbt_ls_path=DBT_LS_PATH, + ), operator_args={"append_env": True, "install_deps": True}, default_args={"retries": 2}, ) diff --git a/docs/configuration/parsing-methods.rst b/docs/configuration/parsing-methods.rst index aef3ecd433..d8e27624cd 100644 --- a/docs/configuration/parsing-methods.rst +++ b/docs/configuration/parsing-methods.rst @@ -82,6 +82,7 @@ To use this: If you provide the output of ``dbt ls --output json`` as a file, you can use this to parse similar to ``dbt_ls``. You can supply a ``dbt_ls_path`` parameter on the DbtDag / DbtTaskGroup with a path to a ``dbt_ls_output.txt`` file. +Check [this Dag](https://github.com/astronomer/astronomer-cosmos/blob/main/dev/dags/user_defined_profile.py) for an example. 
To use this: From c0a234f6978f66803e9baa4467446b05e1912e92 Mon Sep 17 00:00:00 2001 From: woogakoki Date: Thu, 7 Dec 2023 10:20:24 +0100 Subject: [PATCH 13/23] change hpyerlink ref --- docs/configuration/parsing-methods.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration/parsing-methods.rst b/docs/configuration/parsing-methods.rst index d8e27624cd..6d9759c5e6 100644 --- a/docs/configuration/parsing-methods.rst +++ b/docs/configuration/parsing-methods.rst @@ -82,7 +82,7 @@ To use this: If you provide the output of ``dbt ls --output json`` as a file, you can use this to parse similar to ``dbt_ls``. You can supply a ``dbt_ls_path`` parameter on the DbtDag / DbtTaskGroup with a path to a ``dbt_ls_output.txt`` file. -Check [this Dag](https://github.com/astronomer/astronomer-cosmos/blob/main/dev/dags/user_defined_profile.py) for an example. +Check `this Dag https://github.com/astronomer/astronomer-cosmos/blob/main/dev/dags/user_defined_profile.py`_ for an example. To use this: From 2ce453c41dd8075c3788cbc549ea81e88f2602b7 Mon Sep 17 00:00:00 2001 From: woogakoki Date: Thu, 7 Dec 2023 10:21:53 +0100 Subject: [PATCH 14/23] adds missing brackets --- docs/configuration/parsing-methods.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration/parsing-methods.rst b/docs/configuration/parsing-methods.rst index 6d9759c5e6..c635e3a32e 100644 --- a/docs/configuration/parsing-methods.rst +++ b/docs/configuration/parsing-methods.rst @@ -82,7 +82,7 @@ To use this: If you provide the output of ``dbt ls --output json`` as a file, you can use this to parse similar to ``dbt_ls``. You can supply a ``dbt_ls_path`` parameter on the DbtDag / DbtTaskGroup with a path to a ``dbt_ls_output.txt`` file. -Check `this Dag https://github.com/astronomer/astronomer-cosmos/blob/main/dev/dags/user_defined_profile.py`_ for an example. +Check `this Dag <https://github.com/astronomer/astronomer-cosmos/blob/main/dev/dags/user_defined_profile.py>` for an example.
To use this: From 86ce4ff3c29cbcacb8b1a979b7688216daf2c95d Mon Sep 17 00:00:00 2001 From: woogakoki Date: Thu, 7 Dec 2023 10:23:50 +0100 Subject: [PATCH 15/23] adds missing underscore --- docs/configuration/parsing-methods.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration/parsing-methods.rst b/docs/configuration/parsing-methods.rst index c635e3a32e..55cad43e76 100644 --- a/docs/configuration/parsing-methods.rst +++ b/docs/configuration/parsing-methods.rst @@ -82,7 +82,7 @@ To use this: If you provide the output of ``dbt ls --output json`` as a file, you can use this to parse similar to ``dbt_ls``. You can supply a ``dbt_ls_path`` parameter on the DbtDag / DbtTaskGroup with a path to a ``dbt_ls_output.txt`` file. -Check `this Dag <https://github.com/astronomer/astronomer-cosmos/blob/main/dev/dags/user_defined_profile.py>` for an example. +Check `this Dag <https://github.com/astronomer/astronomer-cosmos/blob/main/dev/dags/user_defined_profile.py>`_ for an example. To use this: From 3ba51abc6feffd3ea30c04f118c7f6c87fa43bf9 Mon Sep 17 00:00:00 2001 From: Koki <146737781+woogakoki@users.noreply.github.com> Date: Tue, 12 Dec 2023 17:02:02 +0100 Subject: [PATCH 16/23] Update docs/configuration/parsing-methods.rst Co-authored-by: Tatiana Al-Chueyr --- docs/configuration/parsing-methods.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration/parsing-methods.rst b/docs/configuration/parsing-methods.rst index 55cad43e76..a5bd130ca6 100644 --- a/docs/configuration/parsing-methods.rst +++ b/docs/configuration/parsing-methods.rst @@ -15,7 +15,7 @@ There are benefits and drawbacks to each method: - ``dbt_manifest``: You have to generate the manifest file on your own. When using the manifest, Cosmos gets a complete set of metadata about your models. However, Cosmos uses its own selecting & excluding logic to determine which models to run, which may not be as robust as dbt's. - ``dbt_ls``: Cosmos will generate the manifest file for you. This method uses dbt's metadata AND dbt's selecting/excluding logic. This is the most robust method.
However, this requires the dbt executable to be installed on your machine (either on the host directly or in a virtual environment). -- ``dbt_ls_file``: You have to generate a file of any specific ``dbt ls`` command (needs to be using ``--output json``). Advantage of this method is that you use dbt's internal selecting/excluding logic as it uses the same parsing as ``dbt_ls``, but you will need to provide one file for each group of selected models you want. +- ``dbt_ls_file`` (new in 1.3): Path to a file containing the ``dbt ls`` output. To use this method, run ``dbt ls`` using ``--output json`` and store the output in a file. `RenderConfig.select` and `RenderConfig.exclude` will not work using this method. - ``custom``: Cosmos will parse your project and model files for you. This means that Cosmos will not have access to dbt's metadata. However, this method does not require the dbt executable to be installed on your machine. If you're using the ``local`` mode, you should use the ``dbt_ls`` method. From 03cbfed48bc0cf4b9b2416f93cc7d3f6f5420057 Mon Sep 17 00:00:00 2001 From: Koki <146737781+woogakoki@users.noreply.github.com> Date: Tue, 12 Dec 2023 17:02:11 +0100 Subject: [PATCH 17/23] Update docs/configuration/parsing-methods.rst Co-authored-by: Tatiana Al-Chueyr --- docs/configuration/parsing-methods.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/configuration/parsing-methods.rst b/docs/configuration/parsing-methods.rst index a5bd130ca6..18e93c4b0c 100644 --- a/docs/configuration/parsing-methods.rst +++ b/docs/configuration/parsing-methods.rst @@ -80,6 +80,9 @@ To use this: ``dbt_ls_file`` ---------------- +.. note:: + New in Cosmos 1.3. + If you provide the output of ``dbt ls --output json`` as a file, you can use this to parse similar to ``dbt_ls``. You can supply a ``dbt_ls_path`` parameter on the DbtDag / DbtTaskGroup with a path to a ``dbt_ls_output.txt`` file. Check `this Dag <https://github.com/astronomer/astronomer-cosmos/blob/main/dev/dags/user_defined_profile.py>`_ for an example.
From 3fd01e31727df5ca9ccc8a02b195bac73521ad9c Mon Sep 17 00:00:00 2001 From: woogakoki Date: Tue, 12 Dec 2023 17:13:13 +0100 Subject: [PATCH 18/23] checks render config instead of execution config --- cosmos/dbt/graph.py | 7 +++---- tests/dbt/test_graph.py | 5 ++--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/cosmos/dbt/graph.py b/cosmos/dbt/graph.py index 385255f9bd..22ee9c1fc5 100644 --- a/cosmos/dbt/graph.py +++ b/cosmos/dbt/graph.py @@ -292,12 +292,11 @@ def load_via_dbt_ls_file(self) -> None: if not self.render_config.is_dbt_ls_file_available(): raise CosmosLoadDbtException(f"Unable to load dbt ls file using {self.render_config.dbt_ls_path}") - if not self.execution_config.project_path: - raise CosmosLoadDbtException("Unable to load dbt ls file without ExecutionConfig.dbt_project_path") - + project_path = self.render_config.dbt_project_path + if not project_path: + raise CosmosLoadDbtException("Unable to load dbt ls file without RenderConfig.dbt_project_path") with open(self.render_config.dbt_ls_path) as fp: # type: ignore[arg-type] dbt_ls_output = fp.read() - project_path = self.execution_config.project_path nodes = parse_dbt_ls_output(project_path=project_path, ls_stdout=dbt_ls_output) self.nodes = nodes diff --git a/tests/dbt/test_graph.py b/tests/dbt/test_graph.py index 1b08694004..b735d06fd1 100644 --- a/tests/dbt/test_graph.py +++ b/tests/dbt/test_graph.py @@ -148,8 +148,7 @@ def test_load_dbt_ls_file_without_project_path(): target_name="test", profiles_yml_filepath=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME / "profiles.yml", ) - render_config = RenderConfig(dbt_ls_path=SAMPLE_DBT_LS_OUTPUT) - execution_config = ExecutionConfig(dbt_project_path=None) + render_config = RenderConfig(dbt_ls_path=SAMPLE_DBT_LS_OUTPUT, dbt_project_path= None) dbt_graph = DbtGraph( project=project_config, profile_config=profile_config, @@ -158,7 +157,7 @@ def test_load_dbt_ls_file_without_project_path(): ) with pytest.raises(CosmosLoadDbtException) as err_info: 
dbt_graph.load(execution_mode=ExecutionMode.LOCAL, method=LoadMode.DBT_LS_FILE) - assert err_info.value.args[0] == "Unable to load dbt ls file without ExecutionConfig.dbt_project_path" + assert err_info.value.args[0] == "Unable to load dbt ls file without RenderConfig.dbt_project_path" @patch("cosmos.dbt.graph.DbtGraph.load_via_custom_parser", side_effect=None) From 0e4b11c5dbedccea981f505c41bab708b96b0c23 Mon Sep 17 00:00:00 2001 From: woogakoki Date: Tue, 12 Dec 2023 17:20:18 +0100 Subject: [PATCH 19/23] fix pre-commit --- docs/configuration/parsing-methods.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration/parsing-methods.rst b/docs/configuration/parsing-methods.rst index 18e93c4b0c..ef50bdb4e6 100644 --- a/docs/configuration/parsing-methods.rst +++ b/docs/configuration/parsing-methods.rst @@ -15,7 +15,7 @@ There are benefits and drawbacks to each method: - ``dbt_manifest``: You have to generate the manifest file on your own. When using the manifest, Cosmos gets a complete set of metadata about your models. However, Cosmos uses its own selecting & excluding logic to determine which models to run, which may not be as robust as dbt's. - ``dbt_ls``: Cosmos will generate the manifest file for you. This method uses dbt's metadata AND dbt's selecting/excluding logic. This is the most robust method. However, this requires the dbt executable to be installed on your machine (either on the host directly or in a virtual environment). -- ``dbt_ls_file`` (new in 1.3): Path to a file containing the ``dbt ls`` output. To use this method, run ``dbt ls`` using ``--output json`` and store the output in a file. `RenderConfig.select` and `RenderConfig.exclude` will not work using this method. +- ``dbt_ls_file`` (new in 1.3): Path to a file containing the ``dbt ls`` output. To use this method, run ``dbt ls`` using ``--output json`` and store the output in a file. 
``RenderConfig.select`` and ``RenderConfig.exclude`` will not work using this method. - ``custom``: Cosmos will parse your project and model files for you. This means that Cosmos will not have access to dbt's metadata. However, this method does not require the dbt executable to be installed on your machine. If you're using the ``local`` mode, you should use the ``dbt_ls`` method. From a42ba9c370e700f54ead3faec2b2e29b8e9dd261 Mon Sep 17 00:00:00 2001 From: woogakoki Date: Tue, 12 Dec 2023 17:29:33 +0100 Subject: [PATCH 20/23] use render_config.project_path --- cosmos/dbt/graph.py | 4 ++-- tests/dbt/test_graph.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cosmos/dbt/graph.py b/cosmos/dbt/graph.py index 22ee9c1fc5..bae32f344b 100644 --- a/cosmos/dbt/graph.py +++ b/cosmos/dbt/graph.py @@ -292,9 +292,9 @@ def load_via_dbt_ls_file(self) -> None: if not self.render_config.is_dbt_ls_file_available(): raise CosmosLoadDbtException(f"Unable to load dbt ls file using {self.render_config.dbt_ls_path}") - project_path = self.render_config.dbt_project_path + project_path = self.render_config.project_path if not project_path: - raise CosmosLoadDbtException("Unable to load dbt ls file without RenderConfig.dbt_project_path") + raise CosmosLoadDbtException("Unable to load dbt ls file without RenderConfig.project_path") with open(self.render_config.dbt_ls_path) as fp: # type: ignore[arg-type] dbt_ls_output = fp.read() nodes = parse_dbt_ls_output(project_path=project_path, ls_stdout=dbt_ls_output) diff --git a/tests/dbt/test_graph.py b/tests/dbt/test_graph.py index b735d06fd1..74007d315a 100644 --- a/tests/dbt/test_graph.py +++ b/tests/dbt/test_graph.py @@ -157,7 +157,7 @@ def test_load_dbt_ls_file_without_project_path(): ) with pytest.raises(CosmosLoadDbtException) as err_info: dbt_graph.load(execution_mode=ExecutionMode.LOCAL, method=LoadMode.DBT_LS_FILE) - assert err_info.value.args[0] == "Unable to load dbt ls file without RenderConfig.dbt_project_path" + 
assert err_info.value.args[0] == "Unable to load dbt ls file without RenderConfig.project_path" @patch("cosmos.dbt.graph.DbtGraph.load_via_custom_parser", side_effect=None) From a539c3a65ef79afacffb74c60189c4f35526db65 Mon Sep 17 00:00:00 2001 From: woogakoki Date: Tue, 12 Dec 2023 17:33:28 +0100 Subject: [PATCH 21/23] fix test --- tests/dbt/test_graph.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/dbt/test_graph.py b/tests/dbt/test_graph.py index 74007d315a..358fc608db 100644 --- a/tests/dbt/test_graph.py +++ b/tests/dbt/test_graph.py @@ -148,12 +148,11 @@ def test_load_dbt_ls_file_without_project_path(): target_name="test", profiles_yml_filepath=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME / "profiles.yml", ) - render_config = RenderConfig(dbt_ls_path=SAMPLE_DBT_LS_OUTPUT, dbt_project_path= None) + render_config = RenderConfig(dbt_ls_path=SAMPLE_DBT_LS_OUTPUT, dbt_project_path=None) dbt_graph = DbtGraph( project=project_config, profile_config=profile_config, render_config=render_config, - execution_config=execution_config, ) with pytest.raises(CosmosLoadDbtException) as err_info: dbt_graph.load(execution_mode=ExecutionMode.LOCAL, method=LoadMode.DBT_LS_FILE) From 492602db5ee23d237c53454ec1f0f58554844c0a Mon Sep 17 00:00:00 2001 From: woogakoki Date: Wed, 13 Dec 2023 16:48:07 +0100 Subject: [PATCH 22/23] fix test_load_via_dbt_ls_file --- tests/dbt/test_graph.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/dbt/test_graph.py b/tests/dbt/test_graph.py index 358fc608db..a09cb054ca 100644 --- a/tests/dbt/test_graph.py +++ b/tests/dbt/test_graph.py @@ -787,12 +787,12 @@ def test_load_via_dbt_ls_file(): target_name="test", profiles_yml_filepath=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME / "profiles.yml", ) - execution_config = ExecutionConfig(dbt_project_path=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME) - render_config = RenderConfig(dbt_ls_path=SAMPLE_DBT_LS_OUTPUT) + render_config = RenderConfig( + 
dbt_ls_path=SAMPLE_DBT_LS_OUTPUT, dbt_project_path=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME + ) dbt_graph = DbtGraph( project=project_config, profile_config=profile_config, - execution_config=execution_config, render_config=render_config, ) dbt_graph.load(method=LoadMode.DBT_LS_FILE, execution_mode=ExecutionMode.LOCAL) From 5943619dc3ac553f9e80d70bc1055ac436bb18cd Mon Sep 17 00:00:00 2001 From: woogakoki Date: Thu, 14 Dec 2023 09:07:51 +0100 Subject: [PATCH 23/23] include seeds in the example --- dev/dags/dbt/jaffle_shop/dbt_ls_models_staging.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dev/dags/dbt/jaffle_shop/dbt_ls_models_staging.txt b/dev/dags/dbt/jaffle_shop/dbt_ls_models_staging.txt index b356a5208c..b8cc902ec0 100644 --- a/dev/dags/dbt/jaffle_shop/dbt_ls_models_staging.txt +++ b/dev/dags/dbt/jaffle_shop/dbt_ls_models_staging.txt @@ -4,3 +4,6 @@ {"name": "stg_customers", "resource_type": "model", "package_name": "jaffle_shop", "original_file_path": "models/staging/stg_customers.sql", "unique_id": "model.jaffle_shop.stg_customers", "alias": "stg_customers", "config": {"enabled": true, "alias": null, "schema": null, "database": null, "tags": [], "meta": {}, "group": null, "materialized": "view", "incremental_strategy": null, "persist_docs": {}, "quoting": {}, "column_types": {}, "full_refresh": null, "unique_key": null, "on_schema_change": "ignore", "on_configuration_change": "apply", "grants": {}, "packages": [], "docs": {"show": true, "node_color": null}, "contract": {"enforced": false}, "post-hook": [], "pre-hook": []}, "tags": [], "depends_on": {"macros": [], "nodes": ["seed.jaffle_shop.raw_customers"]}} {"name": "stg_orders", "resource_type": "model", "package_name": "jaffle_shop", "original_file_path": "models/staging/stg_orders.sql", "unique_id": "model.jaffle_shop.stg_orders", "alias": "stg_orders", "config": {"enabled": true, "alias": null, "schema": null, "database": null, "tags": [], "meta": {}, "group": null, "materialized": "view", 
"incremental_strategy": null, "persist_docs": {}, "quoting": {}, "column_types": {}, "full_refresh": null, "unique_key": null, "on_schema_change": "ignore", "on_configuration_change": "apply", "grants": {}, "packages": [], "docs": {"show": true, "node_color": null}, "contract": {"enforced": false}, "post-hook": [], "pre-hook": []}, "tags": [], "depends_on": {"macros": [], "nodes": ["seed.jaffle_shop.raw_orders"]}} {"name": "stg_payments", "resource_type": "model", "package_name": "jaffle_shop", "original_file_path": "models/staging/stg_payments.sql", "unique_id": "model.jaffle_shop.stg_payments", "alias": "stg_payments", "config": {"enabled": true, "alias": null, "schema": null, "database": null, "tags": [], "meta": {}, "group": null, "materialized": "view", "incremental_strategy": null, "persist_docs": {}, "quoting": {}, "column_types": {}, "full_refresh": null, "unique_key": null, "on_schema_change": "ignore", "on_configuration_change": "apply", "grants": {}, "packages": [], "docs": {"show": true, "node_color": null}, "contract": {"enforced": false}, "post-hook": [], "pre-hook": []}, "tags": [], "depends_on": {"macros": [], "nodes": ["seed.jaffle_shop.raw_payments"]}} +{"name": "raw_customers", "resource_type": "seed", "package_name": "jaffle_shop", "original_file_path": "seeds/raw_customers.csv", "unique_id": "seed.jaffle_shop.raw_customers", "alias": "raw_customers", "config": {"enabled": true, "alias": null, "schema": null, "database": null, "tags": [], "meta": {}, "group": null, "materialized": "seed", "incremental_strategy": null, "persist_docs": {}, "quoting": {}, "column_types": {}, "full_refresh": null, "unique_key": null, "on_schema_change": "ignore", "on_configuration_change": "apply", "grants": {}, "packages": [], "docs": {"show": true, "node_color": null}, "contract": {"enforced": false}, "quote_columns": null, "post-hook": [], "pre-hook": []}, "tags": [], "depends_on": {"macros": []}} +{"name": "raw_orders", "resource_type": "seed", "package_name": 
"jaffle_shop", "original_file_path": "seeds/raw_orders.csv", "unique_id": "seed.jaffle_shop.raw_orders", "alias": "raw_orders", "config": {"enabled": true, "alias": null, "schema": null, "database": null, "tags": [], "meta": {}, "group": null, "materialized": "seed", "incremental_strategy": null, "persist_docs": {}, "quoting": {}, "column_types": {}, "full_refresh": null, "unique_key": null, "on_schema_change": "ignore", "on_configuration_change": "apply", "grants": {}, "packages": [], "docs": {"show": true, "node_color": null}, "contract": {"enforced": false}, "quote_columns": null, "post-hook": [], "pre-hook": []}, "tags": [], "depends_on": {"macros": []}} +{"name": "raw_payments", "resource_type": "seed", "package_name": "jaffle_shop", "original_file_path": "seeds/raw_payments.csv", "unique_id": "seed.jaffle_shop.raw_payments", "alias": "raw_payments", "config": {"enabled": true, "alias": null, "schema": null, "database": null, "tags": [], "meta": {}, "group": null, "materialized": "seed", "incremental_strategy": null, "persist_docs": {}, "quoting": {}, "column_types": {}, "full_refresh": null, "unique_key": null, "on_schema_change": "ignore", "on_configuration_change": "apply", "grants": {}, "packages": [], "docs": {"show": true, "node_color": null}, "contract": {"enforced": false}, "quote_columns": null, "post-hook": [], "pre-hook": []}, "tags": [], "depends_on": {"macros": []}}