From f25c15db14cbec1d128dac8f57447d9971f75ba5 Mon Sep 17 00:00:00 2001 From: Evan Volgas Date: Sun, 22 Feb 2026 10:31:32 -0800 Subject: [PATCH 1/6] Fix KeyError in graph selector when using + operator with dbt-loom external nodes When using dbt-loom for cross-project references, external nodes are filtered out during manifest loading (they have no file path). However, local nodes may still have depends_on entries pointing to these external nodes. The + graph operator triggers select_node_precursors, which traverses depends_on entries, and when it encounters these external node IDs that were filtered out, it raises a KeyError. This fix adds membership checks in two locations in cosmos/dbt/selector.py: 1. GraphSelector.select_node_precursors: Skip node IDs not present in the nodes dict during upstream traversal 2. NodeSelector.select_nodes_ids_by_intersection: Skip external node IDs that were collected during graph traversal but don't exist in the nodes dict This allows the + traversal to gracefully stop at project boundaries, which is the correct behavior for cross-project setups where external dependencies are managed by their own DAGs/task groups. 
Original fix by @award1230 in #2389 Co-Authored-By: award1230 <26311596+award1230@users.noreply.github.com> Co-Authored-By: Claude Opus 4.5 --- cosmos/dbt/selector.py | 11 ++++++++++- tests/dbt/test_selector.py | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/cosmos/dbt/selector.py b/cosmos/dbt/selector.py index e325213d9b..1c3b61b8a0 100644 --- a/cosmos/dbt/selector.py +++ b/cosmos/dbt/selector.py @@ -191,7 +191,11 @@ def select_node_precursors(self, nodes: dict[str, DbtNode], root_id: str, select new_generation: set[str] = set() for node_id in previous_generation: if node_id not in processed_nodes: - new_generation.update(set(nodes[node_id].depends_on)) + # When using dbt-loom for cross-project references, external nodes are filtered out + # during manifest loading but local nodes may still reference them in depends_on. + # Skip missing nodes to gracefully stop traversal at project boundaries. + if node_id in nodes: + new_generation.update(set(nodes[node_id].depends_on)) processed_nodes.add(node_id) selected_nodes.update(new_generation) previous_generation = new_generation @@ -587,6 +591,11 @@ def select_nodes_ids_by_intersection(self) -> set[str]: if self.config.graph_selectors: graph_selected_nodes = self.select_by_graph_operator() for node_id in graph_selected_nodes: + # When using dbt-loom for cross-project references, external nodes are filtered out + # during manifest loading but may be collected during graph traversal via depends_on. + # Skip these external node IDs that don't exist in the nodes dict. + if node_id not in self.nodes: + continue node = self.nodes[node_id] # Since the method below changes the tags of test nodes, it can lead to incorrect # results during the application of graph selectors. 
Therefore, it is being run within diff --git a/tests/dbt/test_selector.py b/tests/dbt/test_selector.py index 44e3babe5a..38511ab6b7 100644 --- a/tests/dbt/test_selector.py +++ b/tests/dbt/test_selector.py @@ -574,6 +574,44 @@ def test_select_node_by_child_and_precursors_no_node(): assert list(selected.keys()) == expected +def test_select_nodes_by_precursors_with_external_dependency(): + """Test that the + selector handles depends_on references to nodes not in the nodes dict. + + When using dbt-loom for cross-project references, external nodes are filtered out during + manifest loading (they have no file path). However, local nodes may still have depends_on + entries pointing to these external nodes. The + selector should gracefully skip missing + nodes instead of raising a KeyError. + """ + external_upstream_id = "model.upstream_project.external_model" + + local_staging = DbtNode( + unique_id=f"{DbtResourceType.MODEL.value}.{SAMPLE_PROJ_PATH.stem}.local_staging", + resource_type=DbtResourceType.MODEL, + depends_on=[external_upstream_id], + file_path=SAMPLE_PROJ_PATH / "models/local_staging.sql", + tags=[], + config={}, + ) + local_marts = DbtNode( + unique_id=f"{DbtResourceType.MODEL.value}.{SAMPLE_PROJ_PATH.stem}.local_marts", + resource_type=DbtResourceType.MODEL, + depends_on=[local_staging.unique_id], + file_path=SAMPLE_PROJ_PATH / "models/local_marts.sql", + tags=[], + config={}, + ) + + nodes_with_external_dep = { + local_staging.unique_id: local_staging, + local_marts.unique_id: local_marts, + } + + selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=nodes_with_external_dep, select=["+local_marts"]) + assert local_marts.unique_id in selected + assert local_staging.unique_id in selected + assert external_upstream_id not in selected + + def test_select_node_by_descendants(): selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, select=["grandparent+"]) expected = [ From 7619f20454f1db8569cbae33aabab102c7319aa1 Mon Sep 17 00:00:00 
2001 From: Tatiana Al-Chueyr Date: Mon, 23 Feb 2026 13:49:00 +0000 Subject: [PATCH 2/6] Apply suggestion from @tatiana --- cosmos/dbt/selector.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cosmos/dbt/selector.py b/cosmos/dbt/selector.py index 15a35a3487..04dd98352e 100644 --- a/cosmos/dbt/selector.py +++ b/cosmos/dbt/selector.py @@ -191,7 +191,6 @@ def select_node_precursors(self, nodes: dict[str, DbtNode], root_id: str, select new_generation: set[str] = set() for node_id in previous_generation: if node_id not in processed_nodes: -<<<<<<< fix/graph-selector-dbt-loom-external-nodes # When using dbt-loom for cross-project references, external nodes are filtered out # during manifest loading but local nodes may still reference them in depends_on. # Skip missing nodes to gracefully stop traversal at project boundaries. From b29ce2dd54899204132fe46922a90f760e2e56b1 Mon Sep 17 00:00:00 2001 From: Tatiana Al-Chueyr Date: Mon, 23 Feb 2026 13:49:06 +0000 Subject: [PATCH 3/6] Apply suggestion from @tatiana --- cosmos/dbt/selector.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cosmos/dbt/selector.py b/cosmos/dbt/selector.py index 04dd98352e..82f399b296 100644 --- a/cosmos/dbt/selector.py +++ b/cosmos/dbt/selector.py @@ -194,7 +194,6 @@ def select_node_precursors(self, nodes: dict[str, DbtNode], root_id: str, select # When using dbt-loom for cross-project references, external nodes are filtered out # during manifest loading but local nodes may still reference them in depends_on. # Skip missing nodes to gracefully stop traversal at project boundaries. 
-======= >>>>>>> main if node_id in nodes: new_generation.update(set(nodes[node_id].depends_on)) From 64558350b70829e68a4c2975f260b9534db2d1a7 Mon Sep 17 00:00:00 2001 From: Tatiana Al-Chueyr Date: Mon, 23 Feb 2026 13:49:13 +0000 Subject: [PATCH 4/6] Apply suggestion from @tatiana --- cosmos/dbt/selector.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cosmos/dbt/selector.py b/cosmos/dbt/selector.py index 82f399b296..517886f641 100644 --- a/cosmos/dbt/selector.py +++ b/cosmos/dbt/selector.py @@ -194,7 +194,6 @@ def select_node_precursors(self, nodes: dict[str, DbtNode], root_id: str, select # When using dbt-loom for cross-project references, external nodes are filtered out # during manifest loading but local nodes may still reference them in depends_on. # Skip missing nodes to gracefully stop traversal at project boundaries. ->>>>>>> main if node_id in nodes: new_generation.update(set(nodes[node_id].depends_on)) processed_nodes.add(node_id) From 2988c304a452f87567e63c7e7784275dffa24074 Mon Sep 17 00:00:00 2001 From: Tatiana Al-Chueyr Date: Mon, 23 Feb 2026 13:49:19 +0000 Subject: [PATCH 5/6] Apply suggestion from @tatiana --- cosmos/dbt/selector.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cosmos/dbt/selector.py b/cosmos/dbt/selector.py index 517886f641..0acbf1b96a 100644 --- a/cosmos/dbt/selector.py +++ b/cosmos/dbt/selector.py @@ -591,7 +591,6 @@ def select_nodes_ids_by_intersection(self) -> set[str]: if self.config.graph_selectors: graph_selected_nodes = self.select_by_graph_operator() for node_id in graph_selected_nodes: -<<<<<<< fix/graph-selector-dbt-loom-external-nodes # When using dbt-loom for cross-project references, external nodes are filtered out # during manifest loading but may be collected during graph traversal via depends_on. # Skip these external node IDs that don't exist in the nodes dict. 
From 5bf236efe6f93afb6b92c15311427383539283e9 Mon Sep 17 00:00:00 2001 From: Tatiana Al-Chueyr Date: Mon, 23 Feb 2026 13:49:24 +0000 Subject: [PATCH 6/6] Apply suggestion from @tatiana --- cosmos/dbt/selector.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cosmos/dbt/selector.py b/cosmos/dbt/selector.py index 0acbf1b96a..1c3b61b8a0 100644 --- a/cosmos/dbt/selector.py +++ b/cosmos/dbt/selector.py @@ -594,8 +594,6 @@ def select_nodes_ids_by_intersection(self) -> set[str]: # When using dbt-loom for cross-project references, external nodes are filtered out # during manifest loading but may be collected during graph traversal via depends_on. # Skip these external node IDs that don't exist in the nodes dict. -======= ->>>>>>> main if node_id not in self.nodes: continue node = self.nodes[node_id]