From 5e5d3ef0a49c811ee717984d73e3b69ab1d08e98 Mon Sep 17 00:00:00 2001 From: Alex Ward Date: Fri, 20 Feb 2026 15:12:55 -0800 Subject: [PATCH] Fix KeyError in graph selector when using + operator with dbt-loom external nodes When using the `+` (precursor) graph selector with dbt-loom cross-project references, `select_node_precursors` crashes with a `KeyError` because external nodes (injected by dbt-loom) are filtered out during manifest loading, but local nodes still reference them in `depends_on`. The dbt-loom support added in #2271 correctly skips external nodes (those without `original_file_path`) during manifest loading. However, when the `+` graph operator traverses upstream dependencies, it encounters `depends_on` entries pointing to these filtered-out external nodes and raises a `KeyError`. This fix adds membership checks in two locations: - `GraphSelector.select_node_precursors`: skip node IDs not present in the nodes dict during upstream traversal - `NodeSelector.select_nodes_ids_by_intersection`: skip external node IDs that were collected during graph traversal but are not in the nodes dict This allows the graph traversal to gracefully stop at project boundaries, which is the correct behavior for cross-project setups where external dependencies are managed by their own DAGs/task groups. 
Closes # Co-authored-by: Cursor --- cosmos/dbt/selector.py | 5 ++++- tests/dbt/test_selector.py | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/cosmos/dbt/selector.py b/cosmos/dbt/selector.py index 9488f185f1..c641cb6551 100644 --- a/cosmos/dbt/selector.py +++ b/cosmos/dbt/selector.py @@ -169,7 +169,8 @@ def select_node_precursors(self, nodes: dict[str, DbtNode], root_id: str, select new_generation: set[str] = set() for node_id in previous_generation: if node_id not in processed_nodes: - new_generation.update(set(nodes[node_id].depends_on)) + if node_id in nodes: + new_generation.update(set(nodes[node_id].depends_on)) processed_nodes.add(node_id) selected_nodes.update(new_generation) previous_generation = new_generation @@ -548,6 +549,8 @@ def select_nodes_ids_by_intersection(self) -> set[str]: if self.config.graph_selectors: graph_selected_nodes = self.select_by_graph_operator() for node_id in graph_selected_nodes: + if node_id not in self.nodes: + continue node = self.nodes[node_id] # Since the method below changes the tags of test nodes, it can lead to incorrect # results during the application of graph selectors. Therefore, it is being run within diff --git a/tests/dbt/test_selector.py b/tests/dbt/test_selector.py index 9c5f5b208a..4644875236 100644 --- a/tests/dbt/test_selector.py +++ b/tests/dbt/test_selector.py @@ -507,6 +507,44 @@ def test_select_node_by_child_and_precursors_no_node(): assert list(selected.keys()) == expected +def test_select_nodes_by_precursors_with_external_dependency(): + """Test that the + selector handles depends_on references to nodes not in the nodes dict. + + When using dbt-loom for cross-project references, external nodes are filtered out during + manifest loading (they have no file path). However, local nodes may still have depends_on + entries pointing to these external nodes. The + selector should gracefully skip missing + nodes instead of raising a KeyError. 
+ """ + external_upstream_id = "model.upstream_project.external_model" + + local_staging = DbtNode( + unique_id=f"{DbtResourceType.MODEL.value}.{SAMPLE_PROJ_PATH.stem}.local_staging", + resource_type=DbtResourceType.MODEL, + depends_on=[external_upstream_id], + file_path=SAMPLE_PROJ_PATH / "models/local_staging.sql", + tags=[], + config={}, + ) + local_marts = DbtNode( + unique_id=f"{DbtResourceType.MODEL.value}.{SAMPLE_PROJ_PATH.stem}.local_marts", + resource_type=DbtResourceType.MODEL, + depends_on=[local_staging.unique_id], + file_path=SAMPLE_PROJ_PATH / "models/local_marts.sql", + tags=[], + config={}, + ) + + nodes_with_external_dep = { + local_staging.unique_id: local_staging, + local_marts.unique_id: local_marts, + } + + selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=nodes_with_external_dep, select=["+local_marts"]) + assert local_marts.unique_id in selected + assert local_staging.unique_id in selected + assert external_upstream_id not in selected + + def test_select_node_by_descendants(): selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, select=["grandparent+"]) expected = [