Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion cosmos/dbt/selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,8 @@ def select_node_precursors(self, nodes: dict[str, DbtNode], root_id: str, select
new_generation: set[str] = set()
for node_id in previous_generation:
if node_id not in processed_nodes:
new_generation.update(set(nodes[node_id].depends_on))
if node_id in nodes:
Comment thread
tatiana marked this conversation as resolved.
new_generation.update(set(nodes[node_id].depends_on))
processed_nodes.add(node_id)
selected_nodes.update(new_generation)
previous_generation = new_generation
Expand Down Expand Up @@ -548,6 +549,8 @@ def select_nodes_ids_by_intersection(self) -> set[str]:
if self.config.graph_selectors:
graph_selected_nodes = self.select_by_graph_operator()
for node_id in graph_selected_nodes:
if node_id not in self.nodes:
Comment thread
tatiana marked this conversation as resolved.
continue
node = self.nodes[node_id]
# Since the method below changes the tags of test nodes, it can lead to incorrect
# results during the application of graph selectors. Therefore, it is being run within
Expand Down
38 changes: 38 additions & 0 deletions tests/dbt/test_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,44 @@ def test_select_node_by_child_and_precursors_no_node():
assert list(selected.keys()) == expected


def test_select_nodes_by_precursors_with_external_dependency():
    """Check that ``+`` precursor selection tolerates parents absent from the nodes dict.

    With dbt-loom cross-project references, external nodes are dropped while the
    manifest is loaded (they have no file path), yet local nodes can still list
    them in ``depends_on``. Selecting ``+local_marts`` must skip such missing
    parents rather than raise a ``KeyError``.
    """
    missing_parent_id = "model.upstream_project.external_model"

    # Staging model whose only parent is the external node that is never registered.
    staging_node = DbtNode(
        unique_id=f"{DbtResourceType.MODEL.value}.{SAMPLE_PROJ_PATH.stem}.local_staging",
        resource_type=DbtResourceType.MODEL,
        depends_on=[missing_parent_id],
        file_path=SAMPLE_PROJ_PATH / "models/local_staging.sql",
        tags=[],
        config={},
    )
    # Marts model that sits directly downstream of the local staging model.
    marts_node = DbtNode(
        unique_id=f"{DbtResourceType.MODEL.value}.{SAMPLE_PROJ_PATH.stem}.local_marts",
        resource_type=DbtResourceType.MODEL,
        depends_on=[staging_node.unique_id],
        file_path=SAMPLE_PROJ_PATH / "models/local_marts.sql",
        tags=[],
        config={},
    )

    # Only the two local models are registered; the external parent is deliberately left out.
    available_nodes = {node.unique_id: node for node in (staging_node, marts_node)}

    selected = select_nodes(
        project_dir=SAMPLE_PROJ_PATH,
        nodes=available_nodes,
        select=["+local_marts"],
    )

    # Both local models are picked up; the unknown external parent is not.
    assert marts_node.unique_id in selected
    assert staging_node.unique_id in selected
    assert missing_parent_id not in selected


def test_select_node_by_descendants():
selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, select=["grandparent+"])
expected = [
Expand Down