Skip to content

Commit 5be0927

Browse files
committed
fix: misidentify-column-name-as-alias (#539)
1 parent 0326b59 commit 5be0927

File tree

4 files changed

+49
-9
lines changed

4 files changed

+49
-9
lines changed

sqllineage/core/models.py

+1
Original file line number | Diff line number | Diff line change
@@ -223,6 +223,7 @@ def _to_src_col(
223223
else:
224224
# select unqualified column
225225
source = _to_src_col(src_col, None)
226+
setattr(source, "has_qualifier", False)
226227
for table in set(alias_mapping.values()):
227228
# in case of only one table, we get the right answer
228229
# in case of multiple tables, a bunch of possible tables are set

sqllineage/core/parser/__init__.py

+12-8
Original file line number | Diff line number | Diff line change
@@ -27,13 +27,15 @@ def end_of_query_cleanup(self, holder: SubQueryLineageHolder) -> None:
2727
lateral_aliases = set()
2828
for idx, tgt_col in enumerate(col_grp):
2929
tgt_col.parent = tgt_tbl
30-
for lateral_alias_ref in col_grp[idx + 1 :]: # noqa: E203
31-
if any(
32-
src_col[0] == tgt_col.raw_name
33-
for src_col in lateral_alias_ref.source_columns
34-
):
35-
lateral_aliases.add(tgt_col.raw_name)
36-
break
30+
if hasattr(tgt_col, "has_alias") and tgt_col.has_alias is True:
31+
for lateral_alias_ref in col_grp[idx + 1 :]: # noqa: E203
32+
if any(
33+
src_col[0] == tgt_col.raw_name
34+
for src_col in lateral_alias_ref.source_columns
35+
if src_col[1] is None
36+
):
37+
lateral_aliases.add(tgt_col.raw_name)
38+
break
3739
for src_col in tgt_col.to_source_columns(
3840
holder.get_alias_mapping_from_table_group(tbl_grp)
3941
):
@@ -49,7 +51,9 @@ def end_of_query_cleanup(self, holder: SubQueryLineageHolder) -> None:
4951
if wc.raw_name == "*":
5052
continue
5153
if (
52-
src_col.raw_name == wc.raw_name
54+
hasattr(src_col, "has_qualifier")
55+
and src_col.has_qualifier is False
56+
and src_col.raw_name == wc.raw_name
5357
and src_col.raw_name in lateral_aliases
5458
):
5559
is_lateral_alias_ref = True

sqllineage/core/parser/sqlfluff/models.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -105,10 +105,12 @@ def of(column: BaseSegment, **kwargs) -> Column:
105105
if column.type == "select_clause_element":
106106
source_columns, alias = SqlFluffColumn._get_column_and_alias(column)
107107
if alias:
108-
return Column(
108+
alias_column = Column(
109109
alias,
110110
source_columns=source_columns,
111111
)
112+
setattr(alias_column, "has_alias", True)
113+
return alias_column
112114
if source_columns:
113115
column_name = None
114116
for sub_segment in list_child_segments(column):

tests/sql/column/test_column_select_lateral_alias_ref.py

+33
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,37 @@ def test_column_top_level_lateral_ref():
2727
ColumnQualifierTuple("id", "public.tgt_tbl1"),
2828
),
2929
],
30+
test_sqlparse=False,
31+
)
32+
sql = """
33+
insert into public.tgt_tbl1
34+
(
35+
name,
36+
email
37+
)
38+
select
39+
st1.name,
40+
st1.name || st1.email || '@gmail.com' as email
41+
from
42+
public.src_tbl1 as st1
43+
"""
44+
assert_column_lineage_equal(
45+
sql,
46+
[
47+
(
48+
ColumnQualifierTuple("name", "public.src_tbl1"),
49+
ColumnQualifierTuple("name", "public.tgt_tbl1"),
50+
),
51+
(
52+
ColumnQualifierTuple("name", "public.src_tbl1"),
53+
ColumnQualifierTuple("email", "public.tgt_tbl1"),
54+
),
55+
(
56+
ColumnQualifierTuple("email", "public.src_tbl1"),
57+
ColumnQualifierTuple("email", "public.tgt_tbl1"),
58+
),
59+
],
60+
test_sqlparse=False,
3061
)
3162

3263

@@ -60,6 +91,7 @@ def test_column_lateral_ref_within_subquery():
6091
ColumnQualifierTuple("name", "public.tgt_tbl1"),
6192
),
6293
],
94+
test_sqlparse=False,
6395
)
6496

6597
sql = """
@@ -95,4 +127,5 @@ def test_column_lateral_ref_within_subquery():
95127
ColumnQualifierTuple("name", "public.tgt_tbl1"),
96128
),
97129
],
130+
test_sqlparse=False,
98131
)

0 commit comments

Comments
 (0)