Skip to content

Commit a4b6957

Browse files
committed
[SPARK-39492] Rework MISSING_COLUMN
1 parent e841fa3 commit a4b6957

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

45 files changed

+181
-158
lines changed

core/src/main/resources/error/error-classes.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -251,12 +251,6 @@
251251
"Key <keyValue> does not exist. Use `try_element_at` to tolerate non-existent key and return NULL instead. If necessary set <config> to \"false\" to bypass this error."
252252
]
253253
},
254-
"MISSING_COLUMN" : {
255-
"message" : [
256-
"Column '<columnName>' does not exist. Did you mean one of the following? [<proposal>]"
257-
],
258-
"sqlState" : "42000"
259-
},
260254
"MISSING_STATIC_PARTITION_COLUMN" : {
261255
"message" : [
262256
"Unknown static partition column: <columnName>"
@@ -352,6 +346,12 @@
352346
],
353347
"sqlState" : "42000"
354348
},
349+
"UNRESOLVED_COLUMN" : {
350+
"message" : [
351+
"A column or function parameter with name <objectName> cannot be resolved. Did you mean one of the following? [<objectList>]"
352+
],
353+
"sqlState" : "42000"
354+
},
355355
"UNSUPPORTED_DATATYPE" : {
356356
"message" : [
357357
"Unsupported data type <typeName>"

core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ class SparkThrowableSuite extends SparkFunSuite {
160160
test("Check if message parameters match message format") {
161161
// Requires 2 args
162162
intercept[IllegalFormatException] {
163-
getMessage("MISSING_COLUMN", null, Array.empty)
163+
getMessage("UNRESOLVED_COLUMN", null, Array.empty)
164164
}
165165

166166
// Does not fail with too many args (expects 0 args)
@@ -172,8 +172,9 @@ class SparkThrowableSuite extends SparkFunSuite {
172172
}
173173

174174
test("Error message is formatted") {
175-
assert(getMessage("MISSING_COLUMN", null, Array("foo", "bar, baz")) ==
176-
"[MISSING_COLUMN] Column 'foo' does not exist. Did you mean one of the following? [bar, baz]")
175+
assert(getMessage("UNRESOLVED_COLUMN", null, Array("`foo`", "`bar`, `baz`")) ==
176+
"[UNRESOLVED_COLUMN] A column or function parameter with name `foo` cannot be resolved. " +
177+
"Did you mean one of the following? [`bar`, `baz`]")
177178
}
178179

179180
test("Try catching legacy SparkError") {

python/pyspark/pandas/tests/test_indexops_spark.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,11 @@ def test_series_transform_negative(self):
3939
):
4040
self.psser.spark.transform(lambda scol: 1)
4141

42-
with self.assertRaisesRegex(AnalysisException, "Column.*non-existent.*does not exist"):
42+
with self.assertRaisesRegex(
43+
AnalysisException,
44+
"[UNRESOLVED_COLUMN] A column or function parameter with name `non-existent` " +
45+
"cannot be resolved. Did you mean one of the following? [`__index_level_0__`, " +
46+
"`x`, `__natural_order__`]"):
4347
self.psser.spark.transform(lambda scol: F.col("non-existent"))
4448

4549
def test_multiindex_transform_negative(self):

python/pyspark/sql/tests/test_utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,9 @@ def test_capture_user_friendly_exception(self):
3636
try:
3737
self.spark.sql("select `中文字段`")
3838
except AnalysisException as e:
39-
self.assertRegex(str(e), "Column '`中文字段`' does not exist")
39+
self.assertRegex(str(e),
40+
"[UNRESOLVED_COLUMN] A column or function parameter with name " +
41+
"`中文字段` cannot be resolved. Did you mean one of the following? []")
4042

4143
def test_spark_upgrade_exception(self):
4244
# SPARK-32161 : Test case to Handle SparkUpgradeException in pythonic way
@@ -72,7 +74,7 @@ def test_get_error_class_state(self):
7274
try:
7375
self.spark.sql("""SELECT a""")
7476
except AnalysisException as e:
75-
self.assertEquals(e.getErrorClass(), "MISSING_COLUMN")
77+
self.assertEquals(e.getErrorClass(), "UNRESOLVED_COLUMN")
7678
self.assertEquals(e.getSqlState(), "42000")
7779

7880

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3419,10 +3419,9 @@ class Analyzer(override val catalogManager: CatalogManager)
34193419
i.userSpecifiedCols, "in the column list", resolver)
34203420

34213421
i.userSpecifiedCols.map { col =>
3422-
i.table.resolve(Seq(col), resolver)
3423-
.getOrElse(i.failAnalysis(
3424-
errorClass = "MISSING_COLUMN",
3425-
messageParameters = Array(col, i.table.output.map(_.name).mkString(", "))))
3422+
i.table.resolve(Seq(col), resolver).getOrElse(
3423+
throw QueryCompilationErrors.unresolvedColumnError(
3424+
col, i.table.output.map(_.name), i.origin))
34263425
}
34273426
}
34283427

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -179,9 +179,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
179179
val missingCol = a.sql
180180
val candidates = operator.inputSet.toSeq.map(_.qualifiedName)
181181
val orderedCandidates = StringUtils.orderStringsBySimilarity(missingCol, candidates)
182-
a.failAnalysis(
183-
errorClass = "MISSING_COLUMN",
184-
messageParameters = Array(missingCol, orderedCandidates.mkString(", ")))
182+
throw QueryCompilationErrors.unresolvedColumnError(
183+
missingCol, orderedCandidates, a.origin)
185184

186185
case s: Star =>
187186
withPosition(s) {

sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,15 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
144144
s"side of the join. The $side-side columns: [${plan.output.map(_.name).mkString(", ")}]")
145145
}
146146

147+
/**
 * Builds the `AnalysisException` raised when a column or function parameter
 * cannot be resolved (error class `UNRESOLVED_COLUMN`).
 *
 * @param colName    the unresolved column/parameter name, quoted via `toSQLId`
 *                   before being placed into the message
 * @param candidates resolution candidates suggested to the user; each one is
 *                   quoted via `toSQLId` and they are joined with ", "
 * @param origin     parse/analysis position attached to the exception so the
 *                   error points at the offending part of the query
 * @return the exception to throw (declared as `Throwable` so call sites can
 *         use `throw ...` as an expression)
 */
def unresolvedColumnError(
    colName: String, candidates: Seq[String], origin: Origin): Throwable = {
  new AnalysisException(
    errorClass = "UNRESOLVED_COLUMN",
    // Quote every identifier and join the candidate list inline.
    messageParameters = Array(toSQLId(colName), candidates.map(toSQLId).mkString(", ")),
    origin = origin)
}
155+
147156
def dataTypeMismatchForDeserializerError(
148157
dataType: DataType, desiredType: String): Throwable = {
149158
new AnalysisException(

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -297,16 +297,16 @@ class AnalysisErrorSuite extends AnalysisTest {
297297
errorClassTest(
298298
"unresolved attributes",
299299
testRelation.select($"abcd"),
300-
"MISSING_COLUMN",
301-
Array("abcd", "a"))
300+
"UNRESOLVED_COLUMN",
301+
Array("`abcd`", "`a`"))
302302

303303
errorClassTest(
304304
"unresolved attributes with a generated name",
305305
testRelation2.groupBy($"a")(max($"b"))
306306
.where(sum($"b") > 0)
307307
.orderBy($"havingCondition".asc),
308-
"MISSING_COLUMN",
309-
Array("havingCondition", "max(b)"))
308+
"UNRESOLVED_COLUMN",
309+
Array("`havingCondition`", "`max(b)`"))
310310

311311
errorTest(
312312
"unresolved star expansion in max",
@@ -321,8 +321,8 @@ class AnalysisErrorSuite extends AnalysisTest {
321321
errorClassTest(
322322
"sorting by attributes are not from grouping expressions",
323323
testRelation2.groupBy($"a", $"c")($"a", $"c", count($"a").as("a3")).orderBy($"b".asc),
324-
"MISSING_COLUMN",
325-
Array("b", "a, c, a3"))
324+
"UNRESOLVED_COLUMN",
325+
Array("`b`", "`a`, `c`, `a3`"))
326326

327327
errorTest(
328328
"non-boolean filters",
@@ -415,8 +415,8 @@ class AnalysisErrorSuite extends AnalysisTest {
415415
"SPARK-9955: correct error message for aggregate",
416416
// When parse SQL string, we will wrap aggregate expressions with UnresolvedAlias.
417417
testRelation2.where($"bad_column" > 1).groupBy($"a")(UnresolvedAlias(max($"b"))),
418-
"MISSING_COLUMN",
419-
Array("bad_column", "a, b, c, d, e"))
418+
"UNRESOLVED_COLUMN",
419+
Array("`bad_column`", "`a`, `b`, `c`, `d`, `e`"))
420420

421421
errorTest(
422422
"slide duration greater than window in time window",
@@ -836,7 +836,8 @@ class AnalysisErrorSuite extends AnalysisTest {
836836
errorTest(
837837
"SPARK-34920: error code to error message",
838838
testRelation2.where($"bad_column" > 1).groupBy($"a")(UnresolvedAlias(max($"b"))),
839-
"Column 'bad_column' does not exist. Did you mean one of the following? [a, b, c, d, e]"
839+
"[UNRESOLVED_COLUMN] A column or function parameter with name `bad_column` cannot be " +
840+
"resolved. Did you mean one of the following? [`a`, `b`, `c`, `d`, `e`]"
840841
:: Nil)
841842

842843
test("SPARK-35080: Unsupported correlated equality predicates in subquery") {

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,8 @@ class AnalysisSuite extends AnalysisTest with Matchers {
103103
assertAnalysisErrorClass(
104104
Project(Seq(UnresolvedAttribute("tBl.a")),
105105
SubqueryAlias("TbL", UnresolvedRelation(TableIdentifier("TaBlE")))),
106-
"MISSING_COLUMN",
107-
Array("tBl.a", "TbL.a"))
106+
"UNRESOLVED_COLUMN",
107+
Array("`tBl`.`a`", "`TbL`.`a`"))
108108

109109
checkAnalysisWithoutViewWrapper(
110110
Project(Seq(UnresolvedAttribute("TbL.a")),
@@ -711,8 +711,8 @@ class AnalysisSuite extends AnalysisTest with Matchers {
711711

712712
test("CTE with non-existing column alias") {
713713
assertAnalysisErrorClass(parsePlan("WITH t(x) AS (SELECT 1) SELECT * FROM t WHERE y = 1"),
714-
"MISSING_COLUMN",
715-
Array("y", "t.x"))
714+
"UNRESOLVED_COLUMN",
715+
Array("`y`", "`t`.`x`"))
716716
}
717717

718718
test("CTE with non-matching column alias") {
@@ -1149,8 +1149,8 @@ class AnalysisSuite extends AnalysisTest with Matchers {
11491149
|GROUP BY c.x
11501150
|ORDER BY c.x + c.y
11511151
|""".stripMargin),
1152-
"MISSING_COLUMN",
1153-
Array("c.y", "x"))
1152+
"UNRESOLVED_COLUMN",
1153+
Array("`c`.`y`", "`x`"))
11541154
}
11551155

11561156
test("SPARK-38118: Func(wrong_type) in the HAVING clause should throw data mismatch error") {

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -133,34 +133,34 @@ class ResolveSubquerySuite extends AnalysisTest {
133133
// TODO: support accessing columns from outer outer query.
134134
assertAnalysisErrorClass(
135135
lateralJoin(t1, lateralJoin(t2, t0.select($"a", $"b", $"c"))),
136-
"MISSING_COLUMN",
137-
Array("a", ""))
136+
"UNRESOLVED_COLUMN",
137+
Array("`a`", ""))
138138
}
139139

140140
test("lateral subquery with unresolvable attributes") {
141141
// SELECT * FROM t1, LATERAL (SELECT a, c)
142142
assertAnalysisErrorClass(
143143
lateralJoin(t1, t0.select($"a", $"c")),
144-
"MISSING_COLUMN",
145-
Array("c", "")
144+
"UNRESOLVED_COLUMN",
145+
Array("`c`", "")
146146
)
147147
// SELECT * FROM t1, LATERAL (SELECT a, b, c, d FROM t2)
148148
assertAnalysisErrorClass(
149149
lateralJoin(t1, t2.select($"a", $"b", $"c", $"d")),
150-
"MISSING_COLUMN",
151-
Array("d", "b, c")
150+
"UNRESOLVED_COLUMN",
151+
Array("`d`", "`b`, `c`")
152152
)
153153
// SELECT * FROM t1, LATERAL (SELECT * FROM t2, LATERAL (SELECT t1.a))
154154
assertAnalysisErrorClass(
155155
lateralJoin(t1, lateralJoin(t2, t0.select($"t1.a"))),
156-
"MISSING_COLUMN",
157-
Array("t1.a", "")
156+
"UNRESOLVED_COLUMN",
157+
Array("`t1`.`a`", "")
158158
)
159159
// SELECT * FROM t1, LATERAL (SELECT * FROM t2, LATERAL (SELECT a, b))
160160
assertAnalysisErrorClass(
161161
lateralJoin(t1, lateralJoin(t2, t0.select($"a", $"b"))),
162-
"MISSING_COLUMN",
163-
Array("a", "")
162+
"UNRESOLVED_COLUMN",
163+
Array("`a`", "")
164164
)
165165
}
166166

0 commit comments

Comments (0)