diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index 02b0661eb3dd7..cc148d9e247f6 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -53,6 +53,8 @@ object MimaExcludes {
     ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.util.ExecutionListenerManager.this"),
     // [SPARK-37786][SQL] StreamingQueryListener support use SQLConf.get to get corresponding SessionState's SQLConf
     ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.streaming.StreamingQueryManager.this"),
+    // [SPARK-38432][SQL] Reactor framework so as JDBC dialect could compile filter by self way
+    ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.sources.Filter.toV2"),
     // [SPARK-37600][BUILD] Upgrade to Hadoop 3.3.2
     ProblemFilters.exclude[MissingClassProblem]("org.apache.hadoop.shaded.net.jpountz.lz4.LZ4Compressor"),
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expression.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expression.java
index 9f6c0975ae0e1..76dfe73f666cf 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expression.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expression.java
@@ -17,6 +17,8 @@
 package org.apache.spark.sql.connector.expressions;

+import java.util.Arrays;
+
 import org.apache.spark.annotation.Evolving;

 /**
@@ -26,8 +28,23 @@
  */
 @Evolving
 public interface Expression {
+  Expression[] EMPTY_EXPRESSION = new Expression[0];
+
   /**
    * Format the expression as a human readable SQL-like string.
    */
   default String describe() { return this.toString(); }
+
+  /**
+   * Returns an array of the children of this node. Children should not change.
+   */
+  Expression[] children();
+
+  /**
+   * List of fields or columns that are referenced by this expression.
+   */
+  default NamedReference[] references() {
+    return Arrays.stream(children()).map(e -> e.references())
+      .flatMap(Arrays::stream).distinct().toArray(NamedReference[]::new);
+  }
 }
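To make the new contract concrete: a leaf expression reports itself through references(),
and composite nodes inherit the default, which flattens and de-duplicates the children's
references. A minimal sketch (MyColumn and MyEquals are hypothetical illustration classes,
not part of this patch):

import org.apache.spark.sql.connector.expressions.Expression;
import org.apache.spark.sql.connector.expressions.NamedReference;

// Hypothetical leaf: a column reference that is its own (only) reference.
class MyColumn implements NamedReference {
  private final String name;
  MyColumn(String name) { this.name = name; }
  @Override public String[] fieldNames() { return new String[] { name }; }
  @Override public Expression[] children() { return EMPTY_EXPRESSION; }
  @Override public NamedReference[] references() { return new NamedReference[] { this }; }
}

// Hypothetical binary node: only children() is implemented; the default references()
// inherited from Expression unions the children's references.
class MyEquals implements Expression {
  private final Expression left;
  private final Expression right;
  MyEquals(Expression left, Expression right) { this.left = left; this.right = right; }
  @Override public Expression[] children() { return new Expression[] { left, right }; }
}

Since distinct() relies on equals(), a tree that uses the same MyColumn instance on both
sides of MyEquals reports that column exactly once in references().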
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralScalarExpression.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralScalarExpression.java
index b3dd2cbfe3d7d..8952761f9ef34 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralScalarExpression.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralScalarExpression.java
@@ -19,77 +19,19 @@
 import java.io.Serializable;
 import java.util.Arrays;
+import java.util.Objects;

 import org.apache.spark.annotation.Evolving;
+import org.apache.spark.sql.connector.expressions.filter.Predicate;
 import org.apache.spark.sql.connector.util.V2ExpressionSQLBuilder;

-// scalastyle:off line.size.limit
 /**
  * The general representation of SQL scalar expressions, which contains the upper-cased
- * expression name and all the children expressions.
+ * expression name and all the children expressions. Please also see {@link Predicate}
+ * for the supported predicate expressions.
  * <p>
  * The currently supported SQL scalar expressions:
  * <ol>
- *  <li><code>IS_NULL</code>: <code>expr IS NULL</code></li>
- *  <li><code>IS_NOT_NULL</code>: <code>expr IS NOT NULL</code></li>
- *  <li><code>=</code>: <code>expr1 = expr2</code></li>
- *  <li><code>!=</code>: <code>expr1 != expr2</code></li>
- *  <li><code><></code>: <code>expr1 <> expr2</code></li>
- *  <li><code><=></code>: <code>expr1 <=> expr2</code></li>
- *  <li><code><</code>: <code>expr1 < expr2</code></li>
- *  <li><code><=</code>: <code>expr1 <= expr2</code></li>
- *  <li><code>></code>: <code>expr1 > expr2</code></li>
- *  <li><code>>=</code>: <code>expr1 >= expr2</code></li>
  *  <li><code>+</code>: <code>expr1 + expr2</code></li>
- *  <li><code>AND</code>: <code>expr1 AND expr2</code></li>
- *  <li><code>OR</code>: <code>expr1 OR expr2</code></li>
- *  <li><code>NOT</code>: <code>NOT expr</code></li>
  *  <li><code>~</code>: <code>~ expr</code></li>
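For a sense of what these scalar expressions look like as objects, a sketch using the
constructor this file exposes, GeneralScalarExpression(name, children). FieldReference and
LiteralValue are the existing V2 expression classes; calling their Scala companions from
Java as shown is an assumption of this sketch:

import org.apache.spark.sql.connector.expressions.Expression;
import org.apache.spark.sql.connector.expressions.FieldReference;
import org.apache.spark.sql.connector.expressions.GeneralScalarExpression;
import org.apache.spark.sql.connector.expressions.LiteralValue;
import org.apache.spark.sql.types.DataTypes;

public class ScalarExpressionExample {
  public static void main(String[] args) {
    // salary + 100, i.e. the expression named "+" with two children
    Expression bonus = new GeneralScalarExpression("+", new Expression[] {
      FieldReference.apply("salary"),
      new LiteralValue<>(100, DataTypes.IntegerType)});
    // describe() formats a human readable SQL-like string
    System.out.println(bonus.describe());
  }
}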
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/Predicate.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/Predicate.java
+ * The currently supported predicate expressions:
+ * <ol>
+ *  <li><code>IS_NULL</code>: <code>expr IS NULL</code></li>
+ *  <li><code>IS_NOT_NULL</code>: <code>expr IS NOT NULL</code></li>
+ *  <li><code>STARTS_WITH</code>: <code>expr1 LIKE 'expr2%'</code></li>
+ *  <li><code>ENDS_WITH</code>: <code>expr1 LIKE '%expr2'</code></li>
+ *  <li><code>CONTAINS</code>: <code>expr1 LIKE '%expr2%'</code></li>
+ *  <li><code>IN</code>: <code>expr IN (expr1, expr2, ...)</code></li>
+ *  <li><code>=</code>: <code>expr1 = expr2</code></li>
+ *  <li><code><></code>: <code>expr1 <> expr2</code></li>
+ *  <li><code><=></code>: <code>expr1 <=> expr2</code></li>
+ *  <li><code><</code>: <code>expr1 < expr2</code></li>
+ *  <li><code><=</code>: <code>expr1 <= expr2</code></li>
+ *  <li><code>></code>: <code>expr1 > expr2</code></li>
+ *  <li><code>>=</code>: <code>expr1 >= expr2</code></li>
+ *  <li><code>AND</code>: <code>expr1 AND expr2</code></li>
+ *  <li><code>OR</code>: <code>expr1 OR expr2</code></li>
+ *  <li><code>NOT</code>: <code>NOT expr</code></li>
+ *  <li><code>ALWAYS_TRUE</code>: <code>TRUE</code></li>
+ *  <li><code>ALWAYS_FALSE</code>: <code>FALSE</code></li>
+ * </ol>
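The same construction pattern applies to predicates, which this patch routes through the
Predicate(name, children) constructor. A sketch, reusing the Java-from-Scala assumptions
of the earlier snippet (the main() scaffold is illustrative):

import org.apache.spark.sql.connector.expressions.Expression;
import org.apache.spark.sql.connector.expressions.FieldReference;
import org.apache.spark.sql.connector.expressions.LiteralValue;
import org.apache.spark.sql.connector.expressions.filter.Predicate;
import org.apache.spark.sql.types.DataTypes;

public class PredicateExample {
  public static void main(String[] args) {
    // age > 18
    Predicate adult = new Predicate(">", new Expression[] {
      FieldReference.apply("age"),
      new LiteralValue<>(18, DataTypes.IntegerType)});
    // name IS NOT NULL
    Predicate named = new Predicate("IS_NOT_NULL", new Expression[] {
      FieldReference.apply("name")});
    // Predicates handed to a source are interpreted as ANDed together (see below).
    Predicate both = new Predicate("AND", new Expression[] {adult, named});
    System.out.println(both.describe());
  }
}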
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/SupportsPushDownV2Filters.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/SupportsPushDownV2Filters.java
-   * Rows should be returned from the data source if and only if all of the filters match. That is,
-   * filters must be interpreted as ANDed together.
+   * Rows should be returned from the data source if and only if all of the predicates match.
+   * That is, predicates must be interpreted as ANDed together.
    */
-  Filter[] pushFilters(Filter[] filters);
+  Predicate[] pushPredicates(Predicate[] predicates);

   /**
-   * Returns the filters that are pushed to the data source via {@link #pushFilters(Filter[])}.
+   * Returns the predicates that are pushed to the data source via
+   * {@link #pushPredicates(Predicate[])}.
    * <p>
-   * There are 3 kinds of filters:
+   * There are 3 kinds of predicates:
    * <ol>
-   *  <li>pushable filters which don't need to be evaluated again after scanning.</li>
-   *  <li>pushable filters which still need to be evaluated after scanning, e.g. parquet
-   *      row group filter.</li>
-   *  <li>non-pushable filters.</li>
+   *  <li>pushable predicates which don't need to be evaluated again after scanning.</li>
+   *  <li>pushable predicates which still need to be evaluated after scanning, e.g. parquet
+   *      row group filter.</li>
+   *  <li>non-pushable predicates.</li>
    * </ol>
    * <p>
-   * Both case 1 and 2 should be considered as pushed filters and should be returned by this method.
+   * Both case 1 and 2 should be considered as pushed predicates and should be returned
+   * by this method.
    * <p>
-   * It's possible that there is no filters in the query and {@link #pushFilters(Filter[])}
-   * is never called, empty array should be returned for this case.
+   * It's possible that there are no predicates in the query and
+   * {@link #pushPredicates(Predicate[])} is never called;
+   * an empty array should be returned for this case.
    */
-  Filter[] pushedFilters();
+  Predicate[] pushedPredicates();
 }
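To show the contract in code: a scan builder accepts the predicates it can handle and
returns the rest to Spark. A sketch assuming the interface above lives in
org.apache.spark.sql.connector.read, with a hypothetical canTranslate() check
(MyScanBuilder and canTranslate are illustrative, not part of this patch):

import java.util.ArrayList;
import java.util.List;
import org.apache.spark.sql.connector.expressions.filter.Predicate;
import org.apache.spark.sql.connector.read.SupportsPushDownV2Filters;

abstract class MyScanBuilder implements SupportsPushDownV2Filters {
  private final List<Predicate> pushed = new ArrayList<>();

  // Hypothetical dialect-specific check, e.g. "can I compile this to my SQL?".
  abstract boolean canTranslate(Predicate predicate);

  @Override
  public Predicate[] pushPredicates(Predicate[] predicates) {
    List<Predicate> unsupported = new ArrayList<>();
    for (Predicate p : predicates) {
      if (canTranslate(p)) {
        pushed.add(p);       // cases 1 and 2: handled (at least partially) by the source
      } else {
        unsupported.add(p);  // case 3: Spark evaluates these after scanning
      }
    }
    return unsupported.toArray(new Predicate[0]);
  }

  @Override
  public Predicate[] pushedPredicates() {
    return pushed.toArray(new Predicate[0]);  // empty when nothing was pushed
  }
}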
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java
index 0af0d88b0f622..91dae749f974b 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java
@@ -17,39 +17,53 @@
package org.apache.spark.sql.connector.util;
-import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
+import java.util.stream.Collectors;
import org.apache.spark.sql.connector.expressions.Expression;
-import org.apache.spark.sql.connector.expressions.FieldReference;
+import org.apache.spark.sql.connector.expressions.NamedReference;
import org.apache.spark.sql.connector.expressions.GeneralScalarExpression;
-import org.apache.spark.sql.connector.expressions.LiteralValue;
+import org.apache.spark.sql.connector.expressions.Literal;
/**
* The builder to generate SQL from V2 expressions.
*/
public class V2ExpressionSQLBuilder {
+
public String build(Expression expr) {
- if (expr instanceof LiteralValue) {
- return visitLiteral((LiteralValue) expr);
- } else if (expr instanceof FieldReference) {
- return visitFieldReference((FieldReference) expr);
+ if (expr instanceof Literal) {
+ return visitLiteral((Literal) expr);
+ } else if (expr instanceof NamedReference) {
+ return visitNamedReference((NamedReference) expr);
} else if (expr instanceof GeneralScalarExpression) {
GeneralScalarExpression e = (GeneralScalarExpression) expr;
String name = e.name();
switch (name) {
+ case "IN": {
+ List
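The point of the refactoring is that a JDBC dialect can compile predicates its own way
instead of being limited to one fixed translation. A sketch of such a hook, assuming
visitNamedReference is an overridable protected method (the dispatch in build() above
suggests this, but the method bodies are not shown in this hunk); MyDialectSQLBuilder is
hypothetical:

import org.apache.spark.sql.connector.expressions.NamedReference;
import org.apache.spark.sql.connector.util.V2ExpressionSQLBuilder;

class MyDialectSQLBuilder extends V2ExpressionSQLBuilder {
  @Override
  protected String visitNamedReference(NamedReference ref) {
    // Emit backtick-quoted identifiers instead of the default rendering.
    return "`" + String.join("`.`", ref.fieldNames()) + "`";
  }
}

A dialect would then render a pushed predicate with new MyDialectSQLBuilder().build(expr)
and leave the predicate un-pushed if translation fails.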