@@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.connector.distributions;

import org.apache.spark.annotation.Experimental;
import org.apache.spark.sql.connector.expressions.Expression;

/**
* A distribution where tuples that share the same values for clustering expressions are co-located
* in the same partition.
*
* @since 3.2.0
*/
@Experimental
public interface ClusteredDistribution extends Distribution {
Contributor:
We already have org.apache.spark.sql.connector.read.partitioning.ClusteredDistribution. Shall we remove that and always use this new one? The new one is better as it's more flexible (it uses Expression rather than String).

It's better if we can avoid breaking existing APIs, but I can't figure out a good way. The old ClusteredDistribution is only used in a mixin trait and marked as Evolving. Seems OK to break it.

Contributor Author:
We already started this discussion a bit in the comments on the original PR. Some background here and here.

Overall, there are two options: break the API now and use the new interfaces instead of the old ones in other places, or evolve these two separately for a while and replace the read side once we have a clear plan for bucketed joins.

My first idea was to break the API and migrate to the new interfaces as suggested by @cloud-fan, since that seems inevitable at some point. However, reviewers raised a couple of good points on the original PR, and I am now inclined to evolve these separately for a while until we know what changes will be required to support bucketed tables. I would like to avoid breaking the read side twice if possible.

That said, I don't feel strongly here and each option has its own benefits and drawbacks. I'll be fine with either one.

Contributor Author:
Also cc @viirya @HeartSaVioR @rdblue @dongjoon-hyun @sunchao, who reviewed the change.

Contributor:
I'm OK to break it later, just don't leave the old one there forever. Shall we create a blocker ticket for Spark 3.2 to remove the old ClusteredDistribution?

Contributor Author:
I created a blocker SPARK-33807 to track this.

Thanks, everyone!

Contributor:
Hi @aokolnychyi, do you have time to clean it up now?

Member:
Since Partitioning.satisfy is still referring to the old interface, I'm wondering whether we should defer this to the 3.3.0 timeframe. For my storage partitioned join work, I found that I need to change the Partitioning interface again in a backward-incompatible way so that it can work for joins. It may be better to break it once instead of twice. That's my 2 cents.

Contributor:
OK, let's retarget it to 3.3

Contributor Author:
Sorry for my late reply. I also assumed we don't have any clarity yet on how the Partitioning interface should look, so I think delaying the update and breaking that API only once is better.

/**
* Returns clustering expressions.
*/
Expression[] clustering();
}
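
As a quick illustration of how a connector might request this distribution, here is a minimal sketch that builds a ClusteredDistribution from column references using the Distributions factory added further down in this diff and the existing Expressions.column helper. The class and column names ("dept", "region") are invented for the example.

```java
import org.apache.spark.sql.connector.distributions.ClusteredDistribution;
import org.apache.spark.sql.connector.distributions.Distributions;
import org.apache.spark.sql.connector.expressions.Expression;
import org.apache.spark.sql.connector.expressions.Expressions;

public class ClusteredDistributionExample {
  public static void main(String[] args) {
    // Cluster rows by (dept, region): rows that share the same values for
    // these expressions must be co-located in the same output partition.
    Expression[] clustering = new Expression[] {
        Expressions.column("dept"),
        Expressions.column("region")
    };
    ClusteredDistribution distribution = Distributions.clustered(clustering);

    // The clustering expressions can be read back, e.g. when planning the exchange.
    for (Expression expr : distribution.clustering()) {
      System.out.println(expr.describe());
    }
  }
}
```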
@@ -0,0 +1,28 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.connector.distributions;

import org.apache.spark.annotation.Experimental;

/**
* An interface that defines how data is distributed across partitions.
*
* @since 3.2.0
*/
@Experimental
public interface Distribution {}
@@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.connector.distributions;

import org.apache.spark.annotation.Experimental;
import org.apache.spark.sql.connector.expressions.Expression;
import org.apache.spark.sql.connector.expressions.SortOrder;

/**
* Helper methods to create distributions to pass into Spark.
*
* @since 3.2.0
*/
@Experimental
public class Distributions {
private Distributions() {
}

/**
* Creates a distribution where no promises are made about co-location of data.
*/
public static UnspecifiedDistribution unspecified() {
return LogicalDistributions.unspecified();
}

/**
* Creates a distribution where tuples that share the same values for clustering expressions are
* co-located in the same partition.
*/
public static ClusteredDistribution clustered(Expression[] clustering) {
return LogicalDistributions.clustered(clustering);
}

/**
* Creates a distribution where tuples have been ordered across partitions according
* to ordering expressions, but not necessarily within a given partition.
*/
public static OrderedDistribution ordered(SortOrder[] ordering) {
return LogicalDistributions.ordered(ordering);
}
}
@@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.connector.distributions;

import org.apache.spark.annotation.Experimental;
import org.apache.spark.sql.connector.expressions.SortOrder;

/**
* A distribution where tuples have been ordered across partitions according
* to ordering expressions, but not necessarily within a given partition.
*
* @since 3.2.0
*/
@Experimental
public interface OrderedDistribution extends Distribution {
/**
* Returns ordering expressions.
*/
SortOrder[] ordering();
}
@@ -0,0 +1,28 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.connector.distributions;

import org.apache.spark.annotation.Experimental;

/**
* A distribution where no promises are made about co-location of data.
*
* @since 3.2.0
*/
@Experimental
public interface UnspecifiedDistribution extends Distribution {}
@@ -164,4 +164,15 @@ public static Transform hours(String column) {
return LogicalExpressions.hours(Expressions.column(column));
}

/**
* Create a sort expression.
*
* @param expr an expression to produce values to sort
* @param direction direction of the sort
* @param nullOrder null order of the sort
* @return a SortOrder
*/
public static SortOrder sort(Expression expr, SortDirection direction, NullOrdering nullOrder) {
return LogicalExpressions.sort(expr, direction, nullOrder);
}
}
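
To show how the new sort() factory composes with the SortDirection and NullOrdering enums introduced below, here is a minimal sketch; the column name "ts" and the class name are made up for illustration.

```java
import org.apache.spark.sql.connector.expressions.Expressions;
import org.apache.spark.sql.connector.expressions.NamedReference;
import org.apache.spark.sql.connector.expressions.NullOrdering;
import org.apache.spark.sql.connector.expressions.SortDirection;
import org.apache.spark.sql.connector.expressions.SortOrder;

public class SortOrderExample {
  public static void main(String[] args) {
    // Sort by the "ts" column, ascending, with nulls last.
    NamedReference column = Expressions.column("ts");
    SortOrder order =
        Expressions.sort(column, SortDirection.ASCENDING, NullOrdering.NULLS_LAST);

    // A SortOrder exposes its three components through simple accessors.
    System.out.println(order.expression().describe());
    System.out.println(order.direction());     // "ASC" via SortDirection#toString
    System.out.println(order.nullOrdering());  // "NULLS LAST" via NullOrdering#toString
  }
}
```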
@@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.connector.expressions;

import org.apache.spark.annotation.Experimental;

/**
* A null order used in sorting expressions.
*
* @since 3.2.0
*/
@Experimental
public enum NullOrdering {
NULLS_FIRST, NULLS_LAST;

@Override
public String toString() {
switch (this) {
case NULLS_FIRST:
return "NULLS FIRST";
case NULLS_LAST:
return "NULLS LAST";
default:
throw new IllegalArgumentException("Unexpected null order: " + this);
}
}
}
@@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.connector.expressions;

import org.apache.spark.annotation.Experimental;

/**
* A sort direction used in sorting expressions.
*
* @since 3.2.0
*/
@Experimental
public enum SortDirection {
ASCENDING, DESCENDING;

@Override
public String toString() {
switch (this) {
case ASCENDING:
return "ASC";
case DESCENDING:
return "DESC";
default:
throw new IllegalArgumentException("Unexpected sort direction: " + this);
}
}
}
@@ -0,0 +1,43 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.connector.expressions;

import org.apache.spark.annotation.Experimental;

/**
* Represents a sort order in the public expression API.
*
* @since 3.2.0
*/
@Experimental
public interface SortOrder extends Expression {
/**
* Returns the sort expression.
*/
Expression expression();

/**
* Returns the sort direction.
*/
SortDirection direction();

/**
* Returns the null ordering.
*/
NullOrdering nullOrdering();
}
@@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.connector.write;

import org.apache.spark.annotation.Experimental;
import org.apache.spark.sql.connector.distributions.Distribution;
import org.apache.spark.sql.connector.distributions.UnspecifiedDistribution;
import org.apache.spark.sql.connector.expressions.SortOrder;

/**
* A write that requires a specific distribution and ordering of data.
*
* @since 3.2.0
*/
@Experimental
public interface RequiresDistributionAndOrdering extends Write {
/**
* Returns the distribution required by this write.
* <p>
* Spark will distribute incoming records across partitions to satisfy the required distribution
* before passing the records to the data source table on write.
* <p>
* Implementations may return {@link UnspecifiedDistribution} if they don't require any specific
* distribution of data on write.
*
* @return the required distribution
*/
Distribution requiredDistribution();

/**
* Returns the ordering required by this write.
* <p>
* Spark will order incoming records within partitions to satisfy the required ordering
* before passing those records to the data source table on write.
* <p>
* Implementations may return an empty array if they don't require any specific ordering of data
Contributor:
Pardon for the late comment.

The return value is an array.
It would be better to illustrate when more than one SortOrder would be returned (and how they are used).

* on write.
*
* @return the required ordering
*/
SortOrder[] requiredOrdering();
}
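
Picking up the reviewer's point above about requiredOrdering() returning more than one element, below is a hedged sketch of a connector write implementing this interface. The table layout, column names, and class name are invented, and the other Write methods are assumed to keep their default implementations. Multiple SortOrder entries describe a multi-column sort: records are ordered by the first entry, with ties broken by the second, and so on.

```java
import org.apache.spark.sql.connector.distributions.Distribution;
import org.apache.spark.sql.connector.distributions.Distributions;
import org.apache.spark.sql.connector.expressions.Expression;
import org.apache.spark.sql.connector.expressions.Expressions;
import org.apache.spark.sql.connector.expressions.NullOrdering;
import org.apache.spark.sql.connector.expressions.SortDirection;
import org.apache.spark.sql.connector.expressions.SortOrder;
import org.apache.spark.sql.connector.write.RequiresDistributionAndOrdering;

// Hypothetical write for a table partitioned by "date": records with the same
// date should land in the same task, and within each task they should arrive
// sorted by (date, id) so the writer can roll files without extra buffering.
public class ExampleWrite implements RequiresDistributionAndOrdering {

  @Override
  public Distribution requiredDistribution() {
    // Co-locate all records that share the same "date" value.
    return Distributions.clustered(new Expression[] { Expressions.column("date") });
  }

  @Override
  public SortOrder[] requiredOrdering() {
    // Two SortOrder entries: sort by "date" first, then break ties by "id".
    return new SortOrder[] {
        Expressions.sort(
            Expressions.column("date"), SortDirection.ASCENDING, NullOrdering.NULLS_FIRST),
        Expressions.sort(
            Expressions.column("id"), SortDirection.ASCENDING, NullOrdering.NULLS_FIRST)
    };
  }
}
```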