apache · jackye1995 · Apr 2, 2021 · aokolnychyi · Apr 1, 2021 · openinx
diff --git a/api/src/main/java/org/apache/iceberg/RowKey.java b/api/src/main/java/org/apache/iceberg/RowKey.java
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg;
+
+import java.io.Serializable;
+import java.util.Set;
+import org.apache.iceberg.exceptions.ValidationException;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
+import org.apache.iceberg.relocated.com.google.common.collect.Sets;
+import org.apache.iceberg.types.Types;
+
+/**
+ * Row key of a table.
+ * <p>
+ * A row key defines what makes a row of a table unique,
+ * similar to the concept of primary key in a relational database system.
+ * Uniqueness is based on the values of an unordered set of {@link RowKeyIdentifierField}.
+ * Iceberg itself does not enforce it; the key is leveraged by operations such as streaming upsert.
+ */
+public class RowKey implements Serializable {
+
+  private static final RowKey NOT_IDENTIFIED = new RowKey(new Schema(), Sets.newHashSet());
+
+  private final Schema schema;
+  private final RowKeyIdentifierField[] identifierFields;
+  // set view of identifierFields, built on first access; transient, so it is not part of the serialized form
+  private transient volatile Set<RowKeyIdentifierField> identifierFieldSet;
+
+  private RowKey(Schema schema, Set<RowKeyIdentifierField> fields) {
+    this.schema = schema;
+    this.identifierFields = fields.toArray(new RowKeyIdentifierField[0]);
+  }
+
+  /**
+   * Returns the {@link Schema} that this row key refers to
+   */
+  public Schema schema() {
+    return schema;
+  }
+
+  /**
+   * Returns the {@link RowKeyIdentifierField identifier fields} of the row key as an immutable set.
+   *
+   * @return the set of fields in the row key
+   */
+  public Set<RowKeyIdentifierField> identifierFields() {
+    return lazyIdentifierFieldSet();
+  }
+
+  private Set<RowKeyIdentifierField> lazyIdentifierFieldSet() {
+    if (identifierFieldSet != null) {
+      return identifierFieldSet;
+    }
+
+    synchronized (this) {
+      if (identifierFieldSet == null) {
+        identifierFieldSet = ImmutableSet.copyOf(identifierFields);
+      }
+      return identifierFieldSet;
+    }
+  }
+
+  /**
+   * Returns the default row key, which contains no identifier field
+   */
+  public static RowKey notIdentified() {
+    return NOT_IDENTIFIED;
+  }
+
+  /**
+   * Returns true if this row key contains no identifier field, like the default one
+   */
+  public boolean isNotIdentified() {
+    return identifierFields.length == 0;
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (this == other) {
+      return true;
+    }
+
+    boolean sameType = other != null && getClass() == other.getClass();
+
+    // only the identifier fields are compared; the schema does not take part in equality
+    return sameType && identifierFields().equals(((RowKey) other).identifierFields());
+  }
+
+  @Override
+  public int hashCode() {
+    return identifierFields().hashCode();
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder("[");
+    for (RowKeyIdentifierField field : identifierFields) {
+      sb.append("\n").append("  ").append(field);
+    }
+
+    // the closing bracket goes on its own line, unless the key has no field at all
+    if (identifierFields.length != 0) {
+      sb.append("\n");
+    }
+
+    return sb.append("]").toString();
+  }
+
+  /**
+   * Starts building a row key against the given {@link Schema}.
+   *
+   * @param schema the schema whose columns can be added to the row key
+   * @return a new {@link Builder row key builder} bound to the schema
+   */
+  public static Builder builderFor(Schema schema) {
+    return new Builder(schema);
+  }
+
+  /**
+   * A builder used to create a valid {@link RowKey row key}.
+   * <p>
+   * New builders are obtained from {@link #builderFor(Schema)}.
+   */
+  public static class Builder {
+    private final Schema schema;
+    private final Set<RowKeyIdentifierField> fields = Sets.newHashSet();
+
+    private Builder(Schema schema) {
+      this.schema = schema;
+    }
+
+    public Builder addField(String name) {
+      Types.NestedField field = schema.findField(name);
+      ValidationException.check(field != null, "Cannot find column with name %s in schema %s", name, schema);
+      return addField(field);
+    }
+
+    public Builder addField(int id) {
+      Types.NestedField field = schema.findField(id);
+      ValidationException.check(field != null, "Cannot find column with ID %s in schema %s", id, schema);
+      return addField(field);
+    }
+
+    private Builder addField(Types.NestedField field) {
+      // only required columns of a primitive type can identify a row
+      ValidationException.check(field.isRequired(),
+          "Cannot add column %s to row key because it is not a required column in schema %s", field, schema);
+      ValidationException.check(field.type().isPrimitiveType(),
+          "Cannot add column %s to row key because it is not a primitive data type in schema %s", field, schema);
+      fields.add(new RowKeyIdentifierField(field.fieldId()));
+      return this;
+    }
+
+    /**
+     * Builds the row key; the shared not-identified key is returned when no field has been added.
+     */
+    public RowKey build() {
+      return fields.isEmpty() ? NOT_IDENTIFIED : new RowKey(schema, fields);
+    }
+  }
+}
diff --git a/api/src/main/java/org/apache/iceberg/RowKeyIdentifierField.java b/api/src/main/java/org/apache/iceberg/RowKeyIdentifierField.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg;
+
+import java.io.Serializable;
+import java.util.Objects;
+
+/**
+ * A single field of a {@link RowKey}, referencing its source column by field ID.
+ * <p>
+ * The source column must be:
+ * <ol>
+ *   <li>a required column in the table schema</li>
+ *   <li>a primitive type column</li>
+ * </ol>
+ */
+public class RowKeyIdentifierField implements Serializable {
+
+  private final int sourceId;
+
+  RowKeyIdentifierField(int sourceId) {
+    this.sourceId = sourceId;
+  }
+
+  public int sourceId() {
+    return sourceId;
+  }
+
+  @Override
+  public String toString() {
+    return new StringBuilder("(").append(sourceId).append(")").toString();
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (this == other) {
+      return true;
+    }
+
+    boolean sameType = other != null && getClass() == other.getClass();
+    return sameType && sourceId == ((RowKeyIdentifierField) other).sourceId;
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(sourceId);
+  }
+}
diff --git a/api/src/main/java/org/apache/iceberg/Table.java b/api/src/main/java/org/apache/iceberg/Table.java
@@ -88,6 +88,13 @@ default String name() {
    */
   Map<Integer, SortOrder> sortOrders();
 
+  /**
+   * Return the {@link RowKey row key} for this table.
+   *
+   * @return this table's row key; use {@link RowKey#isNotIdentified()} to check whether it has any field
+   */
+  RowKey rowKey();
+
   /**
    * Return a map of string properties for this table.
    *

diff --git a/api/src/main/java/org/apache/iceberg/Tables.java b/api/src/main/java/org/apache/iceberg/Tables.java
@@ -46,7 +46,17 @@ default Table create(Schema schema,
                        SortOrder order,
                        Map<String, String> properties,
                        String tableIdentifier) {
-    throw new UnsupportedOperationException(this.getClass().getName() + " does not implement create with a sort order");
+    return create(schema, spec, order, RowKey.notIdentified(), properties, tableIdentifier);
+  }
+
+  default Table create(Schema schema,
+                       PartitionSpec spec,
+                       SortOrder order,
+                       RowKey rowKey,
+                       Map<String, String> properties,
+                       String tableIdentifier) {
+    String implName = this.getClass().getName();
+    throw new UnsupportedOperationException(implName + " does not implement create with a sort order and row key");
   }
 
   Table load(String tableIdentifier);

diff --git a/api/src/main/java/org/apache/iceberg/catalog/Catalog.java b/api/src/main/java/org/apache/iceberg/catalog/Catalog.java
@@ -22,6 +22,7 @@
 import java.util.List;
 import java.util.Map;
 import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.RowKey;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.SortOrder;
 import org.apache.iceberg.Table;
@@ -360,6 +361,14 @@ interface TableBuilder {
      */
     TableBuilder withSortOrder(SortOrder sortOrder);
 
+    /**
+     * Sets the {@link RowKey row key} that defines row uniqueness for the table.
+     *
+     * @param rowKey a row key
+     * @return this for method chaining
+     */
+    TableBuilder withRowKey(RowKey rowKey);
+
     /**
      * Sets a location for the table.
      *

diff --git a/core/src/main/java/org/apache/iceberg/BaseMetadataTable.java b/core/src/main/java/org/apache/iceberg/BaseMetadataTable.java
@@ -37,6 +37,7 @@
 abstract class BaseMetadataTable implements Table, HasTableOperations, Serializable {
   private final PartitionSpec spec = PartitionSpec.unpartitioned();
   private final SortOrder sortOrder = SortOrder.unsorted();
+  private final RowKey rowKey = RowKey.notIdentified();
   private final TableOperations ops;
   private final Table table;
   private final String name;
@@ -108,6 +109,11 @@ public Map<Integer, SortOrder> sortOrders() {
     return ImmutableMap.of(sortOrder.orderId(), sortOrder);
   }
 
+  @Override
+  public RowKey rowKey() {
+    return rowKey;  // fixed to RowKey.notIdentified() by the field initializer
+  }
+
   @Override
   public Map<String, String> properties() {
     return ImmutableMap.of();

diff --git a/core/src/main/java/org/apache/iceberg/BaseMetastoreCatalog.java b/core/src/main/java/org/apache/iceberg/BaseMetastoreCatalog.java
@@ -160,6 +160,7 @@ protected class BaseMetastoreCatalogTableBuilder implements TableBuilder {
     private final ImmutableMap.Builder<String, String> propertiesBuilder = ImmutableMap.builder();
     private PartitionSpec spec = PartitionSpec.unpartitioned();
     private SortOrder sortOrder = SortOrder.unsorted();
+    private RowKey rowKey = RowKey.notIdentified();
     private String location = null;
 
     public BaseMetastoreCatalogTableBuilder(TableIdentifier identifier, Schema schema) {
@@ -181,6 +182,12 @@ public TableBuilder withSortOrder(SortOrder newSortOrder) {
       return this;
     }
 
+    @Override
+    public TableBuilder withRowKey(RowKey newRowKey) {
+      this.rowKey = newRowKey == null ? RowKey.notIdentified() : newRowKey;  // null falls back to the default key
+      return this;
+    }
+
     @Override
     public TableBuilder withLocation(String newLocation) {
       this.location = newLocation;
@@ -210,7 +217,8 @@ public Table create() {
 
       String baseLocation = location != null ? location : defaultWarehouseLocation(identifier);
       Map<String, String> properties = propertiesBuilder.build();
-      TableMetadata metadata = TableMetadata.newTableMetadata(schema, spec, sortOrder, baseLocation, properties);
+      TableMetadata metadata = TableMetadata.newTableMetadata(
+          schema, spec, sortOrder, rowKey, baseLocation, properties);
 
       try {
         ops.commit(null, metadata);
@@ -230,7 +238,8 @@ public Transaction createTransaction() {
 
       String baseLocation = location != null ? location : defaultWarehouseLocation(identifier);
       Map<String, String> properties = propertiesBuilder.build();
-      TableMetadata metadata = TableMetadata.newTableMetadata(schema, spec, sortOrder, baseLocation, properties);
+      TableMetadata metadata = TableMetadata.newTableMetadata(
+          schema, spec, sortOrder, rowKey, baseLocation, properties);
       return Transactions.createTableTransaction(identifier.toString(), ops, metadata);
     }
 
@@ -253,10 +262,12 @@ private Transaction newReplaceTableTransaction(boolean orCreate) {
       TableMetadata metadata;
       if (ops.current() != null) {
         String baseLocation = location != null ? location : ops.current().location();
-        metadata = ops.current().buildReplacement(schema, spec, sortOrder, baseLocation, propertiesBuilder.build());
+        metadata = ops.current().buildReplacement(
+            schema, spec, sortOrder, rowKey, baseLocation, propertiesBuilder.build());
       } else {
         String baseLocation = location != null ? location : defaultWarehouseLocation(identifier);
-        metadata = TableMetadata.newTableMetadata(schema, spec, sortOrder, baseLocation, propertiesBuilder.build());
+        metadata = TableMetadata.newTableMetadata(
+            schema, spec, sortOrder, rowKey, baseLocation, propertiesBuilder.build());
       }
 
       if (orCreate) {

diff --git a/core/src/main/java/org/apache/iceberg/BaseTable.java b/core/src/main/java/org/apache/iceberg/BaseTable.java
@@ -89,6 +89,11 @@ public Map<Integer, SortOrder> sortOrders() {
     return ops.current().sortOrdersById();
   }
 
+  @Override
+  public RowKey rowKey() {
+    return ops.current().rowKey();  // looked up through ops.current() on every call, not cached here
+  }
+
   @Override
   public Map<String, String> properties() {
     return ops.current().properties();