Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion api/src/main/java/org/apache/iceberg/Schema.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,17 @@

/**
* The schema of a data table.
* <p>
* Schema ID will only be populated when reading from/writing to table metadata,
* otherwise it will be default to 0.
*/
public class Schema implements Serializable {
private static final Joiner NEWLINE = Joiner.on('\n');
private static final String ALL_COLUMNS = "*";
private static final int DEFAULT_SCHEMA_ID = 0;

private final StructType struct;
private final int schemaId;
private transient BiMap<String, Integer> aliasToId = null;
private transient Map<Integer, NestedField> idToField = null;
private transient Map<String, Integer> nameToId = null;
Expand All @@ -54,6 +59,7 @@ public class Schema implements Serializable {
private transient Map<Integer, String> idToName = null;

public Schema(List<NestedField> columns, Map<String, Integer> aliases) {
this.schemaId = DEFAULT_SCHEMA_ID;
this.struct = StructType.of(columns);
this.aliasToId = aliases != null ? ImmutableBiMap.copyOf(aliases) : null;

Expand All @@ -62,12 +68,21 @@ public Schema(List<NestedField> columns, Map<String, Integer> aliases) {
}

public Schema(List<NestedField> columns) {
this(DEFAULT_SCHEMA_ID, columns);
}

public Schema(int schemaId, List<NestedField> columns) {
this.schemaId = schemaId;
this.struct = StructType.of(columns);
lazyIdToName();
}

public Schema(NestedField... columns) {
this(Arrays.asList(columns));
this(DEFAULT_SCHEMA_ID, Arrays.asList(columns));
}

public Schema(int schemaId, NestedField... columns) {
this(schemaId, Arrays.asList(columns));
}

private Map<Integer, NestedField> lazyIdToField() {
Expand Down Expand Up @@ -105,6 +120,16 @@ private Map<Integer, Accessor<StructLike>> lazyIdToAccessor() {
return idToAccessor;
}

/**
* Returns the schema ID for this schema.
* <p>
* Note that schema ID will only be populated when reading from/writing to table metadata,
* otherwise it will be default to 0.
*/
public int schemaId() {
return this.schemaId;
}

/**
* Returns an alias map for this schema, if set.
* <p>
Expand Down
15 changes: 15 additions & 0 deletions api/src/main/java/org/apache/iceberg/types/TypeUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,21 @@ public static Schema assignFreshIds(Schema schema, NextID nextId) {
.fields());
}

/**
* Assigns fresh ids from the {@link NextID nextId function} for all fields in a schema.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: function should be outside the bracket

Copy link
Contributor Author

@yyanyy yyanyy Feb 5, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm actually I think either way may make sense? We either link nextId function or nextId to NextId interface, and I think both are valid interpretations? I'm also a bit hesitant to change since I copied this from another method, and there are a few others that use this style.

*
* @param schemaId an ID assigned to this schema
* @param schema a schema
* @param nextId an id assignment function
* @return a structurally identical schema with new ids assigned by the nextId function
*/
public static Schema assignFreshIds(int schemaId, Schema schema, NextID nextId) {
return new Schema(schemaId, TypeUtil
.visit(schema.asStruct(), new AssignFreshIds(nextId))
.asNestedType()
.fields());
}

/**
* Assigns ids to match a given schema, and fresh ids from the {@link NextID nextId function} for all other fields.
*
Expand Down
18 changes: 18 additions & 0 deletions api/src/test/java/org/apache/iceberg/TestHelpers.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.IntStream;
import org.apache.iceberg.expressions.BoundPredicate;
import org.apache.iceberg.expressions.BoundSetPredicate;
import org.apache.iceberg.expressions.Expression;
Expand Down Expand Up @@ -84,6 +85,23 @@ public static <T> T roundTripSerialize(T type) throws IOException, ClassNotFound
}
}

public static void assertSameSchemaList(List<Schema> list1, List<Schema> list2) {
if (list1.size() != list2.size()) {
Assert.fail("Should have same number of schemas in both lists");
}

IntStream.range(0, list1.size()).forEach(
index -> {
Schema schema1 = list1.get(index);
Schema schema2 = list2.get(index);
Assert.assertEquals("Should have matching schema id",
schema1.schemaId(), schema2.schemaId());
Assert.assertEquals("Should have matching schema struct",
schema1.asStruct(), schema2.asStruct());
}
);
}

private static class CheckReferencesBound extends ExpressionVisitors.ExpressionVisitor<Void> {
private final String message;

Expand Down
21 changes: 18 additions & 3 deletions core/src/main/java/org/apache/iceberg/SchemaParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ public class SchemaParser {
private SchemaParser() {
}

private static final String SCHEMA_ID = "schema-id";
private static final String TYPE = "type";
private static final String STRUCT = "struct";
private static final String LIST = "list";
Expand All @@ -57,10 +58,18 @@ private SchemaParser() {
private static final String ELEMENT_REQUIRED = "element-required";
private static final String VALUE_REQUIRED = "value-required";

static void toJson(Types.StructType struct, JsonGenerator generator) throws IOException {
private static void toJson(Types.StructType struct, JsonGenerator generator) throws IOException {
toJson(struct, null, generator);
}

private static void toJson(Types.StructType struct, Integer schemaId, JsonGenerator generator) throws IOException {
generator.writeStartObject();

generator.writeStringField(TYPE, STRUCT);
if (schemaId != null) {
generator.writeNumberField(SCHEMA_ID, schemaId);
}

generator.writeArrayFieldStart(FIELDS);
for (Types.NestedField field : struct.fields()) {
generator.writeStartObject();
Expand Down Expand Up @@ -135,7 +144,7 @@ static void toJson(Type type, JsonGenerator generator) throws IOException {
}

public static void toJson(Schema schema, JsonGenerator generator) throws IOException {
toJson(schema.asStruct(), generator);
toJson(schema.asStruct(), schema.schemaId(), generator);
}

public static String toJson(Schema schema) {
Expand Down Expand Up @@ -237,7 +246,13 @@ public static Schema fromJson(JsonNode json) {
Type type = typeFromJson(json);
Preconditions.checkArgument(type.isNestedType() && type.asNestedType().isStructType(),
"Cannot create schema, not a struct type: %s", type);
return new Schema(type.asNestedType().asStructType().fields());
Integer schemaId = JsonUtil.getIntOrNull(SCHEMA_ID, json);

if (schemaId == null) {
return new Schema(type.asNestedType().asStructType().fields());
} else {
return new Schema(schemaId, type.asNestedType().asStructType().fields());
}
}

private static final Cache<String, Schema> SCHEMA_CACHE = Caffeine.newBuilder()
Expand Down
Loading