Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 86 additions & 16 deletions api/src/main/java/org/apache/iceberg/types/PruneColumns.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,26 @@
import org.apache.iceberg.Schema;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.types.Types.ListType;
import org.apache.iceberg.types.Types.MapType;
import org.apache.iceberg.types.Types.StructType;

class PruneColumns extends TypeUtil.SchemaVisitor<Type> {
private final Set<Integer> selected;
private final boolean selectFullTypes;

PruneColumns(Set<Integer> selected) {
/**
* Visits a schema and returns only the fields selected by the id set.
* <p>
* When selectFullTypes is false, selecting list or map types is undefined and forbidden.
*
* @param selected ids of elements to return
* @param selectFullTypes whether to select all subfields of a selected nested type
*/
PruneColumns(Set<Integer> selected, boolean selectFullTypes) {
Preconditions.checkNotNull(selected, "Selected field ids cannot be null");
this.selected = selected;
this.selectFullTypes = selectFullTypes;
}

@Override
Expand Down Expand Up @@ -77,10 +90,19 @@ public Type struct(Types.StructType struct, List<Type> fieldResults) {
@Override
public Type field(Types.NestedField field, Type fieldResult) {
if (selected.contains(field.fieldId())) {
return field.type();
if (selectFullTypes) {
return field.type();
} else if (field.type().isStructType()) {
return projectSelectedStruct(fieldResult);
} else {
Preconditions.checkArgument(!field.type().isNestedType(),
"Cannot explicitly project List or Map types, %s:%s of type %s was selected",
field.fieldId(), field.name(), field.type());
// Selected non-struct field
return field.type();
}
} else if (fieldResult != null) {
// this isn't necessarily the same as field.type() because a struct may not have all
// fields selected.
// This field wasn't selected but a subfield was so include that
return fieldResult;
}
return null;
Expand All @@ -89,31 +111,39 @@ public Type field(Types.NestedField field, Type fieldResult) {
@Override
public Type list(Types.ListType list, Type elementResult) {
if (selected.contains(list.elementId())) {
return list;
} else if (elementResult != null) {
if (list.elementType() == elementResult) {
if (selectFullTypes) {
return list;
} else if (list.isElementOptional()) {
return Types.ListType.ofOptional(list.elementId(), elementResult);
} else if (list.elementType().isStructType()) {
StructType projectedStruct = projectSelectedStruct(elementResult);
return projectList(list, projectedStruct);
} else {
return Types.ListType.ofRequired(list.elementId(), elementResult);
Preconditions.checkArgument(list.elementType().isPrimitiveType(),
"Cannot explicitly project List or Map types, List element %s of type %s was selected",
list.elementId(), list.elementType());
return list;
}
} else if (elementResult != null) {
return projectList(list, elementResult);
}
return null;
}

@Override
public Type map(Types.MapType map, Type ignored, Type valueResult) {
if (selected.contains(map.valueId())) {
return map;
} else if (valueResult != null) {
if (map.valueType() == valueResult) {
if (selectFullTypes) {
return map;
} else if (map.isValueOptional()) {
return Types.MapType.ofOptional(map.keyId(), map.valueId(), map.keyType(), valueResult);
} else if (map.valueType().isStructType()) {
Type projectedStruct = projectSelectedStruct(valueResult);
return projectMap(map, projectedStruct);
} else {
return Types.MapType.ofRequired(map.keyId(), map.valueId(), map.keyType(), valueResult);
Preconditions.checkArgument(map.valueType().isPrimitiveType(),
"Cannot explicitly project List or Map types, Map value %s of type %s was selected",
map.valueId(), map.valueType());
return map;
}
} else if (valueResult != null) {
return projectMap(map, valueResult);
} else if (selected.contains(map.keyId())) {
// right now, maps can't be selected without values
return map;
Expand All @@ -125,4 +155,44 @@ public Type map(Types.MapType map, Type ignored, Type valueResult) {
public Type primitive(Type.PrimitiveType primitive) {
// A primitive never selects itself: this callback always yields null, and the id checks in
// field(), list() and map() decide whether the enclosing field, element or value is kept.
return null;
}

/**
 * Rebuilds a list type around an already-projected element type.
 * <p>
 * The original list instance is returned when the element result is the very same instance as
 * the list's element type; otherwise a new list is created that keeps the element id and the
 * element's optionality.
 *
 * @param list the list type being projected
 * @param elementResult the projected element type, must not be null
 * @return the original list or a new list wrapping the projected element
 */
private ListType projectList(ListType list, Type elementResult) {
  Preconditions.checkArgument(
      elementResult != null, "Cannot project a list when the element result is null");

  // identity comparison: the same instance means the element was left untouched
  if (list.elementType() == elementResult) {
    return list;
  }

  return list.isElementOptional() ?
      Types.ListType.ofOptional(list.elementId(), elementResult) :
      Types.ListType.ofRequired(list.elementId(), elementResult);
}

/**
 * Rebuilds a map type around an already-projected value type.
 * <p>
 * The original map instance is returned when the value result is the very same instance as the
 * map's value type; otherwise a new map is created that keeps the key id, value id, key type and
 * the value's optionality.
 *
 * @param map the map type being projected
 * @param valueResult the projected value type, must not be null
 * @return the original map or a new map wrapping the projected value
 */
private MapType projectMap(MapType map, Type valueResult) {
  Preconditions.checkArgument(
      valueResult != null, "Attempted to project a map without a defined map value type");

  // identity comparison: the same instance means the value was left untouched
  if (map.valueType() == valueResult) {
    return map;
  }

  return map.isValueOptional() ?
      Types.MapType.ofOptional(map.keyId(), map.valueId(), map.keyType(), valueResult) :
      Types.MapType.ofRequired(map.keyId(), map.valueId(), map.keyType(), valueResult);
}

/**
 * Resolves the struct to return for a struct that was explicitly selected while full-type
 * selection is disabled.
 * <p>
 * Only the sub-fields that were themselves selected are kept. When none were selected the
 * result is an empty struct rather than null, so the selected struct still appears in the
 * projection.
 *
 * @param projectedField the projection of the struct's sub-fields, or null if none were selected
 * @return the projected struct, or an empty struct when no sub-fields were selected
 */
private StructType projectSelectedStruct(Type projectedField) {
  if (projectedField == null) {
    // the struct itself was selected but none of its children were
    return Types.StructType.of();
  }

  // anything other than a struct projection is a caller error
  Preconditions.checkArgument(projectedField.isStructType());
  return projectedField.asStructType();
}
}
45 changes: 43 additions & 2 deletions api/src/main/java/org/apache/iceberg/types/TypeUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package org.apache.iceberg.types;

import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;
Expand All @@ -42,6 +43,46 @@ public class TypeUtil {
private TypeUtil() {
}

/**
 * Projects a schema down to exactly the fields whose ids are listed.
 * <p>
 * Unlike {@link TypeUtil#select(Schema, Set)}, project picks out only the fields enumerated. A
 * struct that is explicitly projected is empty unless some of its sub-fields are explicitly
 * projected as well. Maps and lists cannot be explicitly selected in fieldIds.
 *
 * @param schema the schema to project fields from
 * @param fieldIds ids of the fields to extract
 * @return the schema with all fields that were not selected removed
 */
public static Schema project(Schema schema, Set<Integer> fieldIds) {
  Preconditions.checkNotNull(schema, "Schema cannot be null");

  Types.StructType projected = project(schema.asStruct(), fieldIds);

  // nothing was pruned, hand back the caller's schema untouched
  if (schema.asStruct().equals(projected)) {
    return schema;
  }

  if (projected == null) {
    return new Schema(Collections.emptyList(), schema.getAliases());
  }

  // carry the aliases over only when the source schema has them
  if (schema.getAliases() == null) {
    return new Schema(projected.fields());
  }
  return new Schema(projected.fields(), schema.getAliases());
}

/**
 * Projects a struct down to exactly the fields whose ids are listed.
 * <p>
 * The struct is pruned with a {@link PruneColumns} visitor that has full-type selection turned
 * off. The input struct is returned when the pruned result equals it, and an empty struct is
 * returned when the visitor produces no result.
 *
 * @param struct the struct to project fields from
 * @param fieldIds ids of the fields to extract
 * @return the projected struct, never null
 */
public static Types.StructType project(Types.StructType struct, Set<Integer> fieldIds) {
  Preconditions.checkNotNull(struct, "Struct cannot be null");
  Preconditions.checkNotNull(fieldIds, "Field ids cannot be null");

  Type pruned = visit(struct, new PruneColumns(fieldIds, false));
  if (pruned == null) {
    // nothing survived pruning
    return Types.StructType.of();
  }

  // prefer the caller's instance when the projection changed nothing
  return struct.equals(pruned) ? struct : pruned.asStructType();
}

public static Schema select(Schema schema, Set<Integer> fieldIds) {
Preconditions.checkNotNull(schema, "Schema cannot be null");

Expand All @@ -63,8 +104,8 @@ public static Types.StructType select(Types.StructType struct, Set<Integer> fiel
Preconditions.checkNotNull(struct, "Struct cannot be null");
Preconditions.checkNotNull(fieldIds, "Field ids cannot be null");

Type result = visit(struct, new PruneColumns(fieldIds));
if (struct == result) {
Type result = visit(struct, new PruneColumns(fieldIds, true));
if (struct.equals(result)) {
return struct;
} else if (result != null) {
return result.asStructType();
Expand Down
Loading