Skip to content
11 changes: 10 additions & 1 deletion api/src/main/java/org/apache/iceberg/types/GetProjectedIds.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,17 @@
import org.apache.iceberg.relocated.com.google.common.collect.Sets;

class GetProjectedIds extends TypeUtil.SchemaVisitor<Set<Integer>> {
private final boolean includeStructIds;
private final Set<Integer> fieldIds = Sets.newHashSet();

/**
 * Creates a visitor with {@code includeStructIds} disabled, so ids of struct-typed fields are not collected.
 */
GetProjectedIds() {
this(false);
}

/**
 * Creates a visitor that collects field ids from a schema.
 *
 * @param includeStructIds when true, the ids of struct-typed fields are collected in addition to the ids of
 *                         primitive-typed fields
 */
GetProjectedIds(boolean includeStructIds) {
this.includeStructIds = includeStructIds;
}

@Override
public Set<Integer> schema(Schema schema, Set<Integer> structResult) {
return fieldIds;
Expand All @@ -39,7 +48,7 @@ public Set<Integer> struct(Types.StructType struct, List<Set<Integer>> fieldResu

@Override
public Set<Integer> field(Types.NestedField field, Set<Integer> fieldResult) {
if (fieldResult == null) {
if ((includeStructIds && field.type().isStructType()) || field.type().isPrimitiveType()) {
fieldIds.add(field.fieldId());
}
return fieldIds;
Expand Down
102 changes: 86 additions & 16 deletions api/src/main/java/org/apache/iceberg/types/PruneColumns.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,26 @@
import org.apache.iceberg.Schema;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.types.Types.ListType;
import org.apache.iceberg.types.Types.MapType;
import org.apache.iceberg.types.Types.StructType;

class PruneColumns extends TypeUtil.SchemaVisitor<Type> {
private final Set<Integer> selected;
private final boolean selectFullTypes;

PruneColumns(Set<Integer> selected) {
/**
 * Creates a visitor that prunes a schema down to the fields identified by {@code selected}.
 * <p>
 * With {@code selectFullTypes} disabled, a selected struct keeps only the sub-fields that were selected
 * themselves, and selecting a list or map field by its own id is rejected.
 *
 * @param selected ids of elements to return, must not be null
 * @param selectFullTypes whether to select all subfields of a selected nested type
 */
PruneColumns(Set<Integer> selected, boolean selectFullTypes) {
  // checkNotNull hands back its argument, so validation and assignment happen in one step
  this.selected = Preconditions.checkNotNull(selected, "Selected field ids cannot be null");
  this.selectFullTypes = selectFullTypes;
}

@Override
Expand Down Expand Up @@ -77,10 +90,19 @@ public Type struct(Types.StructType struct, List<Type> fieldResults) {
@Override
public Type field(Types.NestedField field, Type fieldResult) {
if (selected.contains(field.fieldId())) {
return field.type();
if (selectFullTypes) {
return field.type();
} else if (field.type().isStructType()) {
return projectSelectedStruct(fieldResult);
} else {
Preconditions.checkArgument(!field.type().isNestedType(),
"Cannot explicitly project List or Map types, %s:%s of type %s was selected",
field.fieldId(), field.name(), field.type());
// Selected non-struct field
return field.type();
}
} else if (fieldResult != null) {
// this isn't necessarily the same as field.type() because a struct may not have all
// fields selected.
// This field wasn't selected but a subfield was so include that
return fieldResult;
}
return null;
Expand All @@ -89,31 +111,39 @@ public Type field(Types.NestedField field, Type fieldResult) {
@Override
public Type list(Types.ListType list, Type elementResult) {
if (selected.contains(list.elementId())) {
return list;
} else if (elementResult != null) {
if (list.elementType() == elementResult) {
if (selectFullTypes) {
return list;
} else if (list.isElementOptional()) {
return Types.ListType.ofOptional(list.elementId(), elementResult);
} else if (list.elementType().isStructType()) {
StructType projectedStruct = projectSelectedStruct(elementResult);
return projectList(list, projectedStruct);
} else {
return Types.ListType.ofRequired(list.elementId(), elementResult);
Preconditions.checkArgument(list.elementType().isPrimitiveType(),
"Cannot explicitly project List or Map types, List element %s of type %s was selected",
list.elementId(), list.elementType());
return list;
}
} else if (elementResult != null) {
return projectList(list, elementResult);
}
return null;
}

@Override
public Type map(Types.MapType map, Type ignored, Type valueResult) {
if (selected.contains(map.valueId())) {
return map;
} else if (valueResult != null) {
if (map.valueType() == valueResult) {
if (selectFullTypes) {
return map;
} else if (map.isValueOptional()) {
return Types.MapType.ofOptional(map.keyId(), map.valueId(), map.keyType(), valueResult);
} else if (map.valueType().isStructType()) {
Type projectedStruct = projectSelectedStruct(valueResult);
return projectMap(map, projectedStruct);
} else {
return Types.MapType.ofRequired(map.keyId(), map.valueId(), map.keyType(), valueResult);
Preconditions.checkArgument(map.valueType().isPrimitiveType(),
"Cannot explicitly project List or Map types, Map value %s of type %s was selected",
map.valueId(), map.valueType());
return map;
}
} else if (valueResult != null) {
return projectMap(map, valueResult);
} else if (selected.contains(map.keyId())) {
// right now, maps can't be selected without values
return map;
Expand All @@ -125,4 +155,44 @@ public Type map(Types.MapType map, Type ignored, Type valueResult) {
public Type primitive(Type.PrimitiveType primitive) {
// A primitive has nothing beneath it to prune, so it yields no result of its own; whether it is
// kept is decided by the id checks in the enclosing field, list, or map callback.
return null;
}

/**
 * Rebuilds a list type around a projected element type.
 *
 * @param list the original list type
 * @param elementResult the projected element type, must not be null
 * @return the original list when the element type is the very same instance, otherwise a new list that
 *         keeps the element id and optionality of the original
 */
private ListType projectList(ListType list, Type elementResult) {
  Preconditions.checkArgument(elementResult != null, "Cannot project a list when the element result is null");

  // identity check: the element came back untouched, so the original list can be reused as is
  if (list.elementType() == elementResult) {
    return list;
  }

  int elementId = list.elementId();
  return list.isElementOptional() ?
      Types.ListType.ofOptional(elementId, elementResult) :
      Types.ListType.ofRequired(elementId, elementResult);
}

/**
 * Rebuilds a map type around a projected value type; the key type is always carried over unchanged.
 *
 * @param map the original map type
 * @param valueResult the projected value type, must not be null
 * @return the original map when the value type is the very same instance, otherwise a new map that keeps
 *         the key id, value id, key type and value optionality of the original
 */
private MapType projectMap(MapType map, Type valueResult) {
  Preconditions.checkArgument(valueResult != null, "Attempted to project a map without a defined map value type");

  // identity check: the value came back untouched, so the original map can be reused as is
  if (map.valueType() == valueResult) {
    return map;
  }

  return map.isValueOptional() ?
      Types.MapType.ofOptional(map.keyId(), map.valueId(), map.keyType(), valueResult) :
      Types.MapType.ofRequired(map.keyId(), map.valueId(), map.keyType(), valueResult);
}

/**
 * Resolves the type to return for a struct that was selected by id while full-type selection is disabled.
 * <p>
 * Only sub-fields that were selected themselves are kept. When none were, the struct is still part of the
 * projection, so an empty struct is returned in its place.
 *
 * @param projectedField the struct built from the already-selected sub-fields, or null if there were none
 * @return the projected struct, or an empty struct when {@code projectedField} is null
 */
private StructType projectSelectedStruct(Type projectedField) {
  Preconditions.checkArgument(projectedField == null || projectedField.isStructType());
  // null means the struct itself was selected but none of its children were
  return projectedField != null ? projectedField.asStructType() : Types.StructType.of();
}
}
61 changes: 51 additions & 10 deletions api/src/main/java/org/apache/iceberg/types/TypeUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package org.apache.iceberg.types;

import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;
Expand All @@ -42,6 +43,46 @@ public class TypeUtil {
private TypeUtil() {
}

/**
 * Projects a schema down to exactly the fields whose ids are listed.
 * <p>
 * Unlike {@link TypeUtil#select(Schema, Set)}, only the enumerated fields are kept: a struct that is listed
 * explicitly comes back empty unless its sub-fields are listed as well, and maps and lists cannot be listed
 * explicitly in {@code fieldIds}.
 *
 * @param schema to project fields from
 * @param fieldIds set of explicit field ids to extract
 * @return the schema with all fields that were not selected removed; the input schema itself when nothing
 *         was removed
 */
public static Schema project(Schema schema, Set<Integer> fieldIds) {
  Preconditions.checkNotNull(schema, "Schema cannot be null");

  Types.StructType projected = project(schema.asStruct(), fieldIds);
  if (schema.asStruct().equals(projected)) {
    // nothing was pruned, hand back the caller's schema untouched
    return schema;
  }

  if (projected == null) {
    return new Schema(Collections.emptyList(), schema.getAliases());
  }

  // carry aliases over only when the source schema has them
  return schema.getAliases() != null ?
      new Schema(projected.fields(), schema.getAliases()) :
      new Schema(projected.fields());
}

/**
 * Projects a struct down to exactly the fields whose ids are listed, without expanding selected nested types.
 *
 * @param struct to project fields from
 * @param fieldIds set of explicit field ids to extract
 * @return the input struct when the projection equals it, the pruned struct otherwise, or an empty struct
 *         when nothing was selected
 */
public static Types.StructType project(Types.StructType struct, Set<Integer> fieldIds) {
  Preconditions.checkNotNull(struct, "Struct cannot be null");
  Preconditions.checkNotNull(fieldIds, "Field ids cannot be null");

  Type pruned = visit(struct, new PruneColumns(fieldIds, false));
  if (struct.equals(pruned)) {
    return struct;
  }

  // a null visit result means no field survived the pruning
  return pruned == null ? Types.StructType.of() : pruned.asStructType();
}

public static Schema select(Schema schema, Set<Integer> fieldIds) {
Preconditions.checkNotNull(schema, "Schema cannot be null");

Expand All @@ -63,8 +104,8 @@ public static Types.StructType select(Types.StructType struct, Set<Integer> fiel
Preconditions.checkNotNull(struct, "Struct cannot be null");
Preconditions.checkNotNull(fieldIds, "Field ids cannot be null");

Type result = visit(struct, new PruneColumns(fieldIds));
if (struct == result) {
Type result = visit(struct, new PruneColumns(fieldIds, true));
if (struct.equals(result)) {
return struct;
} else if (result != null) {
return result.asStructType();
Expand All @@ -74,30 +115,30 @@ public static Types.StructType select(Types.StructType struct, Set<Integer> fiel
}

public static Set<Integer> getProjectedIds(Schema schema) {
return ImmutableSet.copyOf(getIdsInternal(schema.asStruct()));
return ImmutableSet.copyOf(getIdsInternal(schema.asStruct(), true));
}

public static Set<Integer> getProjectedIds(Type type) {
if (type.isPrimitiveType()) {
return ImmutableSet.of();
}
return ImmutableSet.copyOf(getIdsInternal(type));
return ImmutableSet.copyOf(getIdsInternal(type, true));
}

private static Set<Integer> getIdsInternal(Type type) {
return visit(type, new GetProjectedIds());
private static Set<Integer> getIdsInternal(Type type, boolean includeStructIds) {
return visit(type, new GetProjectedIds(includeStructIds));
}

public static Types.StructType selectNot(Types.StructType struct, Set<Integer> fieldIds) {
Set<Integer> projectedIds = getIdsInternal(struct);
Set<Integer> projectedIds = getIdsInternal(struct, false);
projectedIds.removeAll(fieldIds);
return select(struct, projectedIds);
return project(struct, projectedIds);
}

public static Schema selectNot(Schema schema, Set<Integer> fieldIds) {
Set<Integer> projectedIds = getIdsInternal(schema.asStruct());
Set<Integer> projectedIds = getIdsInternal(schema.asStruct(), false);
projectedIds.removeAll(fieldIds);
return select(schema, projectedIds);
return project(schema, projectedIds);
}

public static Schema join(Schema left, Schema right) {
Expand Down
41 changes: 37 additions & 4 deletions api/src/main/java/org/apache/iceberg/util/StructProjection.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public class StructProjection implements StructLike {
*/
public static StructProjection create(Schema schema, Set<Integer> ids) {
StructType structType = schema.asStruct();
return new StructProjection(structType, TypeUtil.select(structType, ids));
return new StructProjection(structType, TypeUtil.project(structType, ids));
}

/**
Expand All @@ -58,12 +58,30 @@ public static StructProjection create(Schema dataSchema, Schema projectedSchema)
return new StructProjection(dataSchema.asStruct(), projectedSchema.asStruct());
}

/**
 * Creates a projecting wrapper for {@link StructLike} rows that tolerates missing optional fields.
 * <p>
 * An optional projected field that cannot be found in {@code structType} is read back as null; a required
 * projected field that cannot be found is still rejected with an {@link IllegalArgumentException}. This
 * projection does not work with repeated types like lists and maps.
 *
 * @param structType type of rows wrapped by this projection
 * @param projectedStructType result type of the projected rows
 * @return a wrapper to project rows
 */
public static StructProjection createAllowMissing(StructType structType, StructType projectedStructType) {
  boolean allowMissing = true;
  return new StructProjection(structType, projectedStructType, allowMissing);
}

private final StructType type;
private final int[] positionMap;
private final StructProjection[] nestedProjections;
private StructLike struct;

/**
 * Creates a strict projection: delegates with {@code allowMissing} disabled, so every projected field has
 * to be found in {@code structType}.
 *
 * @param structType type of rows wrapped by this projection
 * @param projection result type of the projected rows
 */
private StructProjection(StructType structType, StructType projection) {
this(structType, projection, false);
}

@SuppressWarnings("checkstyle:CyclomaticComplexity")
private StructProjection(StructType structType, StructType projection, boolean allowMissing) {
this.type = projection;
this.positionMap = new int[projection.fields().size()];
this.nestedProjections = new StructProjection[projection.fields().size()];
Expand Down Expand Up @@ -116,7 +134,10 @@ private StructProjection(StructType structType, StructType projection) {
}
}

if (!found) {
if (!found && projectedField.isOptional() && allowMissing) {
positionMap[pos] = -1;
nestedProjections[pos] = null;
} else if (!found) {
throw new IllegalArgumentException(String.format("Cannot find field %s in %s", projectedField, structType));
}
}
Expand All @@ -134,11 +155,23 @@ public int size() {

@Override
public <T> T get(int pos, Class<T> javaClass) {
if (struct == null) {
// Return a null struct when projecting a nested required field from an optional struct.
// See more details in issue #2738.
return null;
}

int structPos = positionMap[pos];

if (nestedProjections[pos] != null) {
return javaClass.cast(nestedProjections[pos].wrap(struct.get(positionMap[pos], StructLike.class)));
return javaClass.cast(nestedProjections[pos].wrap(struct.get(structPos, StructLike.class)));
}

return struct.get(positionMap[pos], javaClass);
if (structPos != -1) {
return struct.get(structPos, javaClass);
} else {
return null;
}
}

@Override
Expand Down
Loading