Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PARQUET-1128: [Java] Upgrade the Apache Arrow version to 0.8.0 for SchemaConverter #443

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion parquet-arrow/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
<url>https://parquet.apache.org</url>

<properties>
<arrow.version>0.1.0</arrow.version>
<arrow.version>0.8.0</arrow.version>
</properties>

<dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@
import java.util.ArrayList;
import java.util.List;

import org.apache.arrow.flatbuf.Precision;
import org.apache.arrow.flatbuf.TimeUnit;
import org.apache.arrow.vector.types.DateUnit;
import org.apache.arrow.vector.types.FloatingPointPrecision;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor;
import org.apache.arrow.vector.types.pojo.ArrowType.Binary;
Expand All @@ -59,7 +59,7 @@
import org.apache.arrow.vector.types.pojo.ArrowType.Int;
import org.apache.arrow.vector.types.pojo.ArrowType.Interval;
import org.apache.arrow.vector.types.pojo.ArrowType.Null;
import org.apache.arrow.vector.types.pojo.ArrowType.Struct_;
import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
import org.apache.arrow.vector.types.pojo.ArrowType.Time;
import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
import org.apache.arrow.vector.types.pojo.ArrowType.Union;
Expand Down Expand Up @@ -141,13 +141,22 @@ public TypeMapping visit(Null type) {
}

@Override
public TypeMapping visit(Struct_ type) {
public TypeMapping visit(Struct type) {
List<TypeMapping> parquetTypes = fromArrow(children);
return new StructTypeMapping(field, addToBuilder(parquetTypes, Types.buildGroup(OPTIONAL)).named(fieldName), parquetTypes);
}

/**
 * Visits an Arrow {@code List} type by delegating to {@code createListTypeMapping()}.
 *
 * @param type the Arrow list type being visited; not consulted by this method
 * @return the mapping produced by {@code createListTypeMapping()}
 */
@Override
public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
return createListTypeMapping();
}

/**
 * Visits an Arrow {@code FixedSizeList} type by delegating to
 * {@code createListTypeMapping()}.
 *
 * @param type the Arrow fixed-size list type being visited; not consulted by this method
 * @return the mapping produced by {@code createListTypeMapping()}
 */
@Override
public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList type) {
// NOTE(review): type is not passed to the helper, so nothing it carries
// (e.g. a fixed list size) can influence the result here — confirm this is intended.
return createListTypeMapping();
}

private ListTypeMapping createListTypeMapping() {
if (children.size() != 1) {
throw new IllegalArgumentException("list fields must have exactly one child: " + field);
}
Expand All @@ -167,31 +176,31 @@ public TypeMapping visit(Union type) {
public TypeMapping visit(Int type) {
boolean signed = type.getIsSigned();
switch (type.getBitWidth()) {
case 8:
return primitive(INT32, signed ? INT_8 : UINT_8);
case 16:
return primitive(INT32, signed ? INT_16 : UINT_16);
case 32:
return primitive(INT32, signed ? INT_32 : UINT_32);
case 64:
return primitive(INT64, signed ? INT_64 : UINT_64);
default:
throw new IllegalArgumentException("Illegal int type: " + field);
case 8:
return primitive(INT32, signed ? INT_8 : UINT_8);
case 16:
return primitive(INT32, signed ? INT_16 : UINT_16);
case 32:
return primitive(INT32, signed ? INT_32 : UINT_32);
case 64:
return primitive(INT64, signed ? INT_64 : UINT_64);
default:
throw new IllegalArgumentException("Illegal int type: " + field);
}
}

@Override
public TypeMapping visit(FloatingPoint type) {
switch (type.getPrecision()) {
case Precision.HALF:
// TODO(PARQUET-757): original type HalfFloat
return primitive(FLOAT);
case Precision.SINGLE:
return primitive(FLOAT);
case Precision.DOUBLE:
return primitive(DOUBLE);
default:
throw new IllegalArgumentException("Illegal float type: " + field);
case HALF:
// TODO(PARQUET-757): original type HalfFloat
return primitive(FLOAT);
case SINGLE:
return primitive(FLOAT);
case DOUBLE:
return primitive(DOUBLE);
default:
throw new IllegalArgumentException("Illegal float type: " + field);
}
}

Expand Down Expand Up @@ -336,7 +345,7 @@ private TypeMapping fromParquetGroup(GroupType type, String name) {
OriginalType ot = type.getOriginalType();
if (ot == null) {
List<TypeMapping> typeMappings = fromParquet(type.getFields());
Field arrowField = new Field(name, type.isRepetition(OPTIONAL), new Struct_(), fields(typeMappings));
Field arrowField = new Field(name, type.isRepetition(OPTIONAL), new Struct(), fields(typeMappings));
return new StructTypeMapping(arrowField, type, typeMappings);
} else {
switch (ot) {
Expand Down Expand Up @@ -366,12 +375,12 @@ private TypeMapping field(ArrowType arrowType) {

@Override
public TypeMapping convertFLOAT(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
return field(new ArrowType.FloatingPoint(Precision.SINGLE));
return field(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE));
}

@Override
public TypeMapping convertDOUBLE(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
return field(new ArrowType.FloatingPoint(Precision.DOUBLE));
return field(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));
}

@Override
Expand All @@ -381,41 +390,41 @@ public TypeMapping convertINT32(PrimitiveTypeName primitiveTypeName) throws Runt
return integer(32, true);
}
switch (ot) {
case INT_8:
return integer(8, true);
case INT_16:
return integer(16, true);
case INT_32:
return integer(32, true);
case UINT_8:
return integer(8, false);
case UINT_16:
return integer(16, false);
case UINT_32:
return integer(32, false);
case DECIMAL:
return decimal(type.getDecimalMetadata());
case DATE:
return field(new ArrowType.Date());
case TIMESTAMP_MICROS:
return field(new ArrowType.Timestamp(TimeUnit.MICROSECOND));
case TIMESTAMP_MILLIS:
return field(new ArrowType.Timestamp(TimeUnit.MILLISECOND));
case TIME_MILLIS:
return field(new ArrowType.Time());
default:
case TIME_MICROS:
case INT_64:
case UINT_64:
case UTF8:
case ENUM:
case BSON:
case INTERVAL:
case JSON:
case LIST:
case MAP:
case MAP_KEY_VALUE:
throw new IllegalArgumentException("illegal type " + type);
case INT_8:
return integer(8, true);
case INT_16:
return integer(16, true);
case INT_32:
return integer(32, true);
case UINT_8:
return integer(8, false);
case UINT_16:
return integer(16, false);
case UINT_32:
return integer(32, false);
case DECIMAL:
return decimal(type.getDecimalMetadata());
case DATE:
return field(new ArrowType.Date(DateUnit.DAY));
case TIMESTAMP_MICROS:
return field(new ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MICROSECOND, "UTC"));
case TIMESTAMP_MILLIS:
return field(new ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC"));
case TIME_MILLIS:
return field(new ArrowType.Time(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, 32));
default:
case TIME_MICROS:
case INT_64:
case UINT_64:
case UTF8:
case ENUM:
case BSON:
case INTERVAL:
case JSON:
case LIST:
case MAP:
case MAP_KEY_VALUE:
throw new IllegalArgumentException("illegal type " + type);
}
}

Expand All @@ -426,43 +435,42 @@ public TypeMapping convertINT64(PrimitiveTypeName primitiveTypeName) throws Runt
return integer(64, true);
}
switch (ot) {
case INT_8:
return integer(8, true);
case INT_16:
return integer(16, true);
case INT_32:
return integer(32, true);
case INT_64:
return integer(64, true);
case UINT_8:
return integer(8, false);
case UINT_16:
return integer(16, false);
case UINT_32:
return integer(32, false);
case UINT_64:
return integer(64, false);
case DECIMAL:
return decimal(type.getDecimalMetadata());
case DATE:
return field(new ArrowType.Date());
case TIMESTAMP_MICROS:
return field(new ArrowType.Timestamp(TimeUnit.MICROSECOND));
case TIMESTAMP_MILLIS:
return field(new ArrowType.Timestamp(TimeUnit.MILLISECOND));
case TIME_MILLIS:
return field(new ArrowType.Time());
default:
case TIME_MICROS:
case UTF8:
case ENUM:
case BSON:
case INTERVAL:
case JSON:
case LIST:
case MAP:
case MAP_KEY_VALUE:
throw new IllegalArgumentException("illegal type " + type);
case INT_8:
return integer(8, true);
case INT_16:
return integer(16, true);
case INT_32:
return integer(32, true);
case INT_64:
return integer(64, true);
case UINT_8:
return integer(8, false);
case UINT_16:
return integer(16, false);
case UINT_32:
return integer(32, false);
case UINT_64:
return integer(64, false);
case DECIMAL:
return decimal(type.getDecimalMetadata());
case DATE:
return field(new ArrowType.Date(DateUnit.DAY));
case TIMESTAMP_MICROS:
return field(new ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MICROSECOND, "UTC"));
case TIMESTAMP_MILLIS:
return field(new ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC"));
default:
case TIME_MICROS:
case UTF8:
case ENUM:
case BSON:
case INTERVAL:
case JSON:
case LIST:
case MAP:
case MAP_KEY_VALUE:
case TIME_MILLIS:
throw new IllegalArgumentException("illegal type " + type);
}
}

Expand All @@ -489,12 +497,12 @@ public TypeMapping convertBINARY(PrimitiveTypeName primitiveTypeName) throws Run
return field(new ArrowType.Binary());
}
switch (ot) {
case UTF8:
return field(new ArrowType.Utf8());
case DECIMAL:
return decimal(type.getDecimalMetadata());
default:
throw new IllegalArgumentException("illegal type " + type);
case UTF8:
return field(new ArrowType.Utf8());
case DECIMAL:
return decimal(type.getDecimalMetadata());
default:
throw new IllegalArgumentException("illegal type " + type);
}
}

Expand Down Expand Up @@ -545,7 +553,7 @@ public TypeMapping visit(Null type) {
}

@Override
public TypeMapping visit(Struct_ type) {
public TypeMapping visit(Struct type) {
if (parquetField.isPrimitive()) {
throw new IllegalArgumentException("Parquet type not a group: " + parquetField);
}
Expand All @@ -555,6 +563,15 @@ public TypeMapping visit(Struct_ type) {

/**
 * Visits an Arrow {@code List} type by delegating to
 * {@code createListTypeMapping(ArrowType.ComplexType)}.
 *
 * @param type the Arrow list type being visited; forwarded unchanged to the helper
 * @return the mapping produced by the helper
 */
@Override
public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
return createListTypeMapping(type);
}

/**
 * Visits an Arrow {@code FixedSizeList} type by delegating to
 * {@code createListTypeMapping(ArrowType.ComplexType)}.
 *
 * @param type the Arrow fixed-size list type being visited; forwarded unchanged to the helper
 * @return the mapping produced by the helper
 */
@Override
public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList type) {
return createListTypeMapping(type);
}

private TypeMapping createListTypeMapping(ArrowType.ComplexType type) {
if (arrowField.getChildren().size() != 1) {
throw new IllegalArgumentException("Invalid list type: " + type);
}
Expand Down
Loading