Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
import static com.facebook.presto.common.type.DateType.DATE;
import static com.facebook.presto.common.type.DecimalType.createDecimalType;
import static com.facebook.presto.common.type.Decimals.MAX_PRECISION;
import static com.facebook.presto.common.type.Decimals.MAX_SHORT_PRECISION;
import static com.facebook.presto.common.type.DoubleType.DOUBLE;
import static com.facebook.presto.common.type.IntegerType.INTEGER;
import static com.facebook.presto.common.type.RealType.REAL;
Expand Down Expand Up @@ -938,24 +939,66 @@ public void testDecimalBackedByINT64()
}
}

private void testDecimal(int precision, int scale, Optional<MessageType> parquetSchema)
throws Exception
{
ContiguousSet<BigInteger> values = bigIntegersBetween(BigDecimal.valueOf(Math.pow(10, precision - 1)).toBigInteger(), BigDecimal.valueOf(Math.pow(10, precision)).toBigInteger());
ImmutableList.Builder<SqlDecimal> expectedValues = new ImmutableList.Builder<>();
ImmutableList.Builder<HiveDecimal> writeValues = new ImmutableList.Builder<>();
for (BigInteger value : limit(values, 1_000)) {
writeValues.add(HiveDecimal.create(value, scale));
expectedValues.add(new SqlDecimal(value, precision, scale));
}
tester.testRoundTrip(new JavaHiveDecimalObjectInspector(new DecimalTypeInfo(precision, scale)),
writeValues.build(),
expectedValues.build(),
createDecimalType(precision, scale),
parquetSchema);
}

@Test
public void testShortDecimalBackedByFixedLenByteArray()
throws Exception
{
int[] scales = {0, 0, 2, 2, 4, 5, 0, 1, 5, 7, 4, 8, 4, 4, 13, 11, 16, 15};
for (int precision = 1; precision <= MAX_SHORT_PRECISION; precision++) {
int scale = scales[precision - 1];
testDecimal(precision, scale, Optional.empty());
}
}

@Test
public void testDecimalBackedByFixedLenByteArray()
public void testLongDecimalBackedByFixedLenByteArray()
throws Exception
{
int[] scales = {7, 13, 14, 8, 16, 20, 8, 4, 19, 25, 15, 23, 17, 2, 23, 0, 33, 8, 3, 12};
for (int precision = MAX_PRECISION_INT64 + 1; precision < MAX_PRECISION; precision++) {
int scale = scales[precision - MAX_PRECISION_INT64 - 1];
ContiguousSet<BigInteger> values = bigIntegersBetween(BigDecimal.valueOf(Math.pow(10, precision - 1)).toBigInteger(), BigDecimal.valueOf(Math.pow(10, precision)).toBigInteger());
ImmutableList.Builder<SqlDecimal> expectedValues = new ImmutableList.Builder<>();
ImmutableList.Builder<HiveDecimal> writeValues = new ImmutableList.Builder<>();
for (BigInteger value : limit(values, 1_000)) {
writeValues.add(HiveDecimal.create(value, scale));
expectedValues.add(new SqlDecimal(value, precision, scale));
}
tester.testRoundTrip(new JavaHiveDecimalObjectInspector(new DecimalTypeInfo(precision, scale)),
writeValues.build(),
expectedValues.build(),
createDecimalType(precision, scale));
testDecimal(precision, scale, Optional.empty());
}
}

@Test
public void testShortDecimalBackedByBinary()
throws Exception
{
int[] scales = {0, 0, 1, 2, 5, 4, 3, 4, 7, 6, 8, 9, 10, 1, 13, 11, 16, 15};
for (int precision = 1; precision <= MAX_SHORT_PRECISION; precision++) {
int scale = scales[precision - 1];
MessageType parquetSchema = parseMessageType(format("message hive_decimal { optional BINARY test (DECIMAL(%d, %d)); }", precision, scale));
testDecimal(precision, scale, Optional.of(parquetSchema));
}
}

@Test
public void testLongDecimalBackedByBinary()
throws Exception
{
int[] scales = {1, 1, 7, 8, 22, 3, 15, 14, 7, 21, 6, 12, 1, 15, 14, 29, 17, 7, 26};
for (int precision = MAX_PRECISION_INT64 + 1; precision < MAX_PRECISION; precision++) {
int scale = scales[precision - MAX_PRECISION_INT64 - 1];
MessageType parquetSchema = parseMessageType(format("message hive_decimal { optional BINARY test (DECIMAL(%d, %d)); }", precision, scale));
testDecimal(precision, scale, Optional.of(parquetSchema));
}
}

Expand Down
5 changes: 5 additions & 0 deletions presto-parquet/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@
<artifactId>units</artifactId>
</dependency>

<dependency>
<groupId>com.facebook.airlift</groupId>
<artifactId>log</artifactId>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>aircompressor</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
*/
package com.facebook.presto.parquet;

import com.facebook.presto.common.type.DecimalType;
import com.facebook.presto.common.type.Type;
import com.facebook.airlift.log.Logger;
import com.facebook.presto.parquet.batchreader.BinaryFlatBatchReader;
import com.facebook.presto.parquet.batchreader.BinaryNestedBatchReader;
import com.facebook.presto.parquet.batchreader.BooleanFlatBatchReader;
Expand All @@ -25,6 +24,8 @@
import com.facebook.presto.parquet.batchreader.Int64NestedBatchReader;
import com.facebook.presto.parquet.batchreader.Int64TimestampMicrosFlatBatchReader;
import com.facebook.presto.parquet.batchreader.Int64TimestampMicrosNestedBatchReader;
import com.facebook.presto.parquet.batchreader.LongDecimalFlatBatchReader;
import com.facebook.presto.parquet.batchreader.ShortDecimalFlatBatchReader;
import com.facebook.presto.parquet.batchreader.TimestampFlatBatchReader;
import com.facebook.presto.parquet.batchreader.TimestampNestedBatchReader;
import com.facebook.presto.parquet.reader.AbstractColumnReader;
Expand All @@ -40,43 +41,67 @@
import com.facebook.presto.parquet.reader.ShortDecimalColumnReader;
import com.facebook.presto.parquet.reader.TimestampColumnReader;
import com.facebook.presto.spi.PrestoException;
import org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation;

import java.util.Optional;

import static com.facebook.presto.parquet.ParquetTypeUtils.createDecimalType;
import static com.facebook.presto.parquet.ParquetTypeUtils.isDecimalType;
import static com.facebook.presto.parquet.ParquetTypeUtils.isShortDecimalType;
import static com.facebook.presto.parquet.ParquetTypeUtils.isTimeStampMicrosType;
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
import static org.apache.parquet.schema.OriginalType.DECIMAL;
import static org.apache.parquet.schema.OriginalType.TIMESTAMP_MICROS;
import static org.apache.parquet.schema.OriginalType.TIME_MICROS;

public class ColumnReaderFactory
{
private static final Logger log = Logger.get(ColumnReaderFactory.class);
private ColumnReaderFactory()
{
}

public static ColumnReader createReader(RichColumnDescriptor descriptor, boolean batchReadEnabled)
{
// decimal is not supported in batch readers
if (batchReadEnabled && descriptor.getPrimitiveType().getOriginalType() != DECIMAL) {
if (batchReadEnabled) {
final boolean isNested = descriptor.getPath().length > 1;
switch (descriptor.getPrimitiveType().getPrimitiveTypeName()) {
case BOOLEAN:
return isNested ? new BooleanNestedBatchReader(descriptor) : new BooleanFlatBatchReader(descriptor);
case INT32:
if (!isNested && isShortDecimalType(descriptor)) {
return new ShortDecimalFlatBatchReader(descriptor);
}
case FLOAT:
return isNested ? new Int32NestedBatchReader(descriptor) : new Int32FlatBatchReader(descriptor);
case INT64:
if (isTimeStampMicrosType(descriptor)) {
return isNested ? new Int64TimestampMicrosNestedBatchReader(descriptor) : new Int64TimestampMicrosFlatBatchReader(descriptor);
}

Comment thread
yingsu00 marked this conversation as resolved.
if (!isNested && isShortDecimalType(descriptor)) {
int precision = ((DecimalLogicalTypeAnnotation) descriptor.getPrimitiveType().getLogicalTypeAnnotation()).getPrecision();
if (precision < 10) {
log.warn("PrimitiveTypeName is INT64 but precision is less then 10.");
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

delete the warning; it's not actionable

or if I'm wrong and this is a real problem, then a waring isn't enough. This should fail outright

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The spec says we need to produce a warning when precision < 10 for INT64. See https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#decimal

}
return new ShortDecimalFlatBatchReader(descriptor);
}
case DOUBLE:
return isNested ? new Int64NestedBatchReader(descriptor) : new Int64FlatBatchReader(descriptor);
case INT96:
return isNested ? new TimestampNestedBatchReader(descriptor) : new TimestampFlatBatchReader(descriptor);
case BINARY:
Optional<ColumnReader> decimalBatchColumnReader = createDecimalBatchColumnReader(descriptor);
if (decimalBatchColumnReader.isPresent()) {
return decimalBatchColumnReader.get();
}

return isNested ? new BinaryNestedBatchReader(descriptor) : new BinaryFlatBatchReader(descriptor);
case FIXED_LEN_BYTE_ARRAY:
if (!isNested) {
decimalBatchColumnReader = createDecimalBatchColumnReader(descriptor);
if (decimalBatchColumnReader.isPresent()) {
return decimalBatchColumnReader.get();
}
}
}
}

Expand Down Expand Up @@ -109,17 +134,24 @@ public static ColumnReader createReader(RichColumnDescriptor descriptor, boolean
}
}

private static Optional<ColumnReader> createDecimalBatchColumnReader(RichColumnDescriptor descriptor)
{
if (isDecimalType(descriptor)) {
if (isShortDecimalType(descriptor)) {
return Optional.of(new ShortDecimalFlatBatchReader(descriptor));
}
return Optional.of(new LongDecimalFlatBatchReader(descriptor));
}
return Optional.empty();
}

private static Optional<AbstractColumnReader> createDecimalColumnReader(RichColumnDescriptor descriptor)
{
Optional<Type> type = createDecimalType(descriptor);
if (type.isPresent()) {
DecimalType decimalType = (DecimalType) type.get();
if (decimalType.isShort()) {
if (isDecimalType(descriptor)) {
if (isShortDecimalType(descriptor)) {
return Optional.of(new ShortDecimalColumnReader(descriptor));
}
else {
return Optional.of(new LongDecimalColumnReader(descriptor));
}
return Optional.of(new LongDecimalColumnReader(descriptor));
}
return Optional.empty();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
package com.facebook.presto.parquet;

import com.facebook.presto.common.Subfield;
import com.facebook.presto.common.type.DecimalType;
import com.facebook.presto.common.type.Type;
import com.google.common.collect.ImmutableList;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.Encoding;
Expand All @@ -26,8 +24,9 @@
import org.apache.parquet.io.MessageColumnIO;
import org.apache.parquet.io.ParquetDecodingException;
import org.apache.parquet.io.PrimitiveColumnIO;
import org.apache.parquet.schema.DecimalMetadata;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;

import java.util.Arrays;
Expand All @@ -37,6 +36,7 @@
import java.util.Map;
import java.util.Optional;

import static com.facebook.presto.common.type.Decimals.MAX_SHORT_PRECISION;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.Iterables.getOnlyElement;
import static java.util.stream.Collectors.joining;
Expand Down Expand Up @@ -229,19 +229,6 @@ public static ColumnIO lookupColumnByName(GroupColumnIO groupColumnIO, String co
return null;
}

public static Optional<Type> createDecimalType(RichColumnDescriptor descriptor)
{
if (descriptor.getPrimitiveType().getOriginalType() != DECIMAL) {
return Optional.empty();
}
return Optional.of(createDecimalType(descriptor.getPrimitiveType().getDecimalMetadata()));
}

private static Type createDecimalType(DecimalMetadata decimalMetadata)
{
return DecimalType.createDecimalType(decimalMetadata.getPrecision(), decimalMetadata.getScale());
}

/**
* For optional fields:
* definitionLevel == maxDefinitionLevel => Value is defined
Expand All @@ -253,20 +240,33 @@ public static boolean isValueNull(boolean required, int definitionLevel, int max
return !required && (definitionLevel == maxDefinitionLevel - 1);
}

// copied from presto-hive DecimalUtils
public static long getShortDecimalValue(byte[] bytes)
{
long value = 0;
if ((bytes[0] & 0x80) != 0) {
for (int i = 0; i < 8 - bytes.length; ++i) {
value |= 0xFFL << (8 * (7 - i));
}
}
return getShortDecimalValue(bytes, 0, bytes.length);
}

for (int i = 0; i < bytes.length; i++) {
value |= ((long) bytes[bytes.length - i - 1] & 0xFFL) << (8 * i);
public static long getShortDecimalValue(byte[] bytes, int startOffset, int length)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can this be private or not public?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, this method is used in many places.

{
long value = 0;
switch (length) {
case 8:
value |= bytes[startOffset + 7] & 0xFFL;
case 7:
value |= (bytes[startOffset + 6] & 0xFFL) << 8;
case 6:
value |= (bytes[startOffset + 5] & 0xFFL) << 16;
case 5:
value |= (bytes[startOffset + 4] & 0xFFL) << 24;
case 4:
value |= (bytes[startOffset + 3] & 0xFFL) << 32;
case 3:
value |= (bytes[startOffset + 2] & 0xFFL) << 40;
case 2:
value |= (bytes[startOffset + 1] & 0xFFL) << 48;
case 1:
value |= (bytes[startOffset] & 0xFFL) << 56;
}

value = value >> ((8 - length) * 8);
return value;
}

Expand Down Expand Up @@ -335,4 +335,20 @@ public static boolean isTimeStampMicrosType(ColumnDescriptor descriptor)
{
return TIMESTAMP_MICROS.equals(descriptor.getPrimitiveType().getOriginalType());
}

public static boolean isShortDecimalType(ColumnDescriptor descriptor)
{
LogicalTypeAnnotation logicalTypeAnnotation = descriptor.getPrimitiveType().getLogicalTypeAnnotation();
if (!(logicalTypeAnnotation instanceof DecimalLogicalTypeAnnotation)) {
return false;
}

DecimalLogicalTypeAnnotation decimalLogicalTypeAnnotation = (DecimalLogicalTypeAnnotation) logicalTypeAnnotation;
return decimalLogicalTypeAnnotation.getPrecision() <= MAX_SHORT_PRECISION;
}

public static boolean isDecimalType(ColumnDescriptor columnDescriptor)
{
return columnDescriptor.getPrimitiveType().getOriginalType() == DECIMAL;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,9 @@ public static void unpack8Values(byte inByte, byte[] out, int outPos)
out[6 + outPos] = (byte) (inByte >> 6 & 1);
out[7 + outPos] = (byte) (inByte >> 7 & 1);
}

public static long propagateSignBit(long value, int bitsToPad)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method could use a unit test that addresses it directly

{
return value << bitsToPad >> bitsToPad;
}
}
Loading