From 9dff72f671a653e003a63bd0a0c80285d1cab6ba Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Wed, 15 Mar 2023 13:21:49 +0100 Subject: [PATCH 1/3] Build: Move to Hadoop3 Including the latest version of Hive --- .../mr/hive/HiveIcebergInputFormat.java | 4 +--- .../mr/hive/HiveIcebergStorageHandler.java | 4 +--- .../IcebergDateObjectInspector.java | 14 +++++++------- .../IcebergTimestampObjectInspector.java | 18 +++++++++--------- ...cebergTimestampWithZoneObjectInspector.java | 14 +++++++------- versions.props | 4 ++-- 6 files changed, 27 insertions(+), 31 deletions(-) diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergInputFormat.java b/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergInputFormat.java index 5f2eb9834b63..a14dfd40ccf5 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergInputFormat.java +++ b/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergInputFormat.java @@ -134,9 +134,7 @@ public boolean shouldSkipCombine(Path path, Configuration conf) { return true; } - // Override annotation commented out, since this interface method has been introduced only in Hive - // 3 - // @Override + @Override public VectorizedSupport.Support[] getSupportedFeatures() { return new VectorizedSupport.Support[0]; } diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java index da40f4c73ef3..0f4d1f9b5c7c 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java +++ b/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java @@ -106,9 +106,7 @@ public void configureOutputJobProperties(TableDesc tableDesc, Map map) {} - // Override annotation commented out, since this interface method has been introduced only in Hive - // 3 - // @Override + @Override public void configureInputJobCredentials(TableDesc tableDesc, Map secrets) {} @Override diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspector.java b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspector.java index 17a82f430208..574840fe7d54 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspector.java +++ b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspector.java @@ -18,9 +18,9 @@ */ package org.apache.iceberg.mr.hive.serde.objectinspector; -import java.sql.Date; +import org.apache.hadoop.hive.common.type.Date; import java.time.LocalDate; -import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -41,12 +41,12 @@ private IcebergDateObjectInspector() { @Override public Date getPrimitiveJavaObject(Object o) { - return o == null ? null : Date.valueOf((LocalDate) o); + return o == null ? null : Date.ofEpochDay((int)((LocalDate) o).toEpochDay()); } @Override - public DateWritable getPrimitiveWritableObject(Object o) { - return o == null ? null : new DateWritable(DateTimeUtil.daysFromDate((LocalDate) o)); + public DateWritableV2 getPrimitiveWritableObject(Object o) { + return o == null ? null : new DateWritableV2(DateTimeUtil.daysFromDate((LocalDate) o)); } @Override @@ -56,7 +56,7 @@ public Object copyObject(Object o) { } if (o instanceof Date) { - return new Date(((Date) o).getTime()); + return Date.ofEpochDay(((Date) o).toEpochDay()); } else if (o instanceof LocalDate) { return LocalDate.of( ((LocalDate) o).getYear(), ((LocalDate) o).getMonth(), ((LocalDate) o).getDayOfMonth()); @@ -67,6 +67,6 @@ public Object copyObject(Object o) { @Override public LocalDate convert(Object o) { - return o == null ? null : ((Date) o).toLocalDate(); + return o == null ? null : LocalDate.ofEpochDay(((Date) o).toEpochDay()); } } diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspector.java b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspector.java index 08c74c9afa4a..27889e9dbdaf 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspector.java +++ b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspector.java @@ -18,9 +18,11 @@ */ package org.apache.iceberg.mr.hive.serde.objectinspector; -import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.Timestamp; import java.time.LocalDateTime; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import java.time.ZoneOffset; + +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -41,27 +43,25 @@ private IcebergTimestampObjectInspector() { @Override public LocalDateTime convert(Object o) { - return o == null ? null : ((Timestamp) o).toLocalDateTime(); + return o == null ? null : ((Timestamp) o).toSqlTimestamp().toLocalDateTime(); } @Override public Timestamp getPrimitiveJavaObject(Object o) { - return o == null ? null : Timestamp.valueOf((LocalDateTime) o); + return o == null ? null : Timestamp.ofEpochMilli(((LocalDateTime) o).toInstant(ZoneOffset.UTC).toEpochMilli()); } @Override - public TimestampWritable getPrimitiveWritableObject(Object o) { + public TimestampWritableV2 getPrimitiveWritableObject(Object o) { Timestamp ts = getPrimitiveJavaObject(o); - return ts == null ? null : new TimestampWritable(ts); + return ts == null ? null : new TimestampWritableV2(ts); } @Override public Object copyObject(Object o) { if (o instanceof Timestamp) { Timestamp ts = (Timestamp) o; - Timestamp copy = new Timestamp(ts.getTime()); - copy.setNanos(ts.getNanos()); - return copy; + return Timestamp.ofEpochMilli(ts.toEpochMilli(), ts.getNanos()); } else if (o instanceof LocalDateTime) { LocalDateTime ldt = (LocalDateTime) o; return LocalDateTime.of(ldt.toLocalDate(), ldt.toLocalTime()); diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampWithZoneObjectInspector.java b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampWithZoneObjectInspector.java index f315b0b6d8ea..96b5d93acd2c 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampWithZoneObjectInspector.java +++ b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampWithZoneObjectInspector.java @@ -18,13 +18,13 @@ */ package org.apache.iceberg.mr.hive.serde.objectinspector; -import java.sql.Timestamp; import java.time.OffsetDateTime; import java.time.ZoneOffset; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.common.type.Timestamp; public class IcebergTimestampWithZoneObjectInspector extends AbstractPrimitiveJavaObjectInspector implements TimestampObjectInspector, WriteObjectInspector { @@ -42,25 +42,25 @@ private IcebergTimestampWithZoneObjectInspector() { @Override public OffsetDateTime convert(Object o) { - return o == null ? null : OffsetDateTime.ofInstant(((Timestamp) o).toInstant(), ZoneOffset.UTC); + return o == null ? null : OffsetDateTime.ofInstant(((Timestamp) o).toSqlTimestamp().toInstant(), ZoneOffset.UTC); } @Override public Timestamp getPrimitiveJavaObject(Object o) { - return o == null ? null : Timestamp.from(((OffsetDateTime) o).toInstant()); + return o == null ? null : Timestamp.ofEpochMilli(((OffsetDateTime) o).toInstant().toEpochMilli()); } @Override - public TimestampWritable getPrimitiveWritableObject(Object o) { + public TimestampWritableV2 getPrimitiveWritableObject(Object o) { Timestamp ts = getPrimitiveJavaObject(o); - return ts == null ? null : new TimestampWritable(ts); + return ts == null ? null : new TimestampWritableV2(ts); } @Override public Object copyObject(Object o) { if (o instanceof Timestamp) { Timestamp ts = (Timestamp) o; - Timestamp copy = new Timestamp(ts.getTime()); + Timestamp copy = Timestamp.ofEpochMilli(ts.toEpochMilli()); copy.setNanos(ts.getNanos()); return copy; } else if (o instanceof OffsetDateTime) { diff --git a/versions.props b/versions.props index 34c6a5b86033..ba549d359268 100644 --- a/versions.props +++ b/versions.props @@ -1,8 +1,8 @@ org.slf4j:* = 1.7.36 org.apache.avro:avro = 1.11.1 org.apache.calcite:* = 1.10.0 -org.apache.hadoop:* = 2.7.3 -org.apache.hive:* = 2.3.8 +org.apache.hadoop:* = 3.3.4 +org.apache.hive:* = 3.1.3 org.apache.httpcomponents.client5:* = 5.2.1 org.apache.orc:* = 1.8.2 org.apache.parquet:* = 1.12.3 From e5c3eec1c12fbcd3dcb42f3543cd6df5a3a4aa4e Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Tue, 11 Apr 2023 09:29:33 +0200 Subject: [PATCH 2/3] Make Spotless happy --- .../objectinspector/IcebergDateObjectInspector.java | 4 ++-- .../IcebergTimestampObjectInspector.java | 7 ++++--- .../IcebergTimestampWithZoneObjectInspector.java | 10 +++++++--- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspector.java b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspector.java index 574840fe7d54..57b56ba98de3 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspector.java +++ b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspector.java @@ -18,8 +18,8 @@ */ package org.apache.iceberg.mr.hive.serde.objectinspector; -import org.apache.hadoop.hive.common.type.Date; import java.time.LocalDate; +import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; @@ -41,7 +41,7 @@ private IcebergDateObjectInspector() { @Override public Date getPrimitiveJavaObject(Object o) { - return o == null ? null : Date.ofEpochDay((int)((LocalDate) o).toEpochDay()); + return o == null ? null : Date.ofEpochDay((int) ((LocalDate) o).toEpochDay()); } @Override diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspector.java b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspector.java index 27889e9dbdaf..83f3df1b436a 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspector.java +++ b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspector.java @@ -18,10 +18,9 @@ */ package org.apache.iceberg.mr.hive.serde.objectinspector; -import org.apache.hadoop.hive.common.type.Timestamp; import java.time.LocalDateTime; import java.time.ZoneOffset; - +import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; @@ -48,7 +47,9 @@ public LocalDateTime convert(Object o) { @Override public Timestamp getPrimitiveJavaObject(Object o) { - return o == null ? null : Timestamp.ofEpochMilli(((LocalDateTime) o).toInstant(ZoneOffset.UTC).toEpochMilli()); + return o == null + ? null + : Timestamp.ofEpochMilli(((LocalDateTime) o).toInstant(ZoneOffset.UTC).toEpochMilli()); } @Override diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampWithZoneObjectInspector.java b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampWithZoneObjectInspector.java index 96b5d93acd2c..2e5f97318a85 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampWithZoneObjectInspector.java +++ b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampWithZoneObjectInspector.java @@ -20,11 +20,11 @@ import java.time.OffsetDateTime; import java.time.ZoneOffset; +import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.common.type.Timestamp; public class IcebergTimestampWithZoneObjectInspector extends AbstractPrimitiveJavaObjectInspector implements TimestampObjectInspector, WriteObjectInspector { @@ -42,12 +42,16 @@ private IcebergTimestampWithZoneObjectInspector() { @Override public OffsetDateTime convert(Object o) { - return o == null ? null : OffsetDateTime.ofInstant(((Timestamp) o).toSqlTimestamp().toInstant(), ZoneOffset.UTC); + return o == null + ? null + : OffsetDateTime.ofInstant(((Timestamp) o).toSqlTimestamp().toInstant(), ZoneOffset.UTC); } @Override public Timestamp getPrimitiveJavaObject(Object o) { - return o == null ? null : Timestamp.ofEpochMilli(((OffsetDateTime) o).toInstant().toEpochMilli()); + return o == null + ? null + : Timestamp.ofEpochMilli(((OffsetDateTime) o).toInstant().toEpochMilli()); } @Override From 84cb4f1deddd48c67293cde8e9372baf6d8dc0eb Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Tue, 11 Apr 2023 09:39:42 +0200 Subject: [PATCH 3/3] Use Hex from Apache Commons --- .../apache/iceberg/aliyun/oss/mock/AliyunOSSMockLocalStore.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aliyun/src/test/java/org/apache/iceberg/aliyun/oss/mock/AliyunOSSMockLocalStore.java b/aliyun/src/test/java/org/apache/iceberg/aliyun/oss/mock/AliyunOSSMockLocalStore.java index 75766a671490..0bc45007ca27 100644 --- a/aliyun/src/test/java/org/apache/iceberg/aliyun/oss/mock/AliyunOSSMockLocalStore.java +++ b/aliyun/src/test/java/org/apache/iceberg/aliyun/oss/mock/AliyunOSSMockLocalStore.java @@ -36,8 +36,8 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import org.apache.commons.codec.binary.Hex; import org.apache.commons.io.FileUtils; -import org.apache.directory.api.util.Hex; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.io.ByteStreams;