diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml
index 78c3ba9ed3c82..287a4b2759b61 100644
--- a/.github/workflows/bot.yml
+++ b/.github/workflows/bot.yml
@@ -166,10 +166,10 @@ jobs:
     steps:
       - uses: actions/checkout@v3
-      - name: Set up JDK 8
+      - name: Set up JDK 11
        uses: actions/setup-java@v3
        with:
-          java-version: '8'
+          java-version: '11'
          distribution: 'adopt'
          architecture: x64
          cache: maven
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestHoodieMergeOnReadTable.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestHoodieMergeOnReadTable.java
index b2fab0ae4927d..a629f9c325d5e 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestHoodieMergeOnReadTable.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestHoodieMergeOnReadTable.java
@@ -72,8 +72,9 @@
 import org.junit.jupiter.params.provider.ValueSource;
 
 import java.io.IOException;
-import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
@@ -687,9 +688,9 @@ public void testHandleUpdateWithMultiplePartitions() throws Exception {
       BaseSparkDeltaCommitActionExecutor actionExecutor = new SparkDeleteDeltaCommitActionExecutor(context(), cfg, hoodieTable,
          newDeleteTime, HoodieJavaRDD.of(deleteRDD));
       actionExecutor.getUpsertPartitioner(new WorkloadProfile(buildProfile(deleteRDD)));
-      final List<List<WriteStatus>> deleteStatus = jsc().parallelize(Arrays.asList(1)).map(x -> {
-        return actionExecutor.handleUpdate(partitionPath, fileId, fewRecordsForDelete.iterator());
-      }).map(Transformations::flatten).collect();
+      final List<List<WriteStatus>> deleteStatus = jsc().parallelize(Collections.singletonList(1))
+          .map(x -> (Iterator<List<WriteStatus>>) actionExecutor.handleUpdate(partitionPath, fileId, fewRecordsForDelete.iterator()))
+          .map(Transformations::flatten).collect();
 
       // Verify there are errors because records are from multiple partitions (but handleUpdate is invoked for
       // specific partition)
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java
index 4574b34393d54..a2c0e3b821989 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java
@@ -78,7 +78,9 @@
 import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
@@ -463,9 +465,9 @@ public void testInsertUpsertWithHoodieAvroPayload() throws Exception {
     final List<HoodieRecord> inserts = dataGen.generateInsertsWithHoodieAvroPayload(instantTime, 100);
     BaseSparkCommitActionExecutor actionExecutor = new SparkInsertCommitActionExecutor(context, config, table,
         instantTime, context.parallelize(inserts));
-    final List<List<WriteStatus>> ws = jsc.parallelize(Arrays.asList(1)).map(x -> {
-      return actionExecutor.handleInsert(UUID.randomUUID().toString(), inserts.iterator());
-    }).map(Transformations::flatten).collect();
+    final List<List<WriteStatus>> ws = jsc.parallelize(Collections.singletonList(1))
+        .map(x -> (Iterator<List<WriteStatus>>) actionExecutor.handleInsert(UUID.randomUUID().toString(), inserts.iterator()))
+        .map(Transformations::flatten).collect();
 
     WriteStatus writeStatus = ws.get(0).get(0);
     String fileId = writeStatus.getFileId();
@@ -477,9 +479,9 @@ public void testInsertUpsertWithHoodieAvroPayload() throws Exception {
     table = (HoodieSparkCopyOnWriteTable) HoodieSparkTable.create(config, context, HoodieTableMetaClient.reload(metaClient));
     BaseSparkCommitActionExecutor newActionExecutor = new SparkUpsertCommitActionExecutor(context, config, table,
         instantTime, context.parallelize(updates));
-    final List<List<WriteStatus>> updateStatus = jsc.parallelize(Arrays.asList(1)).map(x -> {
-      return newActionExecutor.handleUpdate(partitionPath, fileId, updates.iterator());
-    }).map(Transformations::flatten).collect();
+    final List<List<WriteStatus>> updateStatus = jsc.parallelize(Collections.singletonList(1))
+        .map(x -> (Iterator<List<WriteStatus>>) newActionExecutor.handleUpdate(partitionPath, fileId, updates.iterator()))
+        .map(Transformations::flatten).collect();
 
     assertEquals(updates.size() - numRecordsInPartition, updateStatus.get(0).get(0).getTotalErrorRecords());
   }
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java
index 2decdd3fc5ef3..64cb7ba6d6fc9 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java
@@ -317,15 +317,15 @@ public List<HoodieColumnRangeMetadata<Comparable>> readRangeFromParquetMetadata(
     Collector<HoodieColumnRangeMetadata<Comparable>, ?, Map<String, List<HoodieColumnRangeMetadata<Comparable>>>> groupingByCollector =
         Collectors.groupingBy(HoodieColumnRangeMetadata::getColumnName);
 
-    // Collect stats from all individual Parquet blocks
-    Map<String, List<HoodieColumnRangeMetadata<Comparable>>> columnToStatsListMap =
-        (Map<String, List<HoodieColumnRangeMetadata<Comparable>>>) metadata.getBlocks().stream().sequential()
-            .flatMap(blockMetaData ->
-                blockMetaData.getColumns().stream()
+    // Explicitly specify the type before collect since JDK11 struggles to infer it
+    Stream<HoodieColumnRangeMetadata<Comparable>> hoodieColumnRangeMetadataStream = metadata.getBlocks().stream().sequential()
+        .flatMap(blockMetaData ->
+            blockMetaData.getColumns().stream()
                 .filter(f -> cols.contains(f.getPath().toDotString()))
                 .map(columnChunkMetaData -> {
                   Statistics stats = columnChunkMetaData.getStatistics();
-                  return HoodieColumnRangeMetadata.create(
+                  // Explicitly specify the type since JDK11 struggles to infer it
+                  return (HoodieColumnRangeMetadata<Comparable>) HoodieColumnRangeMetadata.create(
                       parquetFilePath.getName(),
                       columnChunkMetaData.getPath().toDotString(),
                       convertToNativeJavaType(
@@ -342,8 +342,11 @@ public List<HoodieColumnRangeMetadata<Comparable>> readRangeFromParquetMetadata(
                       columnChunkMetaData.getTotalSize(),
                       columnChunkMetaData.getTotalUncompressedSize());
                 })
-        )
-        .collect(groupingByCollector);
+        );
+
+    // Collect stats from all individual Parquet blocks
+    Map<String, List<HoodieColumnRangeMetadata<Comparable>>> columnToStatsListMap =
+        hoodieColumnRangeMetadataStream.collect(groupingByCollector);
 
     // Combine those into file-level statistics
     // NOTE: Inlining this var makes javac (1.8) upset (due to its inability to infer
diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
index 4982f876b55af..a3b78a60a0590 100644
--- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
+++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
@@ -241,9 +241,10 @@ class ColumnStats {
     });
 
     Collector<HoodieColumnRangeMetadata<Comparable>, ?, Map<String, HoodieColumnRangeMetadata<Comparable>>> collector =
-        Collectors.toMap(colRangeMetadata -> colRangeMetadata.getColumnName(), Function.identity());
+        Collectors.toMap(HoodieColumnRangeMetadata::getColumnName, Function.identity());
 
-    return (Map<String, HoodieColumnRangeMetadata<Comparable>>) targetFields.stream()
+    // Explicitly specify the type before collect since JDK11 struggles to infer it
+    Stream<HoodieColumnRangeMetadata<Comparable>> hoodieColumnRangeMetadataStream = targetFields.stream()
         .map(field -> {
           ColumnStats colStats = allColumnStats.get(field.name());
           return HoodieColumnRangeMetadata.create(
@@ -251,16 +252,17 @@ class ColumnStats {
               field.name(),
               colStats == null ? null : coerceToComparable(field.schema(), colStats.minValue),
               colStats == null ? null : coerceToComparable(field.schema(), colStats.maxValue),
-              colStats == null ? 0 : colStats.nullCount,
-              colStats == null ? 0 : colStats.valueCount,
+              colStats == null ? 0L : colStats.nullCount,
+              colStats == null ? 0L : colStats.valueCount,
               // NOTE: Size and compressed size statistics are set to 0 to make sure we're not
               // mixing up those provided by Parquet with the ones from other encodings,
               // since those are not directly comparable
-              0,
-              0
+              0L,
+              0L
           );
-        })
-        .collect(collector);
+        });
+
+    return hoodieColumnRangeMetadataStream.collect(collector);
   }
 
   /**
diff --git a/hudi-examples/hudi-examples-common/pom.xml b/hudi-examples/hudi-examples-common/pom.xml
index caec2fadbaded..d67778c7c69aa 100644
--- a/hudi-examples/hudi-examples-common/pom.xml
+++ b/hudi-examples/hudi-examples-common/pom.xml
@@ -40,20 +40,6 @@
-      <plugin>
-        <groupId>net.alchim31.maven</groupId>
-        <artifactId>scala-maven-plugin</artifactId>
-        <executions>
-          <execution>
-            <id>scala-compile-first</id>
-            <phase>process-resources</phase>
-            <goals>
-              <goal>add-source</goal>
-              <goal>compile</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
diff --git a/hudi-examples/hudi-examples-java/pom.xml b/hudi-examples/hudi-examples-java/pom.xml
index b1783b250673f..a4a1ea5aa928a 100644
--- a/hudi-examples/hudi-examples-java/pom.xml
+++ b/hudi-examples/hudi-examples-java/pom.xml
@@ -59,20 +59,6 @@
-      <plugin>
-        <groupId>net.alchim31.maven</groupId>
-        <artifactId>scala-maven-plugin</artifactId>
-        <executions>
-          <execution>
-            <id>scala-compile-first</id>
-            <phase>process-resources</phase>
-            <goals>
-              <goal>add-source</goal>
-              <goal>compile</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataRecordIndex.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataRecordIndex.scala
index e29b2a2b0ede0..e3aeebde2937a 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataRecordIndex.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataRecordIndex.scala
@@ -18,7 +18,6 @@
 
 package org.apache.hudi.functional
 
-import org.apache.hadoop.fs.Path
 import org.apache.hudi.DataSourceWriteOptions._
 import org.apache.hudi.common.config.{HoodieMetadataConfig, TypedProperties}
 import org.apache.hudi.common.model.HoodieTableType
@@ -28,11 +27,10 @@ import org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrings
 import org.apache.hudi.config.{HoodieClusteringConfig, HoodieWriteConfig}
 import org.apache.hudi.metadata.{HoodieBackedTableMetadata, HoodieTableMetadataUtil, MetadataPartitionType}
 import org.apache.hudi.testutils.HoodieSparkClientTestBase
+
 import org.apache.spark.sql._
 import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue}
 import org.junit.jupiter.api._
-import org.junit.jupiter.params.ParameterizedTest
-import org.junit.jupiter.params.provider.EnumSource
 
 import java.util.concurrent.atomic.AtomicInteger
 import scala.collection.JavaConverters._
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestRecordLevelIndex.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestRecordLevelIndex.scala
index 56866e7bf40a7..57d90d7095422 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestRecordLevelIndex.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestRecordLevelIndex.scala
@@ -26,9 +26,9 @@ import org.apache.hudi.common.model._
 import org.apache.hudi.common.table.timeline.{HoodieInstant, HoodieTimeline}
 import org.apache.hudi.config._
 import org.apache.hudi.exception.HoodieWriteConflictException
-import org.apache.hudi.functional.TestCOWDataSourceStorage.{SQL_DRIVER_IS_NOT_NULL, SQL_DRIVER_IS_NULL, SQL_QUERY_EQUALITY_VALIDATOR_CLASS_NAME, SQL_QUERY_INEQUALITY_VALIDATOR_CLASS_NAME, SQL_RIDER_IS_NOT_NULL, SQL_RIDER_IS_NULL}
 import org.apache.hudi.metadata.{HoodieBackedTableMetadata, MetadataPartitionType}
 import org.apache.hudi.util.JavaConversions
+
 import org.apache.spark.sql._
 import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue}
 import org.junit.jupiter.api._
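Side note on the recurring pattern in the ParquetUtils and HoodieTableMetadataUtil hunks (illustration only, not part of the change set): the fix binds the intermediate stream to an explicitly typed local before calling collect(), so JDK 11's javac does not have to infer the element type across the whole flatMap/map/collect chain in one expression. A minimal, self-contained sketch of that pattern, using hypothetical class and variable names:

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class ExplicitStreamTypeSketch {
  public static void main(String[] args) {
    List<String> words = Arrays.asList("alpha", "beta", "apple");

    // Bind the intermediate stream to an explicitly typed local first, mirroring the
    // ParquetUtils / HoodieTableMetadataUtil changes above, instead of inlining the
    // whole pipeline into a single cast-and-collect expression.
    Stream<String> upperCased = words.stream().map(String::toUpperCase);

    // Collect in a separate statement against the explicitly typed stream.
    Map<Character, List<String>> grouped =
        upperCased.collect(Collectors.groupingBy(w -> w.charAt(0)));

    System.out.println(grouped); // e.g. {A=[ALPHA, APPLE], B=[BETA]}
  }
}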