diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java
index 5dde86a30dd90..bd9672c9c004b 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java
@@ -18,12 +18,17 @@
 package org.apache.hudi.hadoop;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.stream.Collectors;
+import org.apache.hudi.common.model.HoodieDataFile;
+import org.apache.hudi.common.model.HoodiePartitionMetadata;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.HoodieTimeline;
+import org.apache.hudi.common.table.TableFileSystemView.ReadOptimizedView;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
+import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
+import org.apache.hudi.exception.DatasetNotFoundException;
+import org.apache.hudi.exception.HoodieIOException;
+import org.apache.hudi.exception.InvalidDatasetException;
+
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
@@ -37,19 +42,16 @@
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapreduce.Job;
-import org.apache.hudi.common.model.HoodieDataFile;
-import org.apache.hudi.common.model.HoodiePartitionMetadata;
-import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.HoodieTimeline;
-import org.apache.hudi.common.table.TableFileSystemView.ReadOptimizedView;
-import org.apache.hudi.common.table.timeline.HoodieInstant;
-import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
-import org.apache.hudi.exception.DatasetNotFoundException;
-import org.apache.hudi.exception.HoodieIOException;
-import org.apache.hudi.exception.InvalidDatasetException;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
 /**
  * HoodieInputFormat which understands the Hoodie File Structure and filters files based on the Hoodie Mode. If paths
  * that does not correspond to a hoodie dataset then they are passed in as is (as what FileInputFormat.listStatus()
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
index bbd53ec5fe8ed..51a9805dcc4a5 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
@@ -18,24 +18,26 @@
 package org.apache.hudi.hadoop;
-import java.io.Serializable;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.stream.Collectors;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
 import org.apache.hudi.common.model.HoodieDataFile;
 import org.apache.hudi.common.model.HoodiePartitionMetadata;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
 import org.apache.hudi.exception.DatasetNotFoundException;
 import org.apache.hudi.exception.HoodieException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.stream.Collectors;
+
 /**
  * Given a path is a part of - Hoodie dataset = accepts ONLY the latest version of each path - Non-Hoodie dataset = then
  * always accept
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/RecordReaderValueIterator.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/RecordReaderValueIterator.java
index d68afb4bca61f..4201470cad06a 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/RecordReaderValueIterator.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/RecordReaderValueIterator.java
@@ -18,14 +18,16 @@
 package org.apache.hudi.hadoop;
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.NoSuchElementException;
-import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hudi.exception.HoodieException;
+
+import org.apache.hadoop.mapred.RecordReader;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
 /**
  * Provides Iterator Interface to iterate value entries read from record reader
  *
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/SafeParquetRecordReaderWrapper.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/SafeParquetRecordReaderWrapper.java
index 8a42b13c87e07..15b9a2bd39d8b 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/SafeParquetRecordReaderWrapper.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/SafeParquetRecordReaderWrapper.java
@@ -18,12 +18,13 @@
 package org.apache.hudi.hadoop;
-import java.io.IOException;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.RecordReader;
+import java.io.IOException;
+
 /**
  * Record Reader for parquet. Records read from this reader is safe to be buffered for concurrent processing.
  *
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.java
index 5a39590f9b841..6512d94701ce7 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.java
@@ -18,23 +18,10 @@
 package org.apache.hudi.hadoop.hive;
+import org.apache.hudi.hadoop.HoodieParquetInputFormat;
+import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat;
+
 import com.google.common.annotations.VisibleForTesting;
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -70,11 +57,26 @@
 import org.apache.hadoop.mapred.lib.CombineFileInputFormat;
 import org.apache.hadoop.mapred.lib.CombineFileSplit;
 import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hudi.hadoop.HoodieParquetInputFormat;
-import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
 /**
  * This is just a copy of the org.apache.hadoop.hive.ql.io.CombineHiveInputFormat from Hive 2.x Search for **MOD** to
  * see minor modifications to support custom inputformat in CombineHiveInputFormat. See
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java
index 215193f7122bb..68bf517815f34 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java
@@ -18,15 +18,14 @@
 package org.apache.hudi.hadoop.realtime;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-import java.util.stream.Collectors;
+import org.apache.hudi.common.model.HoodieAvroPayload;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.util.HoodieAvroUtils;
+import org.apache.hudi.common.util.LogReaderUtils;
+import org.apache.hudi.common.util.collection.Pair;
+import org.apache.hudi.exception.HoodieException;
+import org.apache.hudi.exception.HoodieIOException;
+
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Field;
 import org.apache.avro.generic.GenericArray;
@@ -46,19 +45,22 @@
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.JobConf;
-import org.apache.hudi.common.model.HoodieAvroPayload;
-import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.util.HoodieAvroUtils;
-import org.apache.hudi.common.util.LogReaderUtils;
-import org.apache.hudi.common.util.collection.Pair;
-import org.apache.hudi.exception.HoodieException;
-import org.apache.hudi.exception.HoodieIOException;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.parquet.avro.AvroSchemaConverter;
 import org.apache.parquet.hadoop.ParquetFileReader;
 import org.apache.parquet.schema.MessageType;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.stream.Collectors;
+
 /**
  * Record Reader implementation to merge fresh avro data with base parquet data, to support real time queries.
  */
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
index 7a3492ef92ab9..6d467288c2a6a 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
@@ -18,17 +18,22 @@
 package org.apache.hudi.hadoop.realtime;
+import org.apache.hudi.common.model.FileSlice;
+import org.apache.hudi.common.model.HoodieLogFile;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.HoodieTimeline;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
+import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
+import org.apache.hudi.common.util.FSUtils;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.exception.HoodieException;
+import org.apache.hudi.exception.HoodieIOException;
+import org.apache.hudi.hadoop.HoodieParquetInputFormat;
+import org.apache.hudi.hadoop.UseFileSplitsFromInputFormat;
+
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Sets;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.function.Function;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
@@ -41,22 +46,19 @@
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
-import org.apache.hudi.common.model.FileSlice;
-import org.apache.hudi.common.model.HoodieLogFile;
-import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.HoodieTimeline;
-import org.apache.hudi.common.table.timeline.HoodieInstant;
-import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
-import org.apache.hudi.common.util.FSUtils;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.exception.HoodieException;
-import org.apache.hudi.exception.HoodieIOException;
-import org.apache.hudi.hadoop.HoodieParquetInputFormat;
-import org.apache.hudi.hadoop.UseFileSplitsFromInputFormat;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
 /**
  * Input Format, that provides a real-time view of data in a Hoodie dataset
  */
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeFileSplit.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeFileSplit.java
index 5ae344ecdd062..0a050beb41de4 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeFileSplit.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeFileSplit.java
@@ -18,13 +18,14 @@
 package org.apache.hudi.hadoop.realtime;
+import org.apache.hadoop.mapred.FileSplit;
+
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
-import org.apache.hadoop.mapred.FileSplit;
 /**
  * Filesplit that wraps the base split and a list of log files to merge deltas from.
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeRecordReader.java
index fb11d3979e3e5..e15631641a7b2 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeRecordReader.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeRecordReader.java
@@ -18,15 +18,17 @@
 package org.apache.hudi.hadoop.realtime;
-import java.io.IOException;
+import org.apache.hudi.exception.HoodieException;
+
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hudi.exception.HoodieException;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
+import java.io.IOException;
+
 /**
  * Realtime Record Reader which can do compacted (merge-on-read) record reading or unmerged reading (parquet and log
  * files read in parallel) based on job configuration.
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java
index 7019907282854..deeaaf48192a8 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java
@@ -18,23 +18,25 @@
 package org.apache.hudi.hadoop.realtime;
-import java.io.IOException;
-import java.util.Map;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.io.ArrayWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieRecordPayload;
 import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner;
 import org.apache.hudi.common.util.FSUtils;
 import org.apache.hudi.common.util.HoodieAvroUtils;
 import org.apache.hudi.common.util.Option;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
+import java.io.IOException;
+import java.util.Map;
+
 class RealtimeCompactedRecordReader extends AbstractRealtimeRecordReader implements RecordReader {
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java
index f7a51e8209782..cd6f41dfe2eab 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java
@@ -18,15 +18,6 @@
 package org.apache.hudi.hadoop.realtime;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.io.ArrayWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hudi.common.table.log.HoodieUnMergedLogRecordScanner;
 import org.apache.hudi.common.util.DefaultSizeEstimator;
 import org.apache.hudi.common.util.FSUtils;
@@ -38,6 +29,17 @@
 import org.apache.hudi.hadoop.RecordReaderValueIterator;
 import org.apache.hudi.hadoop.SafeParquetRecordReaderWrapper;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
 class RealtimeUnmergedRecordReader extends AbstractRealtimeRecordReader implements RecordReader {
diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/InputFormatTestUtil.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/InputFormatTestUtil.java
index 7a5b7d45b8230..8dd2f07f57565 100644
--- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/InputFormatTestUtil.java
+++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/InputFormatTestUtil.java
@@ -18,28 +18,30 @@
 package org.apache.hudi.hadoop;
-import java.io.File;
-import java.io.FilenameFilter;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-import java.util.UUID;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.JobConf;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieTestUtils;
 import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
 import org.apache.hudi.common.util.FSUtils;
 import org.apache.hudi.common.util.HoodieAvroUtils;
 import org.apache.hudi.common.util.SchemaTestUtil;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.JobConf;
 import org.apache.parquet.avro.AvroParquetWriter;
 import org.junit.rules.TemporaryFolder;
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.UUID;
+
 public class InputFormatTestUtil {
   private static String TEST_WRITE_TOKEN = "1-0-1";
diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestAnnotation.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestAnnotation.java
index 7e8f46df01a4f..3bdaa647feb8e 100644
--- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestAnnotation.java
+++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestAnnotation.java
@@ -18,10 +18,11 @@
 package org.apache.hudi.hadoop;
-import static org.junit.Assert.assertTrue;
+import org.junit.Test;
 import java.lang.annotation.Annotation;
-import org.junit.Test;
+
+import static org.junit.Assert.assertTrue;
 public class TestAnnotation {
diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieInputFormat.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieInputFormat.java
index 3bcf97d99e75e..0c3a002221c8e 100644
--- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieInputFormat.java
+++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieInputFormat.java
@@ -18,10 +18,8 @@
 package org.apache.hudi.hadoop;
-import static org.junit.Assert.assertEquals;
+import org.apache.hudi.common.util.FSUtils;
-import java.io.File;
-import java.io.IOException;
 import org.apache.avro.Schema;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.io.ArrayWritable;
@@ -30,12 +28,16 @@
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hudi.common.util.FSUtils;
 import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
+import java.io.File;
+import java.io.IOException;
+
+import static org.junit.Assert.assertEquals;
+
 public class TestHoodieInputFormat {
   private HoodieParquetInputFormat inputFormat;
diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java
index 3d17140d9ad7a..8d46b1067553c 100644
--- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java
+++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java
@@ -18,20 +18,22 @@
 package org.apache.hudi.hadoop;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import org.apache.hadoop.fs.Path;
 import org.apache.hudi.common.HoodieCommonTestHarness;
 import org.apache.hudi.common.model.HoodieTestUtils;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
+
+import org.apache.hadoop.fs.Path;
 import org.junit.Before;
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
 /**
  *
  */
diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestRecordReaderValueIterator.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestRecordReaderValueIterator.java
index 9c050cd892ddc..2dfd036557c5f 100644
--- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestRecordReaderValueIterator.java
+++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestRecordReaderValueIterator.java
@@ -18,17 +18,19 @@
 package org.apache.hudi.hadoop;
-import java.io.IOException;
-import java.util.List;
-import java.util.stream.Collectors;
-import java.util.stream.IntStream;
+import org.apache.hudi.common.util.collection.Pair;
+
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hudi.common.util.collection.Pair;
 import org.junit.Assert;
 import org.junit.Test;
+import java.io.IOException;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
 public class TestRecordReaderValueIterator {
   @Test
diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java
index 021bf782b45c2..69c338564e9c4 100644
--- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java
+++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java
@@ -18,18 +18,26 @@
 package org.apache.hudi.hadoop.realtime;
-import static org.junit.Assert.assertTrue;
+import org.apache.hudi.common.model.FileSlice;
+import org.apache.hudi.common.model.HoodieLogFile;
+import org.apache.hudi.common.model.HoodieTableType;
+import org.apache.hudi.common.model.HoodieTestUtils;
+import org.apache.hudi.common.table.HoodieTimeline;
+import org.apache.hudi.common.table.log.HoodieLogFormat;
+import org.apache.hudi.common.table.log.HoodieLogFormat.Writer;
+import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock;
+import org.apache.hudi.common.table.log.block.HoodieCommandBlock;
+import org.apache.hudi.common.table.log.block.HoodieCommandBlock.HoodieCommandBlockTypeEnum;
+import org.apache.hudi.common.table.log.block.HoodieLogBlock;
+import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType;
+import org.apache.hudi.common.util.FSUtils;
+import org.apache.hudi.common.util.HoodieAvroUtils;
+import org.apache.hudi.common.util.SchemaTestUtil;
+import org.apache.hudi.common.util.collection.Pair;
+import org.apache.hudi.exception.HoodieException;
+import org.apache.hudi.hadoop.InputFormatTestUtil;
+
 import com.google.common.collect.Maps;
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Field;
 import org.apache.avro.generic.IndexedRecord;
@@ -51,30 +59,25 @@
 import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hudi.common.model.FileSlice;
-import org.apache.hudi.common.model.HoodieLogFile;
-import org.apache.hudi.common.model.HoodieTableType;
-import org.apache.hudi.common.model.HoodieTestUtils;
-import org.apache.hudi.common.table.HoodieTimeline;
-import org.apache.hudi.common.table.log.HoodieLogFormat;
-import org.apache.hudi.common.table.log.HoodieLogFormat.Writer;
-import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock;
-import org.apache.hudi.common.table.log.block.HoodieCommandBlock;
-import org.apache.hudi.common.table.log.block.HoodieCommandBlock.HoodieCommandBlockTypeEnum;
-import org.apache.hudi.common.table.log.block.HoodieLogBlock;
-import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType;
-import org.apache.hudi.common.util.FSUtils;
-import org.apache.hudi.common.util.HoodieAvroUtils;
-import org.apache.hudi.common.util.SchemaTestUtil;
-import org.apache.hudi.common.util.collection.Pair;
-import org.apache.hudi.exception.HoodieException;
-import org.apache.hudi.hadoop.InputFormatTestUtil;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+
+import static org.junit.Assert.assertTrue;
+
 public class TestHoodieRealtimeRecordReader {
   private static final String PARTITION_COLUMN = "datestr";
diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java
index 647a390cd0355..62e9711a09ec1 100644
--- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java
+++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java
@@ -18,8 +18,8 @@
 package org.apache.hudi.integ;
-import static java.util.concurrent.TimeUnit.SECONDS;
-import static org.awaitility.Awaitility.await;
+import org.apache.hudi.common.util.FileIOUtils;
+import org.apache.hudi.common.util.collection.Pair;
 import com.github.dockerjava.api.DockerClient;
 import com.github.dockerjava.api.command.DockerCmdExecFactory;
@@ -31,6 +31,11 @@
 import com.github.dockerjava.core.DockerClientConfig;
 import com.github.dockerjava.core.command.ExecStartResultCallback;
 import com.github.dockerjava.jaxrs.JerseyDockerCmdExecFactory;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.junit.Assert;
+import org.junit.Before;
+
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -38,12 +43,9 @@
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
-import org.apache.hudi.common.util.FileIOUtils;
-import org.apache.hudi.common.util.collection.Pair;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
-import org.junit.Assert;
-import org.junit.Before;
+
+import static java.util.concurrent.TimeUnit.SECONDS;
+import static org.awaitility.Awaitility.await;
 public abstract class ITTestBase {
diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java
index e92ef7b85c313..99082ecbae85e 100644
--- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java
+++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java
@@ -18,11 +18,13 @@
 package org.apache.hudi.integ;
-import com.google.common.collect.ImmutableList;
-import java.util.List;
 import org.apache.hudi.common.util.collection.Pair;
+
+import com.google.common.collect.ImmutableList;
 import org.junit.Test;
+import java.util.List;
+
 /**
  * Goes through steps described in https://hudi.incubator.apache.org/docker_demo.html
  *
diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java
index 7e73460653d48..ac54df949dc30 100644
--- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java
+++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java
@@ -20,6 +20,7 @@
 import org.apache.hudi.common.util.collection.Pair;
 import org.apache.hudi.common.model.HoodieTableType;
+
 import org.junit.Assert;
 import org.junit.Test;
diff --git a/hudi-spark/src/main/java/org/apache/hudi/BaseAvroPayload.java b/hudi-spark/src/main/java/org/apache/hudi/BaseAvroPayload.java
index 30c3fdd17906b..126836281027b 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/BaseAvroPayload.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/BaseAvroPayload.java
@@ -18,13 +18,15 @@
 package org.apache.hudi;
-import java.io.IOException;
-import java.io.Serializable;
-import org.apache.avro.generic.GenericRecord;
 import org.apache.hudi.common.util.HoodieAvroUtils;
 import org.apache.hudi.exception.HoodieException;
 import org.apache.hudi.exception.HoodieIOException;
+import org.apache.avro.generic.GenericRecord;
+
+import java.io.IOException;
+import java.io.Serializable;
+
 /**
  * Base class for all AVRO record based payloads, that can be ordered based on a field
  */
diff --git a/hudi-spark/src/main/java/org/apache/hudi/ComplexKeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/ComplexKeyGenerator.java
index 15f47d39d0de3..9441947654d5d 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/ComplexKeyGenerator.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/ComplexKeyGenerator.java
@@ -18,13 +18,15 @@
 package org.apache.hudi;
-import java.util.Arrays;
-import java.util.List;
-import org.apache.avro.generic.GenericRecord;
 import org.apache.hudi.common.model.HoodieKey;
 import org.apache.hudi.common.util.TypedProperties;
 import org.apache.hudi.exception.HoodieKeyException;
+import org.apache.avro.generic.GenericRecord;
+
+import java.util.Arrays;
+import java.util.List;
+
 /**
  * Complex key generator, which takes names of fields to be used for recordKey and partitionPath as configs.
  */
diff --git a/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java b/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java
index 6938e4b4416d3..e7cfcc8fdddf6 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java
@@ -18,14 +18,6 @@
 package org.apache.hudi;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.stream.Collectors;
-import org.apache.avro.Schema.Field;
-import org.apache.avro.generic.GenericRecord;
 import org.apache.hudi.client.embedded.EmbeddedTimelineService;
 import org.apache.hudi.common.model.HoodieKey;
 import org.apache.hudi.common.model.HoodieRecord;
@@ -43,9 +35,19 @@
 import org.apache.hudi.hive.PartitionValueExtractor;
 import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor;
 import org.apache.hudi.index.HoodieIndex;
+
+import org.apache.avro.Schema.Field;
+import org.apache.avro.generic.GenericRecord;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
 /**
  * Utilities used throughout the data source
 */
diff --git a/hudi-spark/src/main/java/org/apache/hudi/HoodieDataSourceHelpers.java b/hudi-spark/src/main/java/org/apache/hudi/HoodieDataSourceHelpers.java
index d3103682221f2..f9df30ad2be1a 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/HoodieDataSourceHelpers.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/HoodieDataSourceHelpers.java
@@ -18,16 +18,18 @@
 package org.apache.hudi;
-import com.google.common.collect.Sets;
-import java.util.List;
-import java.util.stream.Collectors;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hudi.common.model.HoodieTableType;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.HoodieTimeline;
 import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
 import org.apache.hudi.common.table.timeline.HoodieInstant;
+import com.google.common.collect.Sets;
+import org.apache.hadoop.fs.FileSystem;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
 /**
  * List of helpers to aid, construction of instanttime for read and write operations using datasource
 */
diff --git a/hudi-spark/src/main/java/org/apache/hudi/KeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/KeyGenerator.java
index c35663e6089d9..4b8084b9f088c 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/KeyGenerator.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/KeyGenerator.java
@@ -18,11 +18,13 @@
 package org.apache.hudi;
-import java.io.Serializable;
-import org.apache.avro.generic.GenericRecord;
 import org.apache.hudi.common.model.HoodieKey;
 import org.apache.hudi.common.util.TypedProperties;
+import org.apache.avro.generic.GenericRecord;
+
+import java.io.Serializable;
+
 /**
  * Abstract class to extend for plugging in extraction of {@link HoodieKey} from an Avro record
 */
diff --git a/hudi-spark/src/main/java/org/apache/hudi/NonpartitionedKeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/NonpartitionedKeyGenerator.java
index 4cfbd55bd031d..8c0a664d6b59f 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/NonpartitionedKeyGenerator.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/NonpartitionedKeyGenerator.java
@@ -18,11 +18,12 @@
 package org.apache.hudi;
-import org.apache.avro.generic.GenericRecord;
 import org.apache.hudi.common.model.HoodieKey;
 import org.apache.hudi.common.util.TypedProperties;
 import org.apache.hudi.exception.HoodieKeyException;
+import org.apache.avro.generic.GenericRecord;
+
 /**
  * Simple Key generator for unpartitioned Hive Tables
 */
diff --git a/hudi-spark/src/main/java/org/apache/hudi/OverwriteWithLatestAvroPayload.java b/hudi-spark/src/main/java/org/apache/hudi/OverwriteWithLatestAvroPayload.java
index f2646ccd78d42..e860837f2fa0b 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/OverwriteWithLatestAvroPayload.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/OverwriteWithLatestAvroPayload.java
@@ -18,14 +18,16 @@
 package org.apache.hudi;
-import java.io.IOException;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
 import org.apache.hudi.common.model.HoodieRecordPayload;
 import org.apache.hudi.common.util.HoodieAvroUtils;
 import org.apache.hudi.common.util.Option;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+
+import java.io.IOException;
+
 /**
  * Default payload used for delta streamer.
 *
diff --git a/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java b/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java
index 504ad18f3a2b8..d09716d7ad5c7 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java
@@ -18,6 +18,16 @@
 package org.apache.hudi;
+import org.apache.hudi.common.model.HoodieKey;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.util.HoodieAvroUtils;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.exception.HoodieIOException;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -29,14 +39,6 @@
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
 import java.util.stream.Stream;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hudi.common.model.HoodieKey;
-import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.util.HoodieAvroUtils;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.exception.HoodieIOException;
 /**
  * Class to be used in quickstart guide for generating inserts and updates against a corpus. Test data uses a toy Uber
diff --git a/hudi-spark/src/main/java/org/apache/hudi/SimpleKeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/SimpleKeyGenerator.java
index 8f5948442c4ba..f458906406cd6 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/SimpleKeyGenerator.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/SimpleKeyGenerator.java
@@ -18,11 +18,12 @@
 package org.apache.hudi;
-import org.apache.avro.generic.GenericRecord;
 import org.apache.hudi.common.model.HoodieKey;
 import org.apache.hudi.common.util.TypedProperties;
 import org.apache.hudi.exception.HoodieKeyException;
+import org.apache.avro.generic.GenericRecord;
+
 /**
  * Simple key generator, which takes names of fields to be used for recordKey and partitionPath as configs.
 */
diff --git a/hudi-spark/src/test/java/DataSourceTestUtils.java b/hudi-spark/src/test/java/DataSourceTestUtils.java
index 15fea332eae2b..ab76444db1de4 100644
--- a/hudi-spark/src/test/java/DataSourceTestUtils.java
+++ b/hudi-spark/src/test/java/DataSourceTestUtils.java
@@ -16,14 +16,15 @@
  * limitations under the License.
  */
-import java.io.IOException;
-import java.util.List;
-import java.util.stream.Collectors;
 import org.apache.hudi.common.TestRawTripPayload;
 import org.apache.hudi.common.model.HoodieKey;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.util.Option;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.stream.Collectors;
+
 /**
  * Test utils for data source tests.
 */
diff --git a/hudi-spark/src/test/java/HoodieJavaApp.java b/hudi-spark/src/test/java/HoodieJavaApp.java
index 50ac65ca42107..389c015542443 100644
--- a/hudi-spark/src/test/java/HoodieJavaApp.java
+++ b/hudi-spark/src/test/java/HoodieJavaApp.java
@@ -16,11 +16,6 @@
  * limitations under the License.
  */
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hudi.DataSourceReadOptions;
 import org.apache.hudi.DataSourceWriteOptions;
 import org.apache.hudi.HoodieDataSourceHelpers;
@@ -33,6 +28,10 @@
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.hive.MultiPartKeysValueExtractor;
 import org.apache.hudi.hive.NonPartitionedExtractor;
+
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.spark.api.java.JavaSparkContext;
@@ -42,6 +41,9 @@
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
+import java.util.ArrayList;
+import java.util.List;
+
 /**
  * Sample program that writes & reads hoodie datasets via the Spark datasource
 */
diff --git a/hudi-spark/src/test/java/HoodieJavaStreamingApp.java b/hudi-spark/src/test/java/HoodieJavaStreamingApp.java
index 492cc980456da..c448abbc73981 100644
--- a/hudi-spark/src/test/java/HoodieJavaStreamingApp.java
+++ b/hudi-spark/src/test/java/HoodieJavaStreamingApp.java
@@ -16,14 +16,6 @@
  * limitations under the License.
  */
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import java.util.List;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hudi.DataSourceReadOptions;
 import org.apache.hudi.DataSourceWriteOptions;
 import org.apache.hudi.HoodieDataSourceHelpers;
@@ -31,6 +23,11 @@
 import org.apache.hudi.common.model.HoodieTableType;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.hive.MultiPartKeysValueExtractor;
+
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.spark.api.java.JavaSparkContext;
@@ -42,6 +39,11 @@
 import org.apache.spark.sql.streaming.OutputMode;
 import org.apache.spark.sql.streaming.ProcessingTime;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
 /**
  * Sample program that writes & reads hoodie datasets via the Spark datasource streaming
 */
diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/FileSystemViewHandler.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/FileSystemViewHandler.java
index cb3d8a70ef4c3..da2f305cdb3ed 100644
--- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/FileSystemViewHandler.java
+++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/FileSystemViewHandler.java
@@ -18,17 +18,6 @@
 package org.apache.hudi.timeline.service;
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.base.Preconditions;
-import io.javalin.Context;
-import io.javalin.Handler;
-import io.javalin.Javalin;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.List;
-import java.util.stream.Collectors;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hudi.common.table.HoodieTimeline;
 import org.apache.hudi.common.table.SyncableFileSystemView;
 import org.apache.hudi.common.table.timeline.dto.CompactionOpDTO;
@@ -42,10 +31,23 @@
 import org.apache.hudi.timeline.service.handlers.DataFileHandler;
 import org.apache.hudi.timeline.service.handlers.FileSliceHandler;
 import org.apache.hudi.timeline.service.handlers.TimelineHandler;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.base.Preconditions;
+import io.javalin.Context;
+import io.javalin.Handler;
+import io.javalin.Javalin;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.jetbrains.annotations.NotNull;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
 /**
  * Main REST Handler class that handles local view staleness and delegates calls to slice/data-file/timeline handlers
 */
diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java
index 2272757c70408..16ff2f7a9ce1f 100644
--- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java
+++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java
@@ -18,21 +18,23 @@
 package org.apache.hudi.timeline.service;
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import io.javalin.Javalin;
-import java.io.IOException;
-import java.io.Serializable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hudi.common.SerializableConfiguration;
 import org.apache.hudi.common.table.view.FileSystemViewManager;
 import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
 import org.apache.hudi.common.table.view.FileSystemViewStorageType;
 import org.apache.hudi.common.util.FSUtils;
+
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import io.javalin.Javalin;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
+import java.io.IOException;
+import java.io.Serializable;
+
 /**
  * A stand alone timeline service exposing File-System View interfaces to clients
 */
diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/DataFileHandler.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/DataFileHandler.java
index f3364495c147a..2b92bbf681b15 100644
--- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/DataFileHandler.java
+++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/DataFileHandler.java
@@ -18,14 +18,16 @@
 package org.apache.hudi.timeline.service.handlers;
+import org.apache.hudi.common.table.timeline.dto.DataFileDTO;
+import org.apache.hudi.common.table.view.FileSystemViewManager;
+
+import org.apache.hadoop.conf.Configuration;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.stream.Collectors;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hudi.common.table.timeline.dto.DataFileDTO;
-import org.apache.hudi.common.table.view.FileSystemViewManager;
 /**
  * REST Handler servicing data-file requests
diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/FileSliceHandler.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/FileSliceHandler.java
index eb283fb3d195b..e58f83556ad7a 100644
--- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/FileSliceHandler.java
+++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/FileSliceHandler.java
@@ -18,16 +18,18 @@
 package org.apache.hudi.timeline.service.handlers;
+import org.apache.hudi.common.table.timeline.dto.CompactionOpDTO;
+import org.apache.hudi.common.table.timeline.dto.FileGroupDTO;
+import org.apache.hudi.common.table.timeline.dto.FileSliceDTO;
+import org.apache.hudi.common.table.view.FileSystemViewManager;
+
+import org.apache.hadoop.conf.Configuration;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.stream.Collectors;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hudi.common.table.timeline.dto.CompactionOpDTO;
-import org.apache.hudi.common.table.timeline.dto.FileGroupDTO;
-import org.apache.hudi.common.table.timeline.dto.FileSliceDTO;
-import org.apache.hudi.common.table.view.FileSystemViewManager;
 /**
  * REST Handler servicing file-slice requests
diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/Handler.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/Handler.java
index 03be706fed8b3..884bc4227bdcf 100644
--- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/Handler.java
+++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/Handler.java
@@ -18,10 +18,12 @@
 package org.apache.hudi.timeline.service.handlers;
-import java.io.IOException;
+import org.apache.hudi.common.table.view.FileSystemViewManager;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hudi.common.table.view.FileSystemViewManager;
+
+import java.io.IOException;
 public abstract class Handler {
diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/TimelineHandler.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/TimelineHandler.java
index 49fc2ce98de22..faa81a4d06402 100644
--- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/TimelineHandler.java
+++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/TimelineHandler.java
@@ -18,14 +18,16 @@
 package org.apache.hudi.timeline.service.handlers;
+import org.apache.hudi.common.table.timeline.dto.InstantDTO;
+import org.apache.hudi.common.table.timeline.dto.TimelineDTO;
+import org.apache.hudi.common.table.view.FileSystemViewManager;
+
+import org.apache.hadoop.conf.Configuration;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hudi.common.table.timeline.dto.InstantDTO;
-import org.apache.hudi.common.table.timeline.dto.TimelineDTO;
-import org.apache.hudi.common.table.view.FileSystemViewManager;
 /**
  * REST Handler servicing timeline requests
diff --git a/hudi-timeline-service/src/test/java/org/apache/hudi/timeline/table/view/TestRemoteHoodieTableFileSystemView.java b/hudi-timeline-service/src/test/java/org/apache/hudi/timeline/table/view/TestRemoteHoodieTableFileSystemView.java
index 7c55949682a8d..d69a8281700ed 100644
--- a/hudi-timeline-service/src/test/java/org/apache/hudi/timeline/table/view/TestRemoteHoodieTableFileSystemView.java
+++ b/hudi-timeline-service/src/test/java/org/apache/hudi/timeline/table/view/TestRemoteHoodieTableFileSystemView.java
@@ -27,6 +27,7 @@
 import org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView;
 import org.apache.hudi.common.table.view.TestHoodieTableFileSystemView;
 import org.apache.hudi.timeline.service.TimelineService;
+
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HDFSParquetImporter.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HDFSParquetImporter.java
index 34552bb0b31a4..62d2aa79b7ef5 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HDFSParquetImporter.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HDFSParquetImporter.java
@@ -18,26 +18,6 @@
 package org.apache.hudi.utilities;
-import com.beust.jcommander.IValueValidator;
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import com.beust.jcommander.ParameterException;
-import com.google.common.annotations.VisibleForTesting;
-import java.io.IOException;
-import java.io.Serializable;
-import java.time.Instant;
-import java.time.ZoneId;
-import java.time.format.DateTimeFormatter;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Properties;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hudi.HoodieWriteClient;
 import org.apache.hudi.WriteStatus;
 import org.apache.hudi.common.HoodieJsonPayload;
@@ -50,12 +30,35 @@
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.TypedProperties;
 import org.apache.hudi.exception.HoodieIOException;
+
+import com.beust.jcommander.IValueValidator;
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.ParameterException;
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.parquet.avro.AvroReadSupport;
 import org.apache.parquet.hadoop.ParquetInputFormat;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.time.Instant;
+import java.time.ZoneId;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
 import scala.Tuple2;
 /**
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HiveIncrementalPuller.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HiveIncrementalPuller.java
index 2cba5fe789a58..62ae769affc17 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HiveIncrementalPuller.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HiveIncrementalPuller.java
@@ -18,8 +18,25 @@
 package org.apache.hudi.utilities;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
+import org.apache.hudi.common.util.FileIOUtils;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.exception.HoodieException;
+import org.apache.hudi.utilities.exception.HoodieIncrementalPullException;
+import org.apache.hudi.utilities.exception.HoodieIncrementalPullSQLException;
+
 import com.beust.jcommander.JCommander;
 import com.beust.jcommander.Parameter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.stringtemplate.v4.ST;
+
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
@@ -32,21 +49,6 @@
 import java.util.List;
 import java.util.Scanner;
 import java.util.stream.Collectors;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.permission.FsAction;
-import org.apache.hadoop.fs.permission.FsPermission;
-import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.timeline.HoodieInstant;
-import org.apache.hudi.common.util.FileIOUtils;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.exception.HoodieException;
-import org.apache.hudi.utilities.exception.HoodieIncrementalPullException;
-import org.apache.hudi.utilities.exception.HoodieIncrementalPullSQLException;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
-import org.stringtemplate.v4.ST;
 /**
  * Utility to pull data after a given commit, based on the supplied HiveQL and save the delta as another hive temporary
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCleaner.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCleaner.java
index 8ca8cd1aa8983..27c32206304dd 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCleaner.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCleaner.java
@@ -18,22 +18,24 @@
 package org.apache.hudi.utilities;
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hudi.HoodieWriteClient;
 import org.apache.hudi.common.util.FSUtils;
 import org.apache.hudi.common.util.TypedProperties;
 import org.apache.hudi.config.HoodieWriteConfig;
+
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.spark.api.java.JavaSparkContext;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+
 public class HoodieCleaner {
   private static volatile Logger log = LogManager.getLogger(HoodieCleaner.class);
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactionAdminTool.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactionAdminTool.java
index 63bf441ff4e1b..d42a45dfc329a 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactionAdminTool.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactionAdminTool.java
@@ -18,22 +18,24 @@
 package org.apache.hudi.utilities;
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import java.io.ObjectOutputStream;
-import java.io.Serializable;
-import java.util.List;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hudi.CompactionAdminClient;
 import org.apache.hudi.CompactionAdminClient.RenameOpResult;
 import org.apache.hudi.CompactionAdminClient.ValidationOpResult;
 import org.apache.hudi.common.model.HoodieFileGroupId;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.util.FSUtils;
+
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.spark.api.java.JavaSparkContext;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.util.List;
+
 public class HoodieCompactionAdminTool {
   private final Config cfg;
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java
index 4f72b2429374b..48fbbafaca35c 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java
@@ -18,23 +18,25 @@
 package org.apache.hudi.utilities;
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hudi.HoodieWriteClient;
 import org.apache.hudi.WriteStatus;
 import org.apache.hudi.common.util.FSUtils;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.TypedProperties;
+
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+
 public class HoodieCompactor {
   private static volatile Logger logger = LogManager.getLogger(HoodieCompactor.class);
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java
index a37f7da1bcd58..4f76f43f7ef53 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java
@@ -18,17 +18,6 @@
 package org.apache.hudi.utilities;
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.stream.Stream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.Path;
 import org.apache.hudi.common.SerializableConfiguration;
 import org.apache.hudi.common.model.HoodieDataFile;
 import org.apache.hudi.common.model.HoodiePartitionMetadata;
@@ -40,10 +29,24 @@
 import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
 import org.apache.hudi.common.util.FSUtils;
 import org.apache.hudi.common.util.Option;
+
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaSparkContext;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Stream;
+
 import scala.Tuple2;
 /**
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java
index dc49ebd888e44..16ccb143e7934 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java
@@ -22,6 +22,12 @@
 import com.beust.jcommander.Parameter;
 import com.google.common.base.Preconditions;
 import io.javalin.Javalin;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClientBuilder;
+import org.apache.spark.api.java.JavaSparkContext;
+
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
@@ -31,11 +37,6 @@
 import java.util.ArrayList;
 import java.util.List;
 import java.util.stream.IntStream;
-import org.apache.http.HttpResponse;
-import org.apache.http.client.methods.HttpGet;
-import org.apache.http.impl.client.CloseableHttpClient;
-import org.apache.http.impl.client.HttpClientBuilder;
-import org.apache.spark.api.java.JavaSparkContext;
 public class HoodieWithTimelineServer implements Serializable {
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java
index bbb18af8abd7c..1f33ca6194502 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java
@@ -18,18 +18,6 @@
 package org.apache.hudi.utilities;
-import com.google.common.base.Preconditions;
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.StringReader;
-import java.nio.ByteBuffer;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hudi.HoodieWriteClient;
 import org.apache.hudi.WriteStatus;
 import org.apache.hudi.common.util.DFSPropertiesConfiguration;
@@ -44,6 +32,11 @@
 import org.apache.hudi.utilities.schema.SchemaProvider;
 import org.apache.hudi.utilities.sources.Source;
 import org.apache.hudi.utilities.transform.Transformer;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.log4j.LogManager;
 import
org.apache.log4j.Logger; import org.apache.spark.Accumulator; @@ -52,6 +45,15 @@ import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.StringReader; +import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + /** * Bunch of helper methods */ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/adhoc/UpgradePayloadFromUberToApache.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/adhoc/UpgradePayloadFromUberToApache.java index 8c04f673b91c3..6793d94eee293 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/adhoc/UpgradePayloadFromUberToApache.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/adhoc/UpgradePayloadFromUberToApache.java @@ -18,8 +18,19 @@ package org.apache.hudi.utilities.adhoc; +import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.table.HoodieTableConfig; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.util.FSUtils; +import org.apache.hudi.config.HoodieCompactionConfig; + import com.beust.jcommander.JCommander; import com.beust.jcommander.Parameter; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; @@ -27,15 +38,6 @@ import java.util.HashMap; import java.util.Map; import java.util.Properties; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hudi.common.model.HoodieTableType; -import org.apache.hudi.common.table.HoodieTableConfig; -import org.apache.hudi.common.table.HoodieTableMetaClient; -import org.apache.hudi.common.util.FSUtils; -import org.apache.hudi.config.HoodieCompactionConfig; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; /** * This is a one-time use class meant for migrating the configuration for "hoodie.compaction.payload.class" in diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/AbstractDeltaStreamerService.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/AbstractDeltaStreamerService.java index ceb745a70174c..bd4d8a269ffcb 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/AbstractDeltaStreamerService.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/AbstractDeltaStreamerService.java @@ -18,6 +18,11 @@ package org.apache.hudi.utilities.deltastreamer; +import org.apache.hudi.common.util.collection.Pair; + +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + import java.io.Serializable; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; @@ -25,9 +30,6 @@ import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.function.Function; -import org.apache.hudi.common.util.collection.Pair; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; /** * Base Class for running delta-sync/compaction in separate thread and controlling their life-cycle diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/Compactor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/Compactor.java index a72b68a45479d..3285ba70136c7 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/Compactor.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/Compactor.java @@ -18,18 +18,20 @@ package org.apache.hudi.utilities.deltastreamer; -import java.io.IOException; -import java.io.Serializable; import org.apache.hudi.HoodieWriteClient; import org.apache.hudi.WriteStatus; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieException; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import java.io.IOException; +import java.io.Serializable; + /** * Run one round of compaction */ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java index 73ef8a35efbc9..532ec697f7e63 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java @@ -18,25 +18,6 @@ package org.apache.hudi.utilities.deltastreamer; -import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE; -import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME; - -import com.codahale.metrics.Timer; -import com.google.common.base.Preconditions; -import java.io.IOException; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Objects; -import java.util.function.Function; -import java.util.stream.Collectors; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hudi.AvroConversionUtils; import org.apache.hudi.DataSourceUtils; import org.apache.hudi.HoodieWriteClient; @@ -66,6 +47,15 @@ import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.InputBatch; import org.apache.hudi.utilities.transform.Transformer; + +import com.codahale.metrics.Timer; +import com.google.common.base.Preconditions; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaRDD; @@ -73,8 +63,22 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; + +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Objects; +import java.util.function.Function; +import java.util.stream.Collectors; + import scala.collection.JavaConversions; +import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE; +import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME; + + /** * Syncs one batch of data to hoodie dataset */ diff --git
a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java index db8b40aebd781..9893f0d87740e 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java @@ -18,30 +18,6 @@ package org.apache.hudi.utilities.deltastreamer; -import com.beust.jcommander.IStringConverter; -import com.beust.jcommander.JCommander; -import com.beust.jcommander.Parameter; -import com.beust.jcommander.ParameterException; -import com.google.common.base.Preconditions; -import java.io.IOException; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.locks.Condition; -import java.util.concurrent.locks.ReentrantLock; -import java.util.stream.Collectors; -import java.util.stream.IntStream; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hudi.HoodieWriteClient; import org.apache.hudi.OverwriteWithLatestAvroPayload; import org.apache.hudi.common.model.HoodieTableType; @@ -60,11 +36,37 @@ import org.apache.hudi.utilities.UtilHelpers; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.JsonDFSSource; + +import com.beust.jcommander.IStringConverter; +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import com.beust.jcommander.ParameterException; +import com.google.common.base.Preconditions; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.ReentrantLock; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + /** * A Utility which can incrementally take the output from {@link HiveIncrementalPuller} and apply it to the target * dataset.
Does not maintain any state, queries at runtime to see how far behind the target dataset is from the source diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerMetrics.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerMetrics.java index 4132f5b876c9c..19f8e10ab5bc7 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerMetrics.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerMetrics.java @@ -18,10 +18,11 @@ package org.apache.hudi.utilities.deltastreamer; -import com.codahale.metrics.Timer; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.metrics.Metrics; +import com.codahale.metrics.Timer; + public class HoodieDeltaStreamerMetrics { private HoodieWriteConfig config = null; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SchedulerConfGenerator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SchedulerConfGenerator.java index cb4477a203456..66d2c475a1ecc 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SchedulerConfGenerator.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SchedulerConfGenerator.java @@ -18,6 +18,13 @@ package org.apache.hudi.utilities.deltastreamer; +import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.util.Option; + +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.apache.spark.SparkConf; + import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; @@ -25,11 +32,6 @@ import java.util.HashMap; import java.util.Map; import java.util.UUID; -import org.apache.hudi.common.model.HoodieTableType; -import org.apache.hudi.common.util.Option; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; -import org.apache.spark.SparkConf; /** * Utility Class to generate Spark Scheduling allocation file. 
This kicks in only when user sets diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SourceFormatAdapter.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SourceFormatAdapter.java index ab3b0708e31a2..b41efcde289f6 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SourceFormatAdapter.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SourceFormatAdapter.java @@ -18,11 +18,6 @@ package org.apache.hudi.utilities.deltastreamer; -import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE; -import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME; - -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.AvroConversionUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.utilities.sources.AvroSource; @@ -32,11 +27,17 @@ import org.apache.hudi.utilities.sources.RowSource; import org.apache.hudi.utilities.sources.Source; import org.apache.hudi.utilities.sources.helpers.AvroConvertor; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.types.StructType; +import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE; +import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME; + /** * Adapts data-format provided by the source to the data-format required by the client (DeltaStreamer) */ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/exception/HoodieIncrementalPullException.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/exception/HoodieIncrementalPullException.java index 58cad2c2c55d8..bab12b072b24d 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/exception/HoodieIncrementalPullException.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/exception/HoodieIncrementalPullException.java @@ -18,9 +18,10 @@ package org.apache.hudi.utilities.exception; -import java.sql.SQLException; import org.apache.hudi.exception.HoodieException; +import java.sql.SQLException; + public class HoodieIncrementalPullException extends HoodieException { public HoodieIncrementalPullException(String msg, SQLException e) { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java index 4f91f959f0690..04c6ecef23ac6 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java @@ -18,14 +18,6 @@ package org.apache.hudi.utilities.keygen; -import java.io.Serializable; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.Arrays; -import java.util.Collections; -import java.util.Date; -import java.util.TimeZone; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.DataSourceUtils; import org.apache.hudi.SimpleKeyGenerator; import org.apache.hudi.common.model.HoodieKey; @@ -34,6 +26,16 @@ import org.apache.hudi.exception.HoodieNotSupportedException; import org.apache.hudi.utilities.exception.HoodieDeltaStreamerException; +import 
org.apache.avro.generic.GenericRecord; + +import java.io.Serializable; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.Collections; +import java.util.Date; +import java.util.TimeZone; + /** * Key generator, that relies on timestamps for partitioning field. Still picks record key by name. */ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java index ef1e986c754c0..a840ff4a2229d 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java @@ -18,11 +18,30 @@ package org.apache.hudi.utilities.perf; +import org.apache.hudi.common.model.FileSlice; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.SyncableFileSystemView; +import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; +import org.apache.hudi.common.table.view.FileSystemViewStorageType; +import org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView; +import org.apache.hudi.common.util.FSUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.timeline.service.TimelineService; +import org.apache.hudi.utilities.UtilHelpers; + import com.beust.jcommander.JCommander; import com.beust.jcommander.Parameter; import com.codahale.metrics.Histogram; import com.codahale.metrics.Snapshot; import com.codahale.metrics.UniformReservoir; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; + import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; @@ -36,23 +55,6 @@ import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import java.util.stream.IntStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hudi.common.model.FileSlice; -import org.apache.hudi.common.table.HoodieTableMetaClient; -import org.apache.hudi.common.table.SyncableFileSystemView; -import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; -import org.apache.hudi.common.table.view.FileSystemViewStorageType; -import org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView; -import org.apache.hudi.common.util.FSUtils; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.timeline.service.TimelineService; -import org.apache.hudi.utilities.UtilHelpers; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; public class TimelineServerPerf implements Serializable { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java index 3eb3a44ecaea1..5776984c82efc 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java @@ -18,17 +18,19 @@ package org.apache.hudi.utilities.schema; -import java.io.IOException; -import 
java.util.Collections; -import org.apache.avro.Schema; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hudi.DataSourceUtils; import org.apache.hudi.common.util.FSUtils; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.exception.HoodieIOException; + +import org.apache.avro.Schema; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.spark.api.java.JavaSparkContext; +import java.io.IOException; +import java.util.Collections; + /** * A simple schema provider, that reads off files on DFS */ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/NullTargetSchemaRegistryProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/NullTargetSchemaRegistryProvider.java index d7415a05ee64b..7b3172c420ef9 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/NullTargetSchemaRegistryProvider.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/NullTargetSchemaRegistryProvider.java @@ -18,8 +18,9 @@ package org.apache.hudi.utilities.schema; -import org.apache.avro.Schema; import org.apache.hudi.common.util.TypedProperties; + +import org.apache.avro.Schema; import org.apache.spark.api.java.JavaSparkContext; /** diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/RowBasedSchemaProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/RowBasedSchemaProvider.java index 3cec79c527f77..4b708fa01c2d8 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/RowBasedSchemaProvider.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/RowBasedSchemaProvider.java @@ -18,8 +18,9 @@ package org.apache.hudi.utilities.schema; -import org.apache.avro.Schema; import org.apache.hudi.AvroConversionUtils; + +import org.apache.avro.Schema; import org.apache.spark.sql.types.StructType; public class RowBasedSchemaProvider extends SchemaProvider { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProvider.java index c1f7c349d0564..8378383c5ef0e 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProvider.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProvider.java @@ -18,11 +18,13 @@ package org.apache.hudi.utilities.schema; -import java.io.Serializable; -import org.apache.avro.Schema; import org.apache.hudi.common.util.TypedProperties; + +import org.apache.avro.Schema; import org.apache.spark.api.java.JavaSparkContext; +import java.io.Serializable; + /** * Class to provide schema for reading data and also writing into a Hoodie table */ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java index 7e2ac1641ed1f..d03c6daac4373 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java @@ -18,16 +18,18 @@ package org.apache.hudi.utilities.schema; +import org.apache.hudi.DataSourceUtils; +import org.apache.hudi.common.util.TypedProperties; +import org.apache.hudi.exception.HoodieIOException; + import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; +import 
org.apache.avro.Schema; +import org.apache.spark.api.java.JavaSparkContext; + import java.io.IOException; import java.net.URL; import java.util.Collections; -import org.apache.avro.Schema; -import org.apache.hudi.DataSourceUtils; -import org.apache.hudi.common.util.TypedProperties; -import org.apache.hudi.exception.HoodieIOException; -import org.apache.spark.api.java.JavaSparkContext; /** * Obtains latest schema from the Confluent/Kafka schema-registry diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroDFSSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroDFSSource.java index b7f6f8c73274e..4cd8f075d9d8c 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroDFSSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroDFSSource.java @@ -18,15 +18,16 @@ package org.apache.hudi.utilities.sources; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.mapred.AvroKey; -import org.apache.avro.mapreduce.AvroKeyInputFormat; -import org.apache.hadoop.io.NullWritable; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.DFSPathSelector; + +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.mapred.AvroKey; +import org.apache.avro.mapreduce.AvroKeyInputFormat; +import org.apache.hadoop.io.NullWritable; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroKafkaSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroKafkaSource.java index 2ce8b43717ac4..9588a814b6a85 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroKafkaSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroKafkaSource.java @@ -18,14 +18,15 @@ package org.apache.hudi.utilities.sources; -import io.confluent.kafka.serializers.KafkaAvroDecoder; -import kafka.serializer.StringDecoder; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils; + +import io.confluent.kafka.serializers.KafkaAvroDecoder; +import kafka.serializer.StringDecoder; +import org.apache.avro.generic.GenericRecord; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaRDD; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroSource.java index 3137cd6fca59b..24d787883ac26 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroSource.java @@ -18,9 +18,10 @@ package org.apache.hudi.utilities.sources; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.utilities.schema.SchemaProvider; + +import org.apache.avro.generic.GenericRecord; import org.apache.spark.api.java.JavaRDD; 
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HiveIncrPullSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HiveIncrPullSource.java index b37a2191fcd22..4e4d603402d0c 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HiveIncrPullSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HiveIncrPullSource.java @@ -18,19 +18,6 @@ package org.apache.hudi.utilities.sources; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.stream.Collectors; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.mapred.AvroKey; -import org.apache.avro.mapreduce.AvroKeyInputFormat; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.NullWritable; import org.apache.hudi.DataSourceUtils; import org.apache.hudi.common.util.FSUtils; import org.apache.hudi.common.util.Option; @@ -38,6 +25,14 @@ import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.utilities.HiveIncrementalPuller; import org.apache.hudi.utilities.schema.SchemaProvider; + +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.mapred.AvroKey; +import org.apache.avro.mapreduce.AvroKeyInputFormat; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.NullWritable; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaPairRDD; @@ -45,6 +40,13 @@ import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + /** * Source to read deltas produced by {@link HiveIncrementalPuller}, commit by commit and apply to the target table *
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HoodieIncrSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HoodieIncrSource.java index f5ed4e31e110d..3edb296911705 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HoodieIncrSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HoodieIncrSource.java @@ -18,7 +18,6 @@ package org.apache.hudi.utilities.sources; -import java.util.Arrays; import org.apache.hudi.DataSourceReadOptions; import org.apache.hudi.DataSourceUtils; import org.apache.hudi.common.model.HoodieRecord; @@ -28,12 +27,15 @@ import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.IncrSourceHelper; + import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.DataFrameReader; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; +import java.util.Arrays; + public class HoodieIncrSource extends RowSource { protected static class Config { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonDFSSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonDFSSource.java index 6be3a54ba5889..ed9b82ae295fd 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonDFSSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonDFSSource.java @@ -23,6 +23,7 @@ import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.DFSPathSelector; + import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonKafkaSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonKafkaSource.java index 8e95a8cec3ffa..ba68ac92dae2e 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonKafkaSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonKafkaSource.java @@ -18,12 +18,13 @@ package org.apache.hudi.utilities.sources; -import kafka.serializer.StringDecoder; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils; + +import kafka.serializer.StringDecoder; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaRDD; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonSource.java index 5c9db183ddeb4..fb569e904d716 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonSource.java @@ -20,6 +20,7 @@ import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.utilities.schema.SchemaProvider; + import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; diff --git 
a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetDFSSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetDFSSource.java index 22ac3f99c7dc0..3695fcee0620b 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetDFSSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetDFSSource.java @@ -18,12 +18,13 @@ package org.apache.hudi.utilities.sources; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.DFSPathSelector; + +import org.apache.avro.generic.GenericRecord; import org.apache.parquet.avro.AvroParquetInputFormat; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetSource.java index edcc6883ab350..58fe5ad953043 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetSource.java @@ -18,9 +18,10 @@ package org.apache.hudi.utilities.sources; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.utilities.schema.SchemaProvider; + +import org.apache.avro.generic.GenericRecord; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/RowSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/RowSource.java index 467c66710b284..9e289f102ae7a 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/RowSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/RowSource.java @@ -23,6 +23,7 @@ import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.utilities.schema.RowBasedSchemaProvider; import org.apache.hudi.utilities.schema.SchemaProvider; + import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/Source.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/Source.java index 0ed1e6c838317..48e3bd74136eb 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/Source.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/Source.java @@ -18,15 +18,17 @@ package org.apache.hudi.utilities.sources; -import java.io.Serializable; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.utilities.schema.SchemaProvider; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; +import java.io.Serializable; + /** * Represents a source from which we can tail data. Assumes a constructor that takes properties. 
*/ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/AvroConvertor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/AvroConvertor.java index d5c46212a1b39..97bb9372bb6ec 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/AvroConvertor.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/AvroConvertor.java @@ -18,13 +18,15 @@ package org.apache.hudi.utilities.sources.helpers; +import org.apache.hudi.avro.MercifulJsonConverter; + import com.twitter.bijection.Injection; import com.twitter.bijection.avro.GenericAvroCodecs; -import java.io.IOException; -import java.io.Serializable; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; -import org.apache.hudi.avro.MercifulJsonConverter; + +import java.io.IOException; +import java.io.Serializable; /** * Convert a variety of data into Avro GenericRecords. Has a bunch of lazy fields to circumvent issues around diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/DFSPathSelector.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/DFSPathSelector.java index ce979d654d114..2bc0466c29ce2 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/DFSPathSelector.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/DFSPathSelector.java @@ -18,18 +18,6 @@ package org.apache.hudi.utilities.sources.helpers; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Comparator; -import java.util.List; -import java.util.stream.Collectors; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocatedFileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.RemoteIterator; import org.apache.hudi.DataSourceUtils; import org.apache.hudi.common.util.FSUtils; import org.apache.hudi.common.util.Option; @@ -38,6 +26,20 @@ import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; +import java.util.stream.Collectors; + public class DFSPathSelector { /** diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/IncrSourceHelper.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/IncrSourceHelper.java index 62d35cd7e36a5..c6430eafffb66 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/IncrSourceHelper.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/IncrSourceHelper.java @@ -18,12 +18,13 @@ package org.apache.hudi.utilities.sources.helpers; -import com.google.common.base.Preconditions; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.HoodieTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; + +import com.google.common.base.Preconditions; import
org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Row; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/KafkaOffsetGen.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/KafkaOffsetGen.java index 873e7930e9551..6bde10f5ae328 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/KafkaOffsetGen.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/KafkaOffsetGen.java @@ -18,23 +18,26 @@ package org.apache.hudi.utilities.sources.helpers; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.stream.Collectors; -import kafka.common.TopicAndPartition; import org.apache.hudi.DataSourceUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.exception.HoodieNotSupportedException; import org.apache.hudi.utilities.exception.HoodieDeltaStreamerException; + +import kafka.common.TopicAndPartition; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.streaming.kafka.KafkaCluster; import org.apache.spark.streaming.kafka.KafkaCluster.LeaderOffset; import org.apache.spark.streaming.kafka.OffsetRange; + +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.stream.Collectors; + import scala.Predef; import scala.collection.JavaConverters; import scala.collection.immutable.Map; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/FlatteningTransformer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/FlatteningTransformer.java index 858fbd8e0ff23..7c41e8dc1ce09 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/FlatteningTransformer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/FlatteningTransformer.java @@ -18,8 +18,8 @@ package org.apache.hudi.utilities.transform; -import java.util.UUID; import org.apache.hudi.common.util.TypedProperties; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaSparkContext; @@ -29,6 +29,8 @@ import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; +import java.util.UUID; + /** * Transformer that can flatten nested objects. It currently doesn't unnest arrays. 
*/ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/IdentityTransformer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/IdentityTransformer.java index ffc0fadc1e869..f74291f8d15e6 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/IdentityTransformer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/IdentityTransformer.java @@ -19,6 +19,7 @@ package org.apache.hudi.utilities.transform; import org.apache.hudi.common.util.TypedProperties; + import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlQueryBasedTransformer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlQueryBasedTransformer.java index b5e2e37332083..ff563a13d2b7a 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlQueryBasedTransformer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlQueryBasedTransformer.java @@ -18,8 +18,8 @@ package org.apache.hudi.utilities.transform; -import java.util.UUID; import org.apache.hudi.common.util.TypedProperties; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaSparkContext; @@ -27,6 +27,8 @@ import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; +import java.util.UUID; + /** * A transformer that allows a sql-query template to be used to transform the source before writing to Hudi data-set. * diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/Transformer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/Transformer.java index 7433f6fa990a1..f97b3024baae8 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/Transformer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/Transformer.java @@ -19,6 +19,7 @@ package org.apache.hudi.utilities.transform; import org.apache.hudi.common.util.TypedProperties; + import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestFlatteningTransformer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestFlatteningTransformer.java index 3e7d217ce3351..d119102ca10b7 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestFlatteningTransformer.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestFlatteningTransformer.java @@ -18,15 +18,16 @@ package org.apache.hudi.utilities; -import static org.junit.Assert.assertEquals; - import org.apache.hudi.utilities.transform.FlatteningTransformer; + import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; import org.junit.Test; +import static org.junit.Assert.assertEquals; + public class TestFlatteningTransformer { @Test diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHDFSParquetImporter.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHDFSParquetImporter.java index 045042fdf76ad..6ed937c32400f 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHDFSParquetImporter.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHDFSParquetImporter.java @@ -18,19
+18,14 @@ package org.apache.hudi.utilities;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import org.apache.hudi.HoodieReadClient;
+import org.apache.hudi.HoodieWriteClient;
+import org.apache.hudi.common.HoodieTestDataGenerator;
+import org.apache.hudi.common.minicluster.HdfsTestService;
+import org.apache.hudi.common.model.HoodieTestUtils;
+import org.apache.hudi.common.table.HoodieTimeline;
+import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
-import java.io.IOException;
-import java.io.Serializable;
-import java.text.ParseException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.LocatedFileStatus;
@@ -38,13 +33,6 @@ import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hudi.HoodieReadClient;
-import org.apache.hudi.HoodieWriteClient;
-import org.apache.hudi.common.HoodieTestDataGenerator;
-import org.apache.hudi.common.minicluster.HdfsTestService;
-import org.apache.hudi.common.model.HoodieTestUtils;
-import org.apache.hudi.common.table.HoodieTimeline;
-import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
 import org.apache.parquet.avro.AvroParquetWriter;
 import org.apache.parquet.hadoop.ParquetWriter;
 import org.apache.spark.SparkConf;
@@ -54,6 +42,20 @@ import org.junit.BeforeClass;
 import org.junit.Test;
+import java.io.IOException;
+import java.io.Serializable;
+import java.text.ParseException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
 public class TestHDFSParquetImporter implements Serializable {
   private static String dfsBasePath;
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieDeltaStreamer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieDeltaStreamer.java
index 5cdb532dca8fe..d2be9133c557a 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieDeltaStreamer.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieDeltaStreamer.java
@@ -18,23 +18,6 @@ package org.apache.hudi.utilities;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.TimeUnit;
-import java.util.function.Function;
-import java.util.stream.Collectors;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hudi.DataSourceWriteOptions;
 import org.apache.hudi.SimpleKeyGenerator;
 import org.apache.hudi.common.model.HoodieCommitMetadata;
@@ -62,6 +45,11 @@ import org.apache.hudi.utilities.sources.config.TestSourceConfig;
 import org.apache.hudi.utilities.transform.SqlQueryBasedTransformer;
 import org.apache.hudi.utilities.transform.Transformer;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.spark.api.java.JavaRDD;
@@ -80,6 +68,20 @@ import org.junit.BeforeClass;
 import org.junit.Test;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
 /**
  * Basic tests against {@link HoodieDeltaStreamer}, by issuing bulk_inserts, upserts, inserts. Check counts at the end.
  */
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieSnapshotCopier.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieSnapshotCopier.java
index f64c38bff70a1..349b5aab64f93 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieSnapshotCopier.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieSnapshotCopier.java
@@ -18,25 +18,27 @@ package org.apache.hudi.utilities;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import java.io.File;
-import java.io.IOException;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hudi.common.HoodieCommonTestHarness;
 import org.apache.hudi.common.HoodieTestDataGenerator;
 import org.apache.hudi.common.model.HoodieTestUtils;
 import org.apache.hudi.common.util.FSUtils;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
+import java.io.File;
+import java.io.IOException;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
 public class TestHoodieSnapshotCopier extends HoodieCommonTestHarness {
   private static String TEST_WRITE_TOKEN = "1-0-1";
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestSchedulerConfGenerator.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestSchedulerConfGenerator.java
index 3f83752800819..af1f566c2982b 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestSchedulerConfGenerator.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestSchedulerConfGenerator.java
@@ -18,15 +18,17 @@ package org.apache.hudi.utilities;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-
-import java.util.Map;
 import org.apache.hudi.common.model.HoodieTableType;
 import org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer;
 import org.apache.hudi.utilities.deltastreamer.SchedulerConfGenerator;
+
 import org.junit.Test;
+import java.util.Map;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+
 public class TestSchedulerConfGenerator {
   @Test
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/UtilitiesTestBase.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/UtilitiesTestBase.java
index 46b0dab9000c7..cfe679850b633 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/UtilitiesTestBase.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/UtilitiesTestBase.java
@@ -18,21 +18,6 @@ package org.apache.hudi.utilities;
-import com.google.common.collect.ImmutableList;
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.DistributedFileSystem;
-import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hive.service.server.HiveServer2;
 import org.apache.hudi.common.HoodieTestDataGenerator;
 import org.apache.hudi.common.TestRawTripPayload;
 import org.apache.hudi.common.minicluster.HdfsTestService;
@@ -46,6 +31,16 @@ import org.apache.hudi.hive.HoodieHiveClient;
 import org.apache.hudi.hive.util.HiveTestService;
 import org.apache.hudi.utilities.sources.TestDataSource;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hive.service.server.HiveServer2;
 import org.apache.parquet.avro.AvroParquetWriter;
 import org.apache.parquet.hadoop.ParquetWriter;
 import org.apache.spark.api.java.JavaSparkContext;
@@ -56,6 +51,13 @@ import org.junit.Before;
 import org.junit.BeforeClass;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.List;
+
 /**
  * Abstract test that provides a dfs & spark contexts.
  *
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/AbstractBaseTestSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/AbstractBaseTestSource.java
index 58a7d580ff7e9..3dc6b331a611f 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/AbstractBaseTestSource.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/AbstractBaseTestSource.java
@@ -18,13 +18,6 @@ package org.apache.hudi.utilities.sources;
-import java.io.File;
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.stream.Stream;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
 import org.apache.hudi.common.HoodieTestDataGenerator;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.util.Option;
@@ -33,9 +26,18 @@ import org.apache.hudi.exception.HoodieIOException;
 import org.apache.hudi.utilities.schema.SchemaProvider;
 import org.apache.hudi.utilities.sources.config.TestSourceConfig;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.SparkSession;
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.stream.Stream;
+
 public abstract class AbstractBaseTestSource extends AvroSource {
   static final int DEFAULT_PARTITION_NUM = 0;
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/DistributedTestDataSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/DistributedTestDataSource.java
index 8e877678f4c2c..9e8b3c4e8d28c 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/DistributedTestDataSource.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/DistributedTestDataSource.java
@@ -18,18 +18,20 @@ package org.apache.hudi.utilities.sources;
-import java.util.Iterator;
-import java.util.stream.Collectors;
-import java.util.stream.IntStream;
-import org.apache.avro.generic.GenericRecord;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.TypedProperties;
 import org.apache.hudi.utilities.schema.SchemaProvider;
 import org.apache.hudi.utilities.sources.config.TestSourceConfig;
+
+import org.apache.avro.generic.GenericRecord;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.SparkSession;
+import java.util.Iterator;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
 /**
  * A Test DataSource which scales test-data generation by using spark parallelism.
 */
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDFSSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDFSSource.java
index 4d4fafbc9c566..f8b4869a4c392 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDFSSource.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDFSSource.java
@@ -18,16 +18,6 @@ package org.apache.hudi.utilities.sources;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.util.List;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.LocatedFileStatus;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hudi.AvroConversionUtils;
 import org.apache.hudi.common.HoodieTestDataGenerator;
 import org.apache.hudi.common.util.Option;
@@ -35,6 +25,12 @@ import org.apache.hudi.utilities.UtilitiesTestBase;
 import org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter;
 import org.apache.hudi.utilities.schema.FilebasedSchemaProvider;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
@@ -45,6 +41,12 @@ import org.junit.BeforeClass;
 import org.junit.Test;
+import java.io.IOException;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
 /**
  * Basic tests against all subclasses of {@link JsonDFSSource} and {@link ParquetDFSSource}
 */
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDataSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDataSource.java
index 1ba75eab0c261..c6130a6214c7b 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDataSource.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDataSource.java
@@ -18,18 +18,20 @@ package org.apache.hudi.utilities.sources;
-import java.util.List;
-import java.util.stream.Collectors;
-import org.apache.avro.generic.GenericRecord;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.TypedProperties;
 import org.apache.hudi.utilities.schema.SchemaProvider;
+
+import org.apache.avro.generic.GenericRecord;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.SparkSession;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
 /**
  * An implementation of {@link Source}, that emits test upserts.
 */
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestKafkaSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestKafkaSource.java
index c1ca1f0a62f3b..f2aa79465ef7d 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestKafkaSource.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestKafkaSource.java
@@ -18,12 +18,6 @@ package org.apache.hudi.utilities.sources;
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.util.HashMap;
-import kafka.common.TopicAndPartition;
-import org.apache.avro.generic.GenericRecord;
 import org.apache.hudi.AvroConversionUtils;
 import org.apache.hudi.common.HoodieTestDataGenerator;
 import org.apache.hudi.common.util.Option;
@@ -33,6 +27,9 @@ import org.apache.hudi.utilities.schema.FilebasedSchemaProvider;
 import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils;
 import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.Config;
+
+import kafka.common.TopicAndPartition;
+import org.apache.avro.generic.GenericRecord;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
@@ -45,6 +42,11 @@ import org.junit.BeforeClass;
 import org.junit.Test;
+import java.io.IOException;
+import java.util.HashMap;
+
+import static org.junit.Assert.assertEquals;
+
 /**
  * Tests against {@link AvroKafkaSource}
 */