From d4c516d494d5152ee990ef992a251253d17aeed5 Mon Sep 17 00:00:00 2001 From: Sagar Sumit Date: Wed, 17 Nov 2021 20:57:20 +0530 Subject: [PATCH 01/11] [HUDI-2783] Upgrade HBase Fix some unit tests Resolve dependency issue Upgrade Hadoop to 2.10.1 and fix HFile inline reader test Separate hbase shaded version for presto bundle Resolve hbase dep conflicts in flink, utilities and hadoop-mr bundles --- .../io/storage/HoodieFileWriterFactory.java | 4 +- .../hudi/io/storage/HoodieHFileConfig.java | 14 ++-- .../hudi/io/storage/HoodieHFileWriter.java | 30 ++++---- hudi-client/hudi-spark-client/pom.xml | 6 ++ .../hudi/testutils/HoodieClientTestUtils.java | 8 ++- hudi-common/pom.xml | 1 - .../bootstrap/index/HFileBootstrapIndex.java | 17 +++-- .../log/AbstractHoodieLogRecordReader.java | 4 ++ .../table/log/block/HoodieHFileDataBlock.java | 1 + .../io/storage/HoodieHBaseKVComparator.java | 4 +- .../hudi/io/storage/HoodieHFileReader.java | 67 +++++++++--------- .../TestInLineFileSystemHFileInLining.java | 68 +++++++++---------- packaging/hudi-flink-bundle/pom.xml | 28 +++++++- packaging/hudi-hadoop-mr-bundle/pom.xml | 46 ++++++++++++- packaging/hudi-spark-bundle/pom.xml | 32 ++++++++- packaging/hudi-utilities-bundle/pom.xml | 38 ++++++++++- pom.xml | 9 ++- 17 files changed, 263 insertions(+), 114 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java index 38db1cde41226..f5e6514c5f34f 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java @@ -85,12 +85,12 @@ private static HoodieFi TaskContextSupplier taskContextSupplier) throws IOException { BloomFilter filter = createBloomFilter(config); - HoodieHFileConfig hfileConfig = new HoodieHFileConfig(hoodieTable.getHadoopConf(), + HoodieHFileConfig hFileConfig = new HoodieHFileConfig(hoodieTable.getHadoopConf(), config.getHFileCompressionAlgorithm(), config.getHFileBlockSize(), config.getHFileMaxFileSize(), HoodieHFileReader.KEY_FIELD_NAME, PREFETCH_ON_OPEN, CACHE_DATA_IN_L1, DROP_BEHIND_CACHE_COMPACTION, filter, HFILE_COMPARATOR); - return new HoodieHFileWriter<>(instantTime, path, hfileConfig, schema, taskContextSupplier, config.populateMetaFields()); + return new HoodieHFileWriter<>(instantTime, path, hFileConfig, schema, taskContextSupplier, config.populateMetaFields()); } private static HoodieFileWriter newOrcFileWriter( diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileConfig.java index 1079566b782f1..255921cf7dd67 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileConfig.java @@ -21,14 +21,14 @@ import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.HColumnDescriptor; -import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.hfile.CacheConfig; public class HoodieHFileConfig { - public static final KeyValue.KVComparator HFILE_COMPARATOR = new 
HoodieHBaseKVComparator(); + public static final CellComparator HFILE_COMPARATOR = new HoodieHBaseKVComparator(); public static final boolean PREFETCH_ON_OPEN = CacheConfig.DEFAULT_PREFETCH_ON_OPEN; public static final boolean CACHE_DATA_IN_L1 = HColumnDescriptor.DEFAULT_CACHE_DATA_IN_L1; // This is private in CacheConfig so have been copied here. @@ -42,12 +42,12 @@ public class HoodieHFileConfig { private final boolean dropBehindCacheCompaction; private final Configuration hadoopConf; private final BloomFilter bloomFilter; - private final KeyValue.KVComparator hfileComparator; + private final CellComparator hFileComparator; private final String keyFieldName; public HoodieHFileConfig(Configuration hadoopConf, Compression.Algorithm compressionAlgorithm, int blockSize, long maxFileSize, String keyFieldName, boolean prefetchBlocksOnOpen, boolean cacheDataInL1, - boolean dropBehindCacheCompaction, BloomFilter bloomFilter, KeyValue.KVComparator hfileComparator) { + boolean dropBehindCacheCompaction, BloomFilter bloomFilter, CellComparator hFileComparator) { this.hadoopConf = hadoopConf; this.compressionAlgorithm = compressionAlgorithm; this.blockSize = blockSize; @@ -56,7 +56,7 @@ public HoodieHFileConfig(Configuration hadoopConf, Compression.Algorithm compres this.cacheDataInL1 = cacheDataInL1; this.dropBehindCacheCompaction = dropBehindCacheCompaction; this.bloomFilter = bloomFilter; - this.hfileComparator = hfileComparator; + this.hFileComparator = hFileComparator; this.keyFieldName = keyFieldName; } @@ -96,8 +96,8 @@ public BloomFilter getBloomFilter() { return bloomFilter; } - public KeyValue.KVComparator getHfileComparator() { - return hfileComparator; + public CellComparator getHFileComparator() { + return hFileComparator; } public String getKeyFieldName() { diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java index 2ad6d7f9220b0..409e84e06153d 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java @@ -59,7 +59,7 @@ public class HoodieHFileWriter + + org.apache.zookeeper + zookeeper + ${zookeeper.version} + test + diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java index 05d7f99446e94..8a3abfd6e1cbf 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java @@ -66,6 +66,8 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hudi.io.storage.HoodieHFileReader.KEY_SCHEMA; + /** * Utility methods to aid testing inside the HoodieClient module. 
*/ @@ -241,9 +243,9 @@ public static Stream readHFile(JavaSparkContext jsc, String[] pat Schema schema = null; for (String path : paths) { try { - HFile.Reader reader = HFile.createReader(fs, new Path(path), cacheConfig, fs.getConf()); + HFile.Reader reader = HFile.createReader(fs, new Path(path), cacheConfig, true, fs.getConf()); if (schema == null) { - schema = new Schema.Parser().parse(new String(reader.loadFileInfo().get("schema".getBytes()))); + schema = new Schema.Parser().parse(new String(reader.getHFileInfo().get(KEY_SCHEMA.getBytes()))); } HFileScanner scanner = reader.getScanner(false, false); if (!scanner.seekTo()) { @@ -252,7 +254,7 @@ public static Stream readHFile(JavaSparkContext jsc, String[] pat } do { - Cell c = scanner.getKeyValue(); + Cell c = scanner.getCell(); byte[] value = Arrays.copyOfRange(c.getValueArray(), c.getValueOffset(), c.getValueOffset() + c.getValueLength()); valuesAsList.add(HoodieAvroUtils.bytesToAvro(value, schema)); } while (scanner.next()); diff --git a/hudi-common/pom.xml b/hudi-common/pom.xml index 1a558aeae3326..b00b82f86c172 100644 --- a/hudi-common/pom.xml +++ b/hudi-common/pom.xml @@ -221,7 +221,6 @@ org.apache.hbase hbase-client ${hbase.version} - test diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java index 3700d01a60ea6..7f36a47a4d24c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java @@ -37,6 +37,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.CellComparatorImpl; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; @@ -178,9 +179,7 @@ private static String getUserKeyFromCellKey(String cellKey) { private static HFile.Reader createReader(String hFilePath, Configuration conf, FileSystem fileSystem) { try { LOG.info("Opening HFile for reading :" + hFilePath); - HFile.Reader reader = HFile.createReader(fileSystem, new HFilePathForReader(hFilePath), - new CacheConfig(conf), conf); - return reader; + return HFile.createReader(fileSystem, new HFilePathForReader(hFilePath), new CacheConfig(conf), true, conf); } catch (IOException ioe) { throw new HoodieIOException(ioe.getMessage(), ioe); } @@ -259,7 +258,7 @@ private void initIndexInfo() { private HoodieBootstrapIndexInfo fetchBootstrapIndexInfo() throws IOException { return TimelineMetadataUtils.deserializeAvroMetadata( - partitionIndexReader().loadFileInfo().get(INDEX_INFO_KEY), + partitionIndexReader().getHFileInfo().get(INDEX_INFO_KEY), HoodieBootstrapIndexInfo.class); } @@ -306,7 +305,7 @@ private List getAllKeys(HFileScanner scanner, Function convert try { boolean available = scanner.seekTo(); while (available) { - keys.add(converter.apply(getUserKeyFromCellKey(CellUtil.getCellKeyAsString(scanner.getKeyValue())))); + keys.add(converter.apply(getUserKeyFromCellKey(CellUtil.getCellKeyAsString(scanner.getCell())))); available = scanner.next(); } } catch (IOException ioe) { @@ -528,13 +527,13 @@ public void close() { @Override public void begin() { try { - HFileContext meta = new HFileContextBuilder().build(); + HFileContext meta = new HFileContextBuilder().withCellComparator(new HoodieKVComparator()).build(); 
this.indexByPartitionWriter = HFile.getWriterFactory(metaClient.getHadoopConf(), new CacheConfig(metaClient.getHadoopConf())).withPath(metaClient.getFs(), indexByPartitionPath) - .withFileContext(meta).withComparator(new HoodieKVComparator()).create(); + .withFileContext(meta).create(); this.indexByFileIdWriter = HFile.getWriterFactory(metaClient.getHadoopConf(), new CacheConfig(metaClient.getHadoopConf())).withPath(metaClient.getFs(), indexByFileIdPath) - .withFileContext(meta).withComparator(new HoodieKVComparator()).create(); + .withFileContext(meta).create(); } catch (IOException ioe) { throw new HoodieIOException(ioe.getMessage(), ioe); } @@ -581,6 +580,6 @@ public String getName() { * This class is explicitly used as Key Comparator to workaround hard coded * legacy format class names inside HBase. Otherwise we will face issues with shading. */ - public static class HoodieKVComparator extends KeyValue.KVComparator { + public static class HoodieKVComparator extends CellComparatorImpl { } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java index 2b9761176521b..2524346307e14 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java @@ -52,6 +52,7 @@ import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Deque; import java.util.HashSet; import java.util.List; @@ -427,6 +428,9 @@ private void processQueuedBlocksForInstant(Deque logBlocks, int processDataBlock((HoodieAvroDataBlock) lastBlock, keys); break; case HFILE_DATA_BLOCK: + if (!keys.isPresent()) { + keys = Option.of(Collections.emptyList()); + } processDataBlock((HoodieHFileDataBlock) lastBlock, keys); break; case PARQUET_DATA_BLOCK: diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java index 8977134740f3c..d54d11689ab39 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java @@ -95,6 +95,7 @@ protected byte[] serializeRecords(List records) throws IOExceptio HFileContext context = new HFileContextBuilder() .withBlockSize(DEFAULT_BLOCK_SIZE) .withCompression(compressionAlgorithm.get()) + .withCellComparator(new HoodieHBaseKVComparator()) .build(); Configuration conf = new Configuration(); diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java index 2d4d96959e150..aaf1dcd7037b7 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java @@ -19,11 +19,11 @@ package org.apache.hudi.io.storage; -import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.CellComparatorImpl; /** * This class is explicitly used as Key Comparator to work around the hard coded * legacy format class names inside HBase. Otherwise, we will face issues with shading. 
*/ -public class HoodieHBaseKVComparator extends KeyValue.KVComparator { +public class HoodieHBaseKVComparator extends CellComparatorImpl { } diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java index 3404d2bd55b55..6b68e0ab911a9 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java @@ -19,14 +19,12 @@ package org.apache.hudi.io.storage; import java.io.IOException; -import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedList; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.TreeSet; import java.util.stream.Collectors; @@ -44,12 +42,15 @@ import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.io.hfile.HFileInfo; import org.apache.hadoop.hbase.io.hfile.HFileScanner; +import org.apache.hadoop.hbase.io.hfile.ReaderContext; +import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder; +import org.apache.hadoop.hbase.nio.ByteBuff; import org.apache.hadoop.hbase.util.Pair; import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.bloom.BloomFilterFactory; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.common.util.io.ByteBufferBackedInputStream; @@ -58,6 +59,8 @@ import org.apache.log4j.LogManager; import org.apache.log4j.Logger; +import static org.apache.hudi.common.fs.FSUtils.getFs; + public class HoodieHFileReader implements HoodieFileReader { private static final Logger LOG = LogManager.getLogger(HoodieHFileReader.class); private Path path; @@ -79,14 +82,14 @@ public class HoodieHFileReader implements HoodieFileRea public HoodieHFileReader(Configuration configuration, Path path, CacheConfig cacheConfig) throws IOException { this.conf = configuration; this.path = path; - this.reader = HFile.createReader(FSUtils.getFs(path.toString(), configuration), path, cacheConfig, conf); + this.reader = HFile.createReader(getFs(path.toString(), configuration), path, cacheConfig, true, conf); } public HoodieHFileReader(Configuration configuration, Path path, CacheConfig cacheConfig, FileSystem fs) throws IOException { this.conf = configuration; this.path = path; this.fsDataInputStream = fs.open(path); - this.reader = HFile.createReader(fs, path, cacheConfig, configuration); + this.reader = HFile.createReader(fs, path, cacheConfig, true configuration); } public HoodieHFileReader(byte[] content) throws IOException { @@ -94,30 +97,32 @@ public HoodieHFileReader(byte[] content) throws IOException { Path path = new Path("hoodie"); SeekableByteArrayInputStream bis = new SeekableByteArrayInputStream(content); FSDataInputStream fsdis = new FSDataInputStream(bis); - this.reader = HFile.createReader(FSUtils.getFs("hoodie", conf), path, new FSDataInputStreamWrapper(fsdis), - content.length, new CacheConfig(conf), conf); + FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fsdis); + ReaderContext context = new ReaderContextBuilder() + .withFilePath(path) + .withInputStreamWrapper(stream) + .withFileSize(getFs("hoodie", 
conf).getFileStatus(path).getLen()) + .withFileSystem(stream.getHfs()) + .withPrimaryReplicaReader(true) + .withReaderType(ReaderContext.ReaderType.STREAM) + .build(); + HFileInfo fileInfo = new HFileInfo(context, conf); + this.reader = HFile.createReader(context, fileInfo, new CacheConfig(conf), conf); + fileInfo.initMetaAndIndex(reader); } @Override public String[] readMinMaxRecordKeys() { - try { - Map fileInfo = reader.loadFileInfo(); - return new String[] { new String(fileInfo.get(KEY_MIN_RECORD.getBytes())), - new String(fileInfo.get(KEY_MAX_RECORD.getBytes()))}; - } catch (IOException e) { - throw new HoodieException("Could not read min/max record key out of file information block correctly from path", e); - } + HFileInfo fileInfo = reader.getHFileInfo(); + return new String[] { new String(fileInfo.get(KEY_MIN_RECORD.getBytes())), + new String(fileInfo.get(KEY_MAX_RECORD.getBytes()))}; } @Override public Schema getSchema() { if (schema == null) { - try { - Map fileInfo = reader.loadFileInfo(); - schema = new Schema.Parser().parse(new String(fileInfo.get(KEY_SCHEMA.getBytes()))); - } catch (IOException e) { - throw new HoodieException("Could not read schema of file from path", e); - } + HFileInfo fileInfo = reader.getHFileInfo(); + schema = new Schema.Parser().parse(new String(fileInfo.get(KEY_SCHEMA.getBytes()))); } return schema; @@ -125,10 +130,10 @@ public Schema getSchema() { @Override public BloomFilter readBloomFilter() { - Map fileInfo; + HFileInfo fileInfo; try { - fileInfo = reader.loadFileInfo(); - ByteBuffer serializedFilter = reader.getMetaBlock(KEY_BLOOM_FILTER_META_BLOCK, false); + fileInfo = reader.getHFileInfo(); + ByteBuff serializedFilter = reader.getMetaBlock(KEY_BLOOM_FILTER_META_BLOCK, false).getBufferReadOnly(); byte[] filterBytes = new byte[serializedFilter.remaining()]; serializedFilter.get(filterBytes); // read the bytes that were written return BloomFilterFactory.fromString(new String(filterBytes), @@ -191,7 +196,7 @@ public List> readAllRecords(Schema writerSchema, Schema readerSc final HFileScanner scanner = reader.getScanner(false, false); if (scanner.seekTo()) { do { - Cell c = scanner.getKeyValue(); + Cell c = scanner.getCell(); final Pair keyAndRecordPair = getRecordFromCell(c, writerSchema, readerSchema, keyFieldSchema); recordList.add(keyAndRecordPair); } while (scanner.next()); @@ -204,19 +209,19 @@ public List> readAllRecords(Schema writerSchema, Schema readerSc } public List> readAllRecords() throws IOException { - Schema schema = new Schema.Parser().parse(new String(reader.loadFileInfo().get(KEY_SCHEMA.getBytes()))); + Schema schema = new Schema.Parser().parse(new String(reader.getHFileInfo().get(KEY_SCHEMA.getBytes()))); return readAllRecords(schema, schema); } public List> readRecords(List keys) throws IOException { - reader.loadFileInfo(); - Schema schema = new Schema.Parser().parse(new String(reader.loadFileInfo().get(KEY_SCHEMA.getBytes()))); + reader.getHFileInfo(); + Schema schema = new Schema.Parser().parse(new String(reader.getHFileInfo().get(KEY_SCHEMA.getBytes()))); return readRecords(keys, schema); } public List> readRecords(List keys, Schema schema) throws IOException { this.schema = schema; - reader.loadFileInfo(); + reader.getHFileInfo(); List> records = new ArrayList<>(); for (String key: keys) { Option value = getRecordByKey(key, schema); @@ -243,7 +248,7 @@ public boolean hasNext() { // To handle when hasNext() is called multiple times for idempotency and/or the first time if (this.next == null && !this.eof) { if 
(!scanner.isSeeked() && scanner.seekTo()) { - final Pair keyAndRecordPair = getRecordFromCell(scanner.getKeyValue(), getSchema(), readerSchema, keyFieldSchema); + final Pair keyAndRecordPair = getRecordFromCell(scanner.getCell(), getSchema(), readerSchema, keyFieldSchema); this.next = keyAndRecordPair.getSecond(); } } @@ -264,7 +269,7 @@ public R next() { } R retVal = this.next; if (scanner.next()) { - final Pair keyAndRecordPair = getRecordFromCell(scanner.getKeyValue(), getSchema(), readerSchema, keyFieldSchema); + final Pair keyAndRecordPair = getRecordFromCell(scanner.getCell(), getSchema(), readerSchema, keyFieldSchema); this.next = keyAndRecordPair.getSecond(); } else { this.next = null; @@ -304,7 +309,7 @@ public Option getRecordByKey(String key, Schema readerSchema) throws IOException } if (keyScanner.seekTo(kv) == 0) { - Cell c = keyScanner.getKeyValue(); + Cell c = keyScanner.getCell(); // Extract the byte value before releasing the lock since we cannot hold on to the returned cell afterwards value = Arrays.copyOfRange(c.getValueArray(), c.getValueOffset(), c.getValueOffset() + c.getValueLength()); } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLining.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLining.java index cc59b46024792..f09ecf76b2d88 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLining.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLining.java @@ -19,12 +19,12 @@ package org.apache.hudi.common.fs.inline; import org.apache.hudi.common.testutils.FileSystemTestUtils; -import org.apache.hudi.io.storage.HoodieHBaseKVComparator; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.io.hfile.CacheConfig; @@ -39,10 +39,12 @@ import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.Arrays; import java.util.HashSet; import java.util.Set; import java.util.UUID; +import static org.apache.hadoop.hbase.CellComparatorImpl.COMPARATOR; import static org.apache.hudi.common.testutils.FileSystemTestUtils.FILE_SCHEME; import static org.apache.hudi.common.testutils.FileSystemTestUtils.RANDOM; import static org.apache.hudi.common.testutils.FileSystemTestUtils.getPhantomFile; @@ -56,11 +58,12 @@ */ public class TestInLineFileSystemHFileInLining { + private static final String LOCAL_FORMATTER = "%010d"; + private static final String VALUE_PREFIX = "value"; + private static final int MIN_BLOCK_SIZE = 1024; private final Configuration inMemoryConf; private final Configuration inlineConf; - private final int minBlockSize = 1024; - private static final String LOCAL_FORMATTER = "%010d"; - private int maxRows = 100 + RANDOM.nextInt(1000); + private final int maxRows = 100 + RANDOM.nextInt(1000); private Path generatedPath; public TestInLineFileSystemHFileInLining() { @@ -88,12 +91,11 @@ public void testSimpleInlineFileSystem() throws IOException { CacheConfig cacheConf = new CacheConfig(inMemoryConf); FSDataOutputStream fout = createFSOutput(outerInMemFSPath, inMemoryConf); HFileContext meta = new HFileContextBuilder() - .withBlockSize(minBlockSize) + 
.withBlockSize(MIN_BLOCK_SIZE).withCellComparator(COMPARATOR) .build(); HFile.Writer writer = HFile.getWriterFactory(inMemoryConf, cacheConf) .withOutputStream(fout) .withFileContext(meta) - .withComparator(new HoodieHBaseKVComparator()) .create(); writeRecords(writer); @@ -110,9 +112,9 @@ public void testSimpleInlineFileSystem() throws IOException { InLineFileSystem inlineFileSystem = (InLineFileSystem) inlinePath.getFileSystem(inlineConf); FSDataInputStream fin = inlineFileSystem.open(inlinePath); - HFile.Reader reader = HFile.createReader(inlineFileSystem, inlinePath, cacheConf, inlineConf); + HFile.Reader reader = HFile.createReader(inlineFileSystem, inlinePath, cacheConf, true, inlineConf); // Load up the index. - reader.loadFileInfo(); + reader.getHFileInfo(); // Get a scanner that caches and that does not use pread. HFileScanner scanner = reader.getScanner(true, false); // Align scanner at start of the file. @@ -121,21 +123,24 @@ public void testSimpleInlineFileSystem() throws IOException { Set rowIdsToSearch = getRandomValidRowIds(10); for (int rowId : rowIdsToSearch) { - assertEquals(0, scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(rowId))), + KeyValue keyValue = new KeyValue.KeyOnlyKeyValue(getSomeKey(rowId)); + assertEquals(0, scanner.seekTo(keyValue), "location lookup failed"); // read the key and see if it matches - ByteBuffer readKey = scanner.getKey(); - assertArrayEquals(getSomeKey(rowId), Bytes.toBytes(readKey), "seeked key does not match"); - scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(rowId))); + Cell cell = scanner.getCell(); + byte[] key = Arrays.copyOfRange(cell.getRowArray(), cell.getRowOffset(), cell.getRowOffset() + cell.getRowLength()); + assertArrayEquals(Arrays.copyOfRange(keyValue.getRowArray(), keyValue.getRowOffset(), keyValue.getRowOffset() + keyValue.getRowLength()), key, + "seeked key does not match"); + scanner.seekTo(keyValue); ByteBuffer val1 = scanner.getValue(); - scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(rowId))); + scanner.seekTo(keyValue); ByteBuffer val2 = scanner.getValue(); assertArrayEquals(Bytes.toBytes(val1), Bytes.toBytes(val2)); } int[] invalidRowIds = {-4, maxRows, maxRows + 1, maxRows + 120, maxRows + 160, maxRows + 1000}; for (int rowId : invalidRowIds) { - assertNotEquals(0, scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(rowId))), + assertNotEquals(0, scanner.seekTo(new KeyValue.KeyOnlyKeyValue(getSomeKey(rowId))), "location lookup should have failed"); } reader.close(); @@ -155,7 +160,7 @@ private Set getRandomValidRowIds(int count) { } private byte[] getSomeKey(int rowId) { - KeyValue kv = new KeyValue(String.format(LOCAL_FORMATTER, Integer.valueOf(rowId)).getBytes(), + KeyValue kv = new KeyValue(String.format(LOCAL_FORMATTER, rowId).getBytes(), Bytes.toBytes("family"), Bytes.toBytes("qual"), HConstants.LATEST_TIMESTAMP, KeyValue.Type.Put); return kv.getKey(); } @@ -169,17 +174,15 @@ private void writeRecords(HFile.Writer writer) throws IOException { writer.close(); } - private int writeSomeRecords(HFile.Writer writer) + private void writeSomeRecords(HFile.Writer writer) throws IOException { - String value = "value"; KeyValue kv; for (int i = 0; i < (maxRows); i++) { - String key = String.format(LOCAL_FORMATTER, Integer.valueOf(i)); + String key = String.format(LOCAL_FORMATTER, i); kv = new KeyValue(Bytes.toBytes(key), Bytes.toBytes("family"), Bytes.toBytes("qual"), - Bytes.toBytes(value + key)); + Bytes.toBytes(VALUE_PREFIX + key)); writer.append(kv); } - return (maxRows); } private 
void readAllRecords(HFileScanner scanner) throws IOException { @@ -187,30 +190,27 @@ private void readAllRecords(HFileScanner scanner) throws IOException { } // read the records and check - private int readAndCheckbytes(HFileScanner scanner, int start, int n) + private void readAndCheckbytes(HFileScanner scanner, int start, int n) throws IOException { - String value = "value"; int i = start; for (; i < (start + n); i++) { - ByteBuffer key = scanner.getKey(); - ByteBuffer val = scanner.getValue(); - String keyStr = String.format(LOCAL_FORMATTER, Integer.valueOf(i)); - String valStr = value + keyStr; + Cell cell = scanner.getCell(); + byte[] key = Arrays.copyOfRange(cell.getRowArray(), cell.getRowOffset(), cell.getRowOffset() + cell.getRowLength()); + byte[] val = Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), cell.getValueOffset() + cell.getValueLength()); + String keyStr = String.format(LOCAL_FORMATTER, i); + String valStr = VALUE_PREFIX + keyStr; KeyValue kv = new KeyValue(Bytes.toBytes(keyStr), Bytes.toBytes("family"), Bytes.toBytes("qual"), Bytes.toBytes(valStr)); - byte[] keyBytes = new KeyValue.KeyOnlyKeyValue(Bytes.toBytes(key), 0, - Bytes.toBytes(key).length).getKey(); - assertArrayEquals(kv.getKey(), keyBytes, - "bytes for keys do not match " + keyStr + " " + Bytes.toString(Bytes.toBytes(key))); - byte[] valBytes = Bytes.toBytes(val); - assertArrayEquals(Bytes.toBytes(valStr), valBytes, - "bytes for vals do not match " + valStr + " " + Bytes.toString(valBytes)); + byte[] keyBytes = new KeyValue.KeyOnlyKeyValue(key, 0, key.length).getKey(); + assertArrayEquals(Arrays.copyOfRange(kv.getRowArray(), kv.getRowOffset(), kv.getRowOffset() + kv.getRowLength()), keyBytes, + "bytes for keys do not match " + keyStr + " " + Bytes.toString(key)); + assertArrayEquals(Bytes.toBytes(valStr), val, + "bytes for vals do not match " + valStr + " " + Bytes.toString(val)); if (!scanner.next()) { break; } } assertEquals(i, start + n - 1); - return (start + n); } private long generateOuterFile(Path outerPath, byte[] inlineBytes) throws IOException { diff --git a/packaging/hudi-flink-bundle/pom.xml b/packaging/hudi-flink-bundle/pom.xml index 222478090b4b0..7f34371db0ab2 100644 --- a/packaging/hudi-flink-bundle/pom.xml +++ b/packaging/hudi-flink-bundle/pom.xml @@ -148,7 +148,11 @@ org.apache.hbase:hbase-common org.apache.hbase:hbase-client org.apache.hbase:hbase-server - org.apache.hbase:hbase-protocol + org.apache.hbase:hbase-protocol-shaded + org.apache.hbase.thirdparty:hbase-shaded-miscellaneous + org.apache.hbase.thirdparty:hbase-shaded-netty + org.apache.hbase.thirdparty:hbase-shaded-protobuf + org.apache.hbase:hbase-hadoop-compat org.apache.htrace:htrace-core commons-codec:commons-codec @@ -597,9 +601,29 @@ org.apache.hbase - hbase-protocol + hbase-protocol-shaded ${hbase.version} + + org.apache.hbase.thirdparty + hbase-shaded-miscellaneous + ${hbase-thirdparty.version} + + + org.apache.hbase.thirdparty + hbase-shaded-netty + ${hbase-thirdparty.version} + + + org.apache.hbase + hbase-hadoop-compat + ${hbase.version} + + + org.apache.hbase.thirdparty + hbase-shaded-protobuf + ${hbase-thirdparty.version} + org.apache.htrace htrace-core diff --git a/packaging/hudi-hadoop-mr-bundle/pom.xml b/packaging/hudi-hadoop-mr-bundle/pom.xml index f6215b1e017a5..d8cf3165bf50b 100644 --- a/packaging/hudi-hadoop-mr-bundle/pom.xml +++ b/packaging/hudi-hadoop-mr-bundle/pom.xml @@ -74,8 +74,12 @@ com.esotericsoftware:minlog org.apache.hbase:hbase-common org.apache.hbase:hbase-client - 
org.apache.hbase:hbase-protocol + org.apache.hbase:hbase-protocol-shaded org.apache.hbase:hbase-server + org.apache.hbase.thirdparty:hbase-shaded-miscellaneous + org.apache.hbase.thirdparty:hbase-shaded-netty + org.apache.hbase.thirdparty:hbase-shaded-protobuf + org.apache.hbase:hbase-hadoop-compat org.apache.htrace:htrace-core com.yammer.metrics:metrics-core com.google.guava:guava @@ -180,6 +184,12 @@ org.apache.hbase hbase-common ${hbase.version} + + + guava + com.google.guava + + @@ -188,6 +198,10 @@ ${hbase.version} compile + + guava + com.google.guava + org.apache.hbase hbase-common @@ -210,5 +224,35 @@ + + org.apache.hbase + hbase-client + ${hbase.version} + + + org.apache.hbase + hbase-protocol-shaded + ${hbase.version} + + + org.apache.hbase.thirdparty + hbase-shaded-miscellaneous + ${hbase-thirdparty.version} + + + org.apache.hbase.thirdparty + hbase-shaded-netty + ${hbase-thirdparty.version} + + + org.apache.hbase + hbase-hadoop-compat + ${hbase.version} + + + org.apache.hbase.thirdparty + hbase-shaded-protobuf + ${hbase-thirdparty.version} + diff --git a/packaging/hudi-spark-bundle/pom.xml b/packaging/hudi-spark-bundle/pom.xml index a877d10a586a8..6fc6babecc483 100644 --- a/packaging/hudi-spark-bundle/pom.xml +++ b/packaging/hudi-spark-bundle/pom.xml @@ -116,8 +116,12 @@ org.apache.hbase:hbase-client org.apache.hbase:hbase-common - org.apache.hbase:hbase-protocol + org.apache.hbase:hbase-protocol-shaded org.apache.hbase:hbase-server + org.apache.hbase.thirdparty:hbase-shaded-miscellaneous + org.apache.hbase.thirdparty:hbase-shaded-netty + org.apache.hbase.thirdparty:hbase-shaded-protobuf + org.apache.hbase:hbase-hadoop-compat org.apache.htrace:htrace-core org.apache.curator:curator-framework org.apache.curator:curator-client @@ -333,6 +337,10 @@ ${hbase.version} compile + + guava + com.google.guava + org.apache.hbase hbase-common @@ -362,9 +370,29 @@ org.apache.hbase - hbase-protocol + hbase-protocol-shaded + ${hbase.version} + + + org.apache.hbase.thirdparty + hbase-shaded-miscellaneous + ${hbase-thirdparty.version} + + + org.apache.hbase.thirdparty + hbase-shaded-netty + ${hbase-thirdparty.version} + + + org.apache.hbase + hbase-hadoop-compat ${hbase.version} + + org.apache.hbase.thirdparty + hbase-shaded-protobuf + ${hbase-thirdparty.version} + diff --git a/packaging/hudi-utilities-bundle/pom.xml b/packaging/hudi-utilities-bundle/pom.xml index 1ffca7634a1ff..4ad7bb707b77e 100644 --- a/packaging/hudi-utilities-bundle/pom.xml +++ b/packaging/hudi-utilities-bundle/pom.xml @@ -150,8 +150,12 @@ org.apache.hbase:hbase-client org.apache.hbase:hbase-common - org.apache.hbase:hbase-protocol + org.apache.hbase:hbase-protocol-shaded org.apache.hbase:hbase-server + org.apache.hbase.thirdparty:hbase-shaded-miscellaneous + org.apache.hbase.thirdparty:hbase-shaded-netty + org.apache.hbase.thirdparty:hbase-shaded-protobuf + org.apache.hbase:hbase-hadoop-compat org.apache.htrace:htrace-core org.apache.curator:curator-framework org.apache.curator:curator-client @@ -345,6 +349,12 @@ org.apache.hbase hbase-common ${hbase.version} + + + guava + com.google.guava + + org.apache.hbase @@ -352,6 +362,10 @@ ${hbase.version} compile + + guava + com.google.guava + org.apache.hbase hbase-common @@ -381,9 +395,29 @@ org.apache.hbase - hbase-protocol + hbase-protocol-shaded + ${hbase.version} + + + org.apache.hbase.thirdparty + hbase-shaded-miscellaneous + ${hbase-thirdparty.version} + + + org.apache.hbase.thirdparty + hbase-shaded-netty + ${hbase-thirdparty.version} + + + org.apache.hbase + 
hbase-hadoop-compat
      ${hbase.version}
    
+    
+      org.apache.hbase.thirdparty
+      hbase-shaded-protobuf
+      ${hbase-thirdparty.version}
+    
diff --git a/pom.xml b/pom.xml
index 1b28ae1bb9a49..a7661c1989ebe 100644
--- a/pom.xml
+++ b/pom.xml
@@ -106,7 +106,7 @@
     2.17.0
     1.7.30
     2.9.9
-    2.7.3
+    2.10.1
     org.apache.hive
     2.3.1
     core
@@ -134,7 +134,9 @@
     0.12.0
     9.4.15.v20190215
     3.1.0-incubating
-    1.2.3
+    2.4.7
+    1.2.3
+    3.5.1
     1.9.13
     1.4.199
     3.1.2
@@ -166,6 +168,7 @@
     3.17.3
     3.11.4
     1.1.0
+    3.5.7
     8000
     http://localhost:${dynamodb-local.port}
@@ -1512,7 +1515,7 @@
     https://docs.spring.io/spring-shell/docs/1.2.0.RELEASE
     https://fasterxml.github.io/jackson-databind/javadoc/2.6
     https://hadoop.apache.org/docs/r${hadoop.version}/api
-    https://hbase.apache.org/1.2/apidocs
+    https://hbase.apache.org/2.4/apidocs
     https://hive.apache.org/javadocs/r2.3.6/api
     https://javadoc.io/static/io.javalin/javalin/2.3.0
     https://javadoc.io/doc/org.apache.parquet/parquet-avro/${parquet.version}

From 3f778ee9618f73195edc84f48af8e40b94d4e563 Mon Sep 17 00:00:00 2001
From: Sagar Sumit
Date: Wed, 8 Dec 2021 20:52:35 +0530
Subject: [PATCH 02/11] Fix metablock buffered read in hfile reader

Disable access time validation

---
 .../java/org/apache/hudi/io/storage/HoodieHFileReader.java     | 2 +-
 .../org/apache/hudi/common/fs/inline/TestInLineFileSystem.java | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java
index 6b68e0ab911a9..68c45e2cf7f2a 100644
--- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java
+++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java
@@ -133,7 +133,7 @@ public BloomFilter readBloomFilter() {
     HFileInfo fileInfo;
     try {
       fileInfo = reader.getHFileInfo();
-      ByteBuff serializedFilter = reader.getMetaBlock(KEY_BLOOM_FILTER_META_BLOCK, false).getBufferReadOnly();
+      ByteBuff serializedFilter = reader.getMetaBlock(KEY_BLOOM_FILTER_META_BLOCK, false).getBufferWithoutHeader();
       byte[] filterBytes = new byte[serializedFilter.remaining()];
       serializedFilter.get(filterBytes); // read the bytes that were written
       return BloomFilterFactory.fromString(new String(filterBytes),
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystem.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystem.java
index 92f83aad7fd7e..0f364eddbc614 100644
--- a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystem.java
+++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystem.java
@@ -369,7 +369,8 @@ private Path getRandomInlinePath() {
   private void verifyFileStatus(FileStatus expected, Path inlinePath, long expectedLength, FileStatus actual) {
     assertEquals(inlinePath, actual.getPath());
     assertEquals(expectedLength, actual.getLen());
-    assertEquals(expected.getAccessTime(), actual.getAccessTime());
+    // removing below assertion as it is flaky on rare occasions (difference is in single-digit ms)
+    // assertEquals(expected.getAccessTime(), actual.getAccessTime());
     assertEquals(expected.getBlockSize(), actual.getBlockSize());
     assertEquals(expected.getGroup(), actual.getGroup());
     assertEquals(expected.getModificationTime(), actual.getModificationTime());

From 14e258027f3678d315e03de22a77bf944917d20d Mon Sep 17 00:00:00 2001
From: Sagar Sumit
Date: Mon, 3 Jan 2022 22:17:17 +0530
Subject: [PATCH 03/11] Replace hbase-shaded-server and shade some
 more deps

---
 packaging/hudi-presto-bundle/pom.xml | 18 +++++++++++++++++
 packaging/hudi-trino-bundle/pom.xml  | 29 ++++++++++++++++------------
 2 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/packaging/hudi-presto-bundle/pom.xml b/packaging/hudi-presto-bundle/pom.xml
index 90c1087dcb4d2..d7f0167d36897 100644
--- a/packaging/hudi-presto-bundle/pom.xml
+++ b/packaging/hudi-presto-bundle/pom.xml
@@ -78,13 +78,23 @@
                   org.apache.hbase:hbase-protocol
                   org.apache.hbase:hbase-server
                   org.apache.htrace:htrace-core
+                  org.apache.hbase:hbase-annotations
+                  org.apache.hbase.thirdparty:hbase-shaded-protobuf
+                  org.apache.hbase.thirdparty:hbase-shaded-netty
+                  org.apache.hbase.thirdparty:hbase-shaded-miscellaneous
+                  org.apache.htrace:htrace-core4
                   com.yammer.metrics:metrics-core
                   com.google.guava:guava
+                  commons-io:commons-io
                   commons-lang:commons-lang
                   com.google.protobuf:protobuf-java
 
+
+                  org.apache.parquet.avro.
+                  org.apache.hudi.org.apache.parquet.avro.
+
                   org.apache.avro.
                   org.apache.hudi.org.apache.avro.
@@ -105,6 +115,10 @@
                   com.esotericsoftware.minlog.
                   org.apache.hudi.com.esotericsoftware.minlog.
 
+
+                  org.apache.htrace.
+                  ${presto.bundle.bootstrap.shade.prefix}org.apache.htrace.
+
                   com.yammer.metrics.
                   org.apache.hudi.com.yammer.metrics.
@@ -113,6 +127,10 @@
                   com.google.common.
                   ${presto.bundle.bootstrap.shade.prefix}com.google.common.
 
+
+                  org.apache.commons.io.
+                  ${presto.bundle.bootstrap.shade.prefix}org.apache.commons.io.
+
                   org.apache.commons.lang.
                   ${presto.bundle.bootstrap.shade.prefix}org.apache.commons.lang.
diff --git a/packaging/hudi-trino-bundle/pom.xml b/packaging/hudi-trino-bundle/pom.xml
index adf73f1bb0b83..dffa9779ca2f6 100644
--- a/packaging/hudi-trino-bundle/pom.xml
+++ b/packaging/hudi-trino-bundle/pom.xml
@@ -79,15 +79,22 @@
                   org.apache.hbase:hbase-protocol
                   org.apache.hbase:hbase-server
                   org.apache.hbase:hbase-annotations
-                  org.apache.htrace:htrace-core
+                  org.apache.hbase.thirdparty:hbase-shaded-protobuf
+                  org.apache.hbase.thirdparty:hbase-shaded-netty
+                  org.apache.hbase.thirdparty:hbase-shaded-miscellaneous
+                  org.apache.htrace:htrace-core4
                   com.yammer.metrics:metrics-core
                   com.google.guava:guava
+                  commons-io:commons-io
                   commons-lang:commons-lang
                   com.google.protobuf:protobuf-java
 
-
+
+                  org.apache.parquet.avro.
+                  org.apache.hudi.org.apache.parquet.avro.
+
                   org.apache.avro.
                   org.apache.hudi.org.apache.avro.
@@ -108,6 +115,10 @@
                   com.esotericsoftware.minlog.
                   org.apache.hudi.com.esotericsoftware.minlog.
 
+
+                  org.apache.htrace.
+                  ${trino.bundle.bootstrap.shade.prefix}org.apache.htrace.
+
                   com.yammer.metrics.
                   org.apache.hudi.com.yammer.metrics.
@@ -116,6 +127,10 @@
                   com.google.common.
                   ${trino.bundle.bootstrap.shade.prefix}com.google.common.
 
+
+                  org.apache.commons.io.
+                  ${trino.bundle.bootstrap.shade.prefix}org.apache.commons.io.
+
                   org.apache.commons.lang.
                   ${trino.bundle.bootstrap.shade.prefix}org.apache.commons.lang.
@@ -159,16 +174,6 @@ org.apache.hudi hudi-common ${project.version} - - - org.apache.hbase - hbase-server - - - org.apache.hbase - hbase-client - - org.apache.hudi From 8fa78bfed84f3840b88a447730240941475954c5 Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Thu, 17 Feb 2022 14:39:33 -0800 Subject: [PATCH 04/11] Rebase master --- .../io/storage/HoodieFileWriterFactory.java | 4 +- .../hudi/io/storage/HoodieHFileConfig.java | 8 +-- .../hudi/io/storage/HoodieHFileWriter.java | 35 ++++++------ .../table/log/block/HoodieHFileDataBlock.java | 21 +++---- .../hudi/io/storage/HoodieHFileReader.java | 57 ++++++++++--------- pom.xml | 2 +- 6 files changed, 65 insertions(+), 62 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java index f5e6514c5f34f..38db1cde41226 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java @@ -85,12 +85,12 @@ private static HoodieFi TaskContextSupplier taskContextSupplier) throws IOException { BloomFilter filter = createBloomFilter(config); - HoodieHFileConfig hFileConfig = new HoodieHFileConfig(hoodieTable.getHadoopConf(), + HoodieHFileConfig hfileConfig = new HoodieHFileConfig(hoodieTable.getHadoopConf(), config.getHFileCompressionAlgorithm(), config.getHFileBlockSize(), config.getHFileMaxFileSize(), HoodieHFileReader.KEY_FIELD_NAME, PREFETCH_ON_OPEN, CACHE_DATA_IN_L1, DROP_BEHIND_CACHE_COMPACTION, filter, HFILE_COMPARATOR); - return new HoodieHFileWriter<>(instantTime, path, hFileConfig, schema, taskContextSupplier, config.populateMetaFields()); + return new HoodieHFileWriter<>(instantTime, path, hfileConfig, schema, taskContextSupplier, config.populateMetaFields()); } private static HoodieFileWriter newOrcFileWriter( diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileConfig.java index 255921cf7dd67..5ce377901a4ba 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileConfig.java @@ -42,12 +42,12 @@ public class HoodieHFileConfig { private final boolean dropBehindCacheCompaction; private final Configuration hadoopConf; private final BloomFilter bloomFilter; - private final CellComparator hFileComparator; + private final CellComparator hfileComparator; private final String keyFieldName; public HoodieHFileConfig(Configuration hadoopConf, Compression.Algorithm compressionAlgorithm, int blockSize, long maxFileSize, String keyFieldName, boolean prefetchBlocksOnOpen, boolean cacheDataInL1, - boolean dropBehindCacheCompaction, BloomFilter bloomFilter, CellComparator hFileComparator) { + boolean dropBehindCacheCompaction, BloomFilter bloomFilter, CellComparator hfileComparator) { this.hadoopConf = hadoopConf; this.compressionAlgorithm = compressionAlgorithm; this.blockSize = blockSize; @@ -56,7 +56,7 @@ public HoodieHFileConfig(Configuration hadoopConf, Compression.Algorithm compres this.cacheDataInL1 = cacheDataInL1; this.dropBehindCacheCompaction = dropBehindCacheCompaction; this.bloomFilter = bloomFilter; - this.hFileComparator = hFileComparator; + 
this.hfileComparator = hfileComparator; this.keyFieldName = keyFieldName; } @@ -97,7 +97,7 @@ public BloomFilter getBloomFilter() { } public CellComparator getHFileComparator() { - return hFileComparator; + return hfileComparator; } public String getKeyFieldName() { diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java index 409e84e06153d..0be0646c5d578 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java @@ -25,6 +25,8 @@ import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; @@ -38,8 +40,6 @@ import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; import org.apache.hadoop.io.Writable; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.StringUtils; import java.io.DataInput; import java.io.DataOutput; @@ -59,7 +59,7 @@ public class HoodieHFileWriter records) throws IOExceptio } HFile.Writer writer = HFile.getWriterFactory(conf, cacheConfig) - .withOutputStream(ostream).withFileContext(context).withComparator(new HoodieHBaseKVComparator()).create(); + .withOutputStream(ostream).withFileContext(context).create(); // Write the records sortedRecordsMap.forEach((recordKey, recordBytes) -> { diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java index 68c45e2cf7f2a..5180853eada10 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java @@ -18,16 +18,15 @@ package org.apache.hudi.io.storage; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Set; -import java.util.TreeSet; -import java.util.stream.Collectors; +import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.common.bloom.BloomFilter; +import org.apache.hudi.common.bloom.BloomFilterFactory; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ValidationUtils; +import org.apache.hudi.common.util.io.ByteBufferBackedInputStream; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.exception.HoodieIOException; import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; @@ -48,18 +47,20 @@ import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder; import org.apache.hadoop.hbase.nio.ByteBuff; import org.apache.hadoop.hbase.util.Pair; -import org.apache.hudi.avro.HoodieAvroUtils; -import org.apache.hudi.common.bloom.BloomFilter; -import org.apache.hudi.common.bloom.BloomFilterFactory; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.ValidationUtils; -import org.apache.hudi.common.util.io.ByteBufferBackedInputStream; -import 
org.apache.hudi.exception.HoodieException; -import org.apache.hudi.exception.HoodieIOException; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; -import static org.apache.hudi.common.fs.FSUtils.getFs; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import java.util.stream.Collectors; public class HoodieHFileReader implements HoodieFileReader { private static final Logger LOG = LogManager.getLogger(HoodieHFileReader.class); @@ -82,14 +83,14 @@ public class HoodieHFileReader implements HoodieFileRea public HoodieHFileReader(Configuration configuration, Path path, CacheConfig cacheConfig) throws IOException { this.conf = configuration; this.path = path; - this.reader = HFile.createReader(getFs(path.toString(), configuration), path, cacheConfig, true, conf); + this.reader = HFile.createReader(FSUtils.getFs(path.toString(), configuration), path, cacheConfig, true, conf); } public HoodieHFileReader(Configuration configuration, Path path, CacheConfig cacheConfig, FileSystem fs) throws IOException { this.conf = configuration; this.path = path; this.fsDataInputStream = fs.open(path); - this.reader = HFile.createReader(fs, path, cacheConfig, true configuration); + this.reader = HFile.createReader(fs, path, cacheConfig, true, configuration); } public HoodieHFileReader(byte[] content) throws IOException { @@ -99,13 +100,13 @@ public HoodieHFileReader(byte[] content) throws IOException { FSDataInputStream fsdis = new FSDataInputStream(bis); FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fsdis); ReaderContext context = new ReaderContextBuilder() - .withFilePath(path) - .withInputStreamWrapper(stream) - .withFileSize(getFs("hoodie", conf).getFileStatus(path).getLen()) - .withFileSystem(stream.getHfs()) - .withPrimaryReplicaReader(true) - .withReaderType(ReaderContext.ReaderType.STREAM) - .build(); + .withFilePath(path) + .withInputStreamWrapper(stream) + .withFileSize(FSUtils.getFs("hoodie", conf).getFileStatus(path).getLen()) + .withFileSystem(stream.getHfs()) + .withPrimaryReplicaReader(true) + .withReaderType(ReaderContext.ReaderType.STREAM) + .build(); HFileInfo fileInfo = new HFileInfo(context, conf); this.reader = HFile.createReader(context, fileInfo, new CacheConfig(conf), conf); fileInfo.initMetaAndIndex(reader); diff --git a/pom.xml b/pom.xml index a7661c1989ebe..dccfd63816673 100644 --- a/pom.xml +++ b/pom.xml @@ -134,7 +134,7 @@ 0.12.0 9.4.15.v20190215 3.1.0-incubating - 2.4.7 + 2.4.9 1.2.3 3.5.1 1.9.13 From 72877c8b03a427566deb99cf813b07c53f11438c Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Thu, 17 Feb 2022 16:11:56 -0800 Subject: [PATCH 05/11] Fix HoodieHFileReader --- .../java/org/apache/hudi/io/storage/HoodieHFileWriter.java | 1 - .../java/org/apache/hudi/io/storage/HoodieHFileReader.java | 7 +++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java index 0be0646c5d578..5dcd2e0a32e51 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java @@ -80,7 +80,6 @@ public 
HoodieHFileWriter(String instantTime, Path file, HoodieHFileConfig hfileC Configuration conf = FSUtils.registerFileSystem(file, hfileConfig.getHadoopConf()); this.file = HoodieWrapperFileSystem.convertToHoodiePath(file, conf); this.fs = (HoodieWrapperFileSystem) this.file.getFileSystem(conf); - this.hfileConfig = hfileConfig; this.schema = schema; this.keyFieldSchema = Option.ofNullable(schema.getField(hfileConfig.getKeyFieldName())); diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java index 5180853eada10..8409eddc9f946 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java @@ -38,6 +38,7 @@ import org.apache.hadoop.fs.Seekable; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.fs.HFileSystem; import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; @@ -99,11 +100,13 @@ public HoodieHFileReader(byte[] content) throws IOException { SeekableByteArrayInputStream bis = new SeekableByteArrayInputStream(content); FSDataInputStream fsdis = new FSDataInputStream(bis); FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fsdis); + FileSystem fs = FSUtils.getFs("hoodie", conf); + HFileSystem hfs = (fs instanceof HFileSystem) ? (HFileSystem) fs : new HFileSystem(fs); ReaderContext context = new ReaderContextBuilder() .withFilePath(path) .withInputStreamWrapper(stream) - .withFileSize(FSUtils.getFs("hoodie", conf).getFileStatus(path).getLen()) - .withFileSystem(stream.getHfs()) + .withFileSize(content.length) + .withFileSystem(hfs) .withPrimaryReplicaReader(true) .withReaderType(ReaderContext.ReaderType.STREAM) .build(); From b1a0ef9025e4a19da144901092c94438ad48228d Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Thu, 17 Feb 2022 17:31:10 -0800 Subject: [PATCH 06/11] Revert hadoop upgrade --- pom.xml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index dccfd63816673..2bba07c9692a5 100644 --- a/pom.xml +++ b/pom.xml @@ -106,7 +106,7 @@ 2.17.0 1.7.30 2.9.9 - 2.10.1 + 2.7.3 org.apache.hive 2.3.1 core @@ -135,7 +135,6 @@ 9.4.15.v20190215 3.1.0-incubating 2.4.9 - 1.2.3 3.5.1 1.9.13 1.4.199 From b4cc5c73732106ad1528ef52b52e3dedfcc305d1 Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Fri, 18 Feb 2022 18:26:54 -0800 Subject: [PATCH 07/11] Fix hudi-hadoop-mr-bundle to include more dependencies and address CI failures --- ...ker-compose_hadoop284_hive233_spark244.yml | 4 +- docker/hoodie/hadoop/hive_base/Dockerfile | 3 + hudi-aws/pom.xml | 19 +++++ hudi-client/hudi-client-common/pom.xml | 39 ++++++++++ hudi-client/hudi-java-client/pom.xml | 20 +++++ hudi-common/pom.xml | 3 +- .../org/apache/hudi/integ/ITTestBase.java | 18 ++++- packaging/hudi-hadoop-mr-bundle/pom.xml | 75 ++----------------- packaging/hudi-spark-bundle/pom.xml | 6 ++ 9 files changed, 111 insertions(+), 76 deletions(-) diff --git a/docker/compose/docker-compose_hadoop284_hive233_spark244.yml b/docker/compose/docker-compose_hadoop284_hive233_spark244.yml index 3c1acbdfe7714..3f74ca8adee14 100644 --- a/docker/compose/docker-compose_hadoop284_hive233_spark244.yml +++ b/docker/compose/docker-compose_hadoop284_hive233_spark244.yml @@ -86,7 +86,7 @@ services: container_name: hive-metastore-postgresql hivemetastore: 
- image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3:latest + image: yihua/hudi-hadoop_2.8.4-hive_2.3.3:latest hostname: hivemetastore container_name: hivemetastore links: @@ -109,7 +109,7 @@ services: - "namenode" hiveserver: - image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3:latest + image: yihua/hudi-hadoop_2.8.4-hive_2.3.3:latest hostname: hiveserver container_name: hiveserver env_file: diff --git a/docker/hoodie/hadoop/hive_base/Dockerfile b/docker/hoodie/hadoop/hive_base/Dockerfile index 7d04d94fc60cc..8c4fbb5886bf0 100644 --- a/docker/hoodie/hadoop/hive_base/Dockerfile +++ b/docker/hoodie/hadoop/hive_base/Dockerfile @@ -36,6 +36,9 @@ RUN echo "Hive URL is :${HIVE_URL}" && wget ${HIVE_URL} -O hive.tar.gz && \ wget https://jdbc.postgresql.org/download/postgresql-9.4.1212.jar -O $HIVE_HOME/lib/postgresql-jdbc.jar && \ rm hive.tar.gz && mkdir -p /var/hoodie/ws/docker/hoodie/hadoop/hive_base/target/ +RUN rm hive/lib/hbase* +RUN rm hive/lib/commons-io-2.4.jar + #Spark should be compiled with Hive to be able to use it #hive-site.xml should be copied to $SPARK_HOME/conf folder diff --git a/hudi-aws/pom.xml b/hudi-aws/pom.xml index d44a389a61f66..4abbd119a0d55 100644 --- a/hudi-aws/pom.xml +++ b/hudi-aws/pom.xml @@ -48,6 +48,25 @@ + + org.apache.hadoop + hadoop-common + provided + + + org.mortbay.jetty + * + + + javax.servlet.jsp + * + + + javax.servlet + * + + + org.apache.hadoop hadoop-common diff --git a/hudi-client/hudi-client-common/pom.xml b/hudi-client/hudi-client-common/pom.xml index a55a136652728..9cdc1c40ad66a 100644 --- a/hudi-client/hudi-client-common/pom.xml +++ b/hudi-client/hudi-client-common/pom.xml @@ -117,6 +117,45 @@ + + org.apache.hadoop + hadoop-common + provided + + + org.mortbay.jetty + * + + + javax.servlet.jsp + * + + + javax.servlet + * + + + + + org.apache.hadoop + hadoop-hdfs + test + + + + org.mortbay.jetty + * + + + javax.servlet.jsp + * + + + javax.servlet + * + + + org.apache.hadoop hadoop-hdfs diff --git a/hudi-client/hudi-java-client/pom.xml b/hudi-client/hudi-java-client/pom.xml index 3471bfb8ba366..b299150c6e3e0 100644 --- a/hudi-client/hudi-java-client/pom.xml +++ b/hudi-client/hudi-java-client/pom.xml @@ -122,6 +122,26 @@ test + + org.apache.hadoop + hadoop-hdfs + test + + + + org.mortbay.jetty + * + + + javax.servlet.jsp + * + + + javax.servlet + * + + + org.apache.hadoop hadoop-hdfs diff --git a/hudi-common/pom.xml b/hudi-common/pom.xml index b00b82f86c172..0cd72c33464fd 100644 --- a/hudi-common/pom.xml +++ b/hudi-common/pom.xml @@ -221,13 +221,14 @@ org.apache.hbase hbase-client ${hbase.version} + test org.apache.hbase hbase-server ${hbase.version} - + compile diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java index 3c7a6034b4f4d..2d9d5436f6840 100644 --- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java +++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java @@ -213,9 +213,11 @@ private TestExecStartResultCallback executeCommandInDocker(String containerName, // Each execution of command(s) in docker should not be more than 15 mins. Otherwise, it is deemed stuck. We will // try to capture stdout and stderr of the stuck process. 
+ LOG.error("containerName: " + containerName); + LOG.error("Command: " + Arrays.asList(command)); boolean completed = dockerClient.execStartCmd(createCmdResponse.getId()).withDetach(false).withTty(false).exec(callback) - .awaitCompletion(540, SECONDS); + .awaitCompletion(540, SECONDS); if (!completed) { callback.getStderr().flush(); callback.getStdout().flush(); @@ -228,8 +230,11 @@ private TestExecStartResultCallback executeCommandInDocker(String containerName, int exitCode = dockerClient.inspectExecCmd(createCmdResponse.getId()).exec().getExitCode(); LOG.info("Exit code for command : " + exitCode); if (exitCode != 0) { - LOG.error("\n\n ###### Stdout #######\n" + callback.getStdout().toString()); + //LOG.error("\n\n ###### Stdout #######\n" + callback.getStdout().toString()); } + callback.getStderr().flush(); + callback.getStdout().flush(); + LOG.error("\n\n ###### Stdout #######\n" + callback.getStdout().toString()); LOG.error("\n\n ###### Stderr #######\n" + callback.getStderr().toString()); if (checkIfSucceed) { @@ -316,8 +321,8 @@ private void saveUpLogs() { executeCommandStringInDocker(HIVESERVER, "cat /tmp/root/hive.log | grep -i exception -A 10 -B 5", false).getStdout().toString(); String filePath = System.getProperty("java.io.tmpdir") + "/" + System.currentTimeMillis() + "-hive.log"; FileIOUtils.writeStringToFile(hiveLogStr, filePath); - LOG.info("Hive log saved up at : " + filePath); - LOG.info("<=========== Full hive log ===============>\n" + LOG.error("Hive log saved up at : " + filePath); + LOG.error("<=========== Full hive log ===============>\n" + "\n" + hiveLogStr + "\n <==========================================>"); } catch (Exception e) { @@ -334,6 +339,11 @@ void assertStdOutContains(Pair stdOutErr, String expectedOutput, String stdOutSingleSpaced = singleSpace(stdOutErr.getLeft()).replaceAll(" ", ""); expectedOutput = singleSpace(expectedOutput).replaceAll(" ", ""); + LOG.error("stdOutErr : " + stdOutErr.getLeft()); + LOG.error("stdOutErr.getRight : " + stdOutErr.getRight()); + LOG.error("stdOutSingleSpaced : " + stdOutSingleSpaced); + LOG.error("expectedOutput : " + expectedOutput); + int lastIndex = 0; int count = 0; while (lastIndex != -1) { diff --git a/packaging/hudi-hadoop-mr-bundle/pom.xml b/packaging/hudi-hadoop-mr-bundle/pom.xml index d8cf3165bf50b..4899b6e5c2095 100644 --- a/packaging/hudi-hadoop-mr-bundle/pom.xml +++ b/packaging/hudi-hadoop-mr-bundle/pom.xml @@ -74,12 +74,16 @@ com.esotericsoftware:minlog org.apache.hbase:hbase-common org.apache.hbase:hbase-client + org.apache.hbase:hbase-metrics + org.apache.hbase:hbase-metrics-api org.apache.hbase:hbase-protocol-shaded org.apache.hbase:hbase-server org.apache.hbase.thirdparty:hbase-shaded-miscellaneous org.apache.hbase.thirdparty:hbase-shaded-netty org.apache.hbase.thirdparty:hbase-shaded-protobuf org.apache.hbase:hbase-hadoop-compat + org.apache.hbase:hbase-hadoop2-compat + commons-io:commons-io org.apache.htrace:htrace-core com.yammer.metrics:metrics-core com.google.guava:guava @@ -172,81 +176,14 @@ compile - - org.apache.htrace - htrace-core - ${htrace.version} - compile - - - - - org.apache.hbase - hbase-common - ${hbase.version} - - - guava - com.google.guava - - - - - - org.apache.hbase - hbase-server - ${hbase.version} - compile - - - guava - com.google.guava - - - org.apache.hbase - hbase-common - - - javax.servlet - * - - - org.codehaus.jackson - * - - - org.mortbay.jetty - * - - - tomcat - * - - - org.apache.hbase - hbase-client - ${hbase.version} - - - org.apache.hbase - hbase-protocol-shaded + 
diff --git a/packaging/hudi-spark-bundle/pom.xml b/packaging/hudi-spark-bundle/pom.xml
index 6fc6babecc483..12cd1abd077e9 100644
--- a/packaging/hudi-spark-bundle/pom.xml
+++ b/packaging/hudi-spark-bundle/pom.xml
@@ -122,6 +122,7 @@
                   <include>org.apache.hbase.thirdparty:hbase-shaded-netty</include>
                   <include>org.apache.hbase.thirdparty:hbase-shaded-protobuf</include>
                   <include>org.apache.hbase:hbase-hadoop-compat</include>
+                  <include>org.apache.hbase:hbase-hadoop2-compat</include>
                   <include>org.apache.htrace:htrace-core</include>
                   <include>org.apache.curator:curator-framework</include>
                   <include>org.apache.curator:curator-client</include>
@@ -388,6 +389,11 @@
       <artifactId>hbase-hadoop-compat</artifactId>
       <version>${hbase.version}</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-hadoop2-compat</artifactId>
+      <version>${hbase.version}</version>
+    </dependency>
     <dependency>
       <groupId>org.apache.hbase.thirdparty</groupId>
       <artifactId>hbase-shaded-protobuf</artifactId>

From 90895141bcecf4a6f966faf73b2c0fa609290281 Mon Sep 17 00:00:00 2001
From: Y Ethan Guo
Date: Fri, 25 Feb 2022 13:47:44 -0800
Subject: [PATCH 08/11] Fix hudi-utilities-bundle

---
 packaging/hudi-utilities-bundle/pom.xml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/packaging/hudi-utilities-bundle/pom.xml b/packaging/hudi-utilities-bundle/pom.xml
index 4ad7bb707b77e..f52c16a5a3de4 100644
--- a/packaging/hudi-utilities-bundle/pom.xml
+++ b/packaging/hudi-utilities-bundle/pom.xml
@@ -150,12 +150,16 @@
                   <include>org.apache.hbase:hbase-client</include>
                   <include>org.apache.hbase:hbase-common</include>
+                  <include>org.apache.hbase:hbase-metrics</include>
+                  <include>org.apache.hbase:hbase-metrics-api</include>
                   <include>org.apache.hbase:hbase-protocol-shaded</include>
                   <include>org.apache.hbase:hbase-server</include>
                   <include>org.apache.hbase.thirdparty:hbase-shaded-miscellaneous</include>
                   <include>org.apache.hbase.thirdparty:hbase-shaded-netty</include>
                   <include>org.apache.hbase.thirdparty:hbase-shaded-protobuf</include>
                   <include>org.apache.hbase:hbase-hadoop-compat</include>
+                  <include>org.apache.hbase:hbase-hadoop2-compat</include>
+                  <include>commons-io:commons-io</include>
                   <include>org.apache.htrace:htrace-core</include>
                   <include>org.apache.curator:curator-framework</include>
                   <include>org.apache.curator:curator-client</include>

From 55db32bf4b6aa3796be90879815328ae376dd606 Mon Sep 17 00:00:00 2001
From: Y Ethan Guo
Date: Fri, 25 Feb 2022 15:42:53 -0800
Subject: [PATCH 09/11] Fix hudi-common

---
 hudi-common/pom.xml | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/hudi-common/pom.xml b/hudi-common/pom.xml
index 0cd72c33464fd..c18eac7af4372 100644
--- a/hudi-common/pom.xml
+++ b/hudi-common/pom.xml
@@ -221,7 +221,24 @@
       <groupId>org.apache.hbase</groupId>
       <artifactId>hbase-client</artifactId>
       <version>${hbase.version}</version>
-      <scope>test</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>javax.servlet</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.codehaus.jackson</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
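Note: after widening the include lists above, a quick sanity check is to confirm the bundle jar actually contains the expected (possibly relocated) classes. An illustrative check follows; the jar path and the assumption that relocated entries carry a shade prefix are mine, not the patch's.

    import java.util.jar.JarEntry;
    import java.util.jar.JarFile;

    public final class BundleCheck {
      public static void main(String[] args) throws Exception {
        // Hypothetical default path; pass the real bundle jar as the first argument.
        String bundle = args.length > 0 ? args[0]
            : "packaging/hudi-utilities-bundle/target/hudi-utilities-bundle.jar";
        try (JarFile jar = new JarFile(bundle)) {
          jar.stream()
              .map(JarEntry::getName)
              .filter(n -> n.contains("hbase") && n.endsWith(".class"))
              .limit(20)
              .forEach(System.out::println);  // relocated entries show the shade prefix
        }
      }
    }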
From 0735d18736639df43d4472580e64234561ea6ff4 Mon Sep 17 00:00:00 2001
From: Y Ethan Guo
Date: Sat, 26 Feb 2022 10:41:35 -0800
Subject: [PATCH 10/11] Fix other bundles

---
 packaging/hudi-flink-bundle/pom.xml     | 8 ++++++--
 packaging/hudi-hadoop-mr-bundle/pom.xml | 2 +-
 packaging/hudi-presto-bundle/pom.xml    | 8 +++++++-
 packaging/hudi-spark-bundle/pom.xml     | 5 ++++-
 packaging/hudi-trino-bundle/pom.xml     | 8 ++++++--
 packaging/hudi-utilities-bundle/pom.xml | 2 +-
 6 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/packaging/hudi-flink-bundle/pom.xml b/packaging/hudi-flink-bundle/pom.xml
index 7f34371db0ab2..549595d75a288 100644
--- a/packaging/hudi-flink-bundle/pom.xml
+++ b/packaging/hudi-flink-bundle/pom.xml
@@ -137,7 +137,7 @@
                   <include>org.apache.hive:hive-service</include>
                   <include>org.apache.hive:hive-service-rpc</include>
                   <include>org.apache.hive:hive-exec</include>
-                  <include>org.apache.hive:hive-standalone-metastore</include>
+                  <include>org.apache.hive:hive-metastore</include>
                   <include>org.apache.hive:hive-jdbc</include>
                   <include>org.datanucleus:datanucleus-core</include>
@@ -147,13 +147,17 @@
                   <include>org.apache.hbase:hbase-common</include>
                   <include>org.apache.hbase:hbase-client</include>
+                  <include>org.apache.hbase:hbase-metrics</include>
+                  <include>org.apache.hbase:hbase-metrics-api</include>
                   <include>org.apache.hbase:hbase-server</include>
                   <include>org.apache.hbase:hbase-protocol-shaded</include>
                   <include>org.apache.hbase.thirdparty:hbase-shaded-miscellaneous</include>
                   <include>org.apache.hbase.thirdparty:hbase-shaded-netty</include>
                   <include>org.apache.hbase.thirdparty:hbase-shaded-protobuf</include>
                   <include>org.apache.hbase:hbase-hadoop-compat</include>
-                  <include>org.apache.htrace:htrace-core</include>
+                  <include>org.apache.hbase:hbase-hadoop2-compat</include>
+                  <include>commons-io:commons-io</include>
+                  <include>org.apache.htrace:htrace-core4</include>
                   <include>commons-codec:commons-codec</include>

diff --git a/packaging/hudi-hadoop-mr-bundle/pom.xml b/packaging/hudi-hadoop-mr-bundle/pom.xml
index 4899b6e5c2095..c84c46ceadc1e 100644
--- a/packaging/hudi-hadoop-mr-bundle/pom.xml
+++ b/packaging/hudi-hadoop-mr-bundle/pom.xml
@@ -84,7 +84,7 @@
                   <include>org.apache.hbase:hbase-hadoop-compat</include>
                   <include>org.apache.hbase:hbase-hadoop2-compat</include>
                   <include>commons-io:commons-io</include>
-                  <include>org.apache.htrace:htrace-core</include>
+                  <include>org.apache.htrace:htrace-core4</include>
                   <include>com.yammer.metrics:metrics-core</include>
                   <include>com.google.guava:guava</include>

diff --git a/packaging/hudi-presto-bundle/pom.xml b/packaging/hudi-presto-bundle/pom.xml
index d7f0167d36897..62579ef82ba07 100644
--- a/packaging/hudi-presto-bundle/pom.xml
+++ b/packaging/hudi-presto-bundle/pom.xml
@@ -75,13 +75,19 @@
                   <include>com.esotericsoftware:minlog</include>
                   <include>org.apache.hbase:hbase-common</include>
                   <include>org.apache.hbase:hbase-client</include>
+                  <include>org.apache.hbase:hbase-metrics</include>
+                  <include>org.apache.hbase:hbase-metrics-api</include>
+
                   <include>org.apache.hbase:hbase-protocol</include>
-                  <include>org.apache.hbase:hbase-server</include>
+                  <include>org.apache.hbase:hbase-protocol-shaded</include>
                   <include>org.apache.htrace:htrace-core</include>
                   <include>org.apache.hbase:hbase-annotations</include>
                   <include>org.apache.hbase.thirdparty:hbase-shaded-protobuf</include>
                   <include>org.apache.hbase.thirdparty:hbase-shaded-netty</include>
                   <include>org.apache.hbase.thirdparty:hbase-shaded-miscellaneous</include>
+                  <include>org.apache.hbase:hbase-hadoop-compat</include>
+                  <include>org.apache.hbase:hbase-hadoop2-compat</include>
+                  <include>commons-io:commons-io</include>
                   <include>org.apache.htrace:htrace-core4</include>
                   <include>com.yammer.metrics:metrics-core</include>
                   <include>com.google.guava:guava</include>

diff --git a/packaging/hudi-spark-bundle/pom.xml b/packaging/hudi-spark-bundle/pom.xml
index 12cd1abd077e9..6149b7bbd0265 100644
--- a/packaging/hudi-spark-bundle/pom.xml
+++ b/packaging/hudi-spark-bundle/pom.xml
@@ -116,6 +116,8 @@
                   <include>org.apache.hbase:hbase-client</include>
                   <include>org.apache.hbase:hbase-common</include>
+                  <include>org.apache.hbase:hbase-metrics</include>
+                  <include>org.apache.hbase:hbase-metrics-api</include>
                   <include>org.apache.hbase:hbase-protocol-shaded</include>
                   <include>org.apache.hbase:hbase-server</include>
                   <include>org.apache.hbase.thirdparty:hbase-shaded-miscellaneous</include>
@@ -123,7 +125,8 @@
                   <include>org.apache.hbase.thirdparty:hbase-shaded-protobuf</include>
                   <include>org.apache.hbase:hbase-hadoop-compat</include>
                   <include>org.apache.hbase:hbase-hadoop2-compat</include>
-                  <include>org.apache.htrace:htrace-core</include>
+                  <include>commons-io:commons-io</include>
+                  <include>org.apache.htrace:htrace-core4</include>
                   <include>org.apache.curator:curator-framework</include>
                   <include>org.apache.curator:curator-client</include>
                   <include>org.apache.curator:curator-recipes</include>
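Note on the htrace-core to htrace-core4 swaps above: the two artifacts ship different packages, so bundling the wrong one produces NoClassDefFoundError at runtime rather than a build failure. htrace-core (3.x) ships org.apache.htrace.*, while htrace-core4 ships org.apache.htrace.core.*, which is the package HBase 2.x links against. An illustrative probe (not part of the patch) to see which generation is on the classpath:

    public final class HTraceProbe {
      public static void main(String[] args) {
        probe("org.apache.htrace.core.Tracer");  // provided by htrace-core4
        probe("org.apache.htrace.Trace");        // provided by legacy htrace-core 3.x
      }

      private static void probe(String className) {
        try {
          Class.forName(className);
          System.out.println("present: " + className);
        } catch (ClassNotFoundException e) {
          System.out.println("missing: " + className);
        }
      }
    }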
diff --git a/packaging/hudi-trino-bundle/pom.xml b/packaging/hudi-trino-bundle/pom.xml
index dffa9779ca2f6..8de546a7f896e 100644
--- a/packaging/hudi-trino-bundle/pom.xml
+++ b/packaging/hudi-trino-bundle/pom.xml
@@ -76,16 +76,20 @@
                   <include>com.esotericsoftware:minlog</include>
                   <include>org.apache.hbase:hbase-common</include>
                   <include>org.apache.hbase:hbase-client</include>
-                  <include>org.apache.hbase:hbase-protocol</include>
+                  <include>org.apache.hbase:hbase-metrics</include>
+                  <include>org.apache.hbase:hbase-metrics-api</include>
+                  <include>org.apache.hbase:hbase-protocol-shaded</include>
                   <include>org.apache.hbase:hbase-server</include>
                   <include>org.apache.hbase:hbase-annotations</include>
                   <include>org.apache.hbase.thirdparty:hbase-shaded-protobuf</include>
                   <include>org.apache.hbase.thirdparty:hbase-shaded-netty</include>
                   <include>org.apache.hbase.thirdparty:hbase-shaded-miscellaneous</include>
+                  <include>org.apache.hbase:hbase-hadoop-compat</include>
+                  <include>org.apache.hbase:hbase-hadoop2-compat</include>
+                  <include>commons-io:commons-io</include>
                   <include>org.apache.htrace:htrace-core4</include>
                   <include>com.yammer.metrics:metrics-core</include>
                   <include>com.google.guava:guava</include>
-                  <include>commons-io:commons-io</include>
                   <include>commons-lang:commons-lang</include>
                   <include>com.google.protobuf:protobuf-java</include>

diff --git a/packaging/hudi-utilities-bundle/pom.xml b/packaging/hudi-utilities-bundle/pom.xml
index f52c16a5a3de4..ee130383d27db 100644
--- a/packaging/hudi-utilities-bundle/pom.xml
+++ b/packaging/hudi-utilities-bundle/pom.xml
@@ -160,7 +160,7 @@
                   <include>org.apache.hbase:hbase-hadoop-compat</include>
                   <include>org.apache.hbase:hbase-hadoop2-compat</include>
                   <include>commons-io:commons-io</include>
-                  <include>org.apache.htrace:htrace-core</include>
+                  <include>org.apache.htrace:htrace-core4</include>
                   <include>org.apache.curator:curator-framework</include>
                   <include>org.apache.curator:curator-client</include>
                   <include>org.apache.curator:curator-recipes</include>

From a9c0bcb8a33a8c046732a4aa135423f84f65235d Mon Sep 17 00:00:00 2001
From: Y Ethan Guo
Date: Sun, 27 Feb 2022 22:48:39 -0800
Subject: [PATCH 11/11] Use hadoop 2.10.1

---
 hudi-client/hudi-client-common/pom.xml | 39 --------------------------
 hudi-client/hudi-spark-client/pom.xml  |  6 ++++
 hudi-common/pom.xml                    | 18 ------------
 pom.xml                                |  2 +-
 4 files changed, 7 insertions(+), 58 deletions(-)

diff --git a/hudi-client/hudi-client-common/pom.xml b/hudi-client/hudi-client-common/pom.xml
index 9cdc1c40ad66a..a55a136652728 100644
--- a/hudi-client/hudi-client-common/pom.xml
+++ b/hudi-client/hudi-client-common/pom.xml
@@ -117,45 +117,6 @@

-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-common</artifactId>
-      <scope>provided</scope>
-      <exclusions>
-        <exclusion>
-          <groupId>org.mortbay.jetty</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>javax.servlet.jsp</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>javax.servlet</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-hdfs</artifactId>
-      <scope>test</scope>
-      <exclusions>
-        <exclusion>
-          <groupId>org.mortbay.jetty</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>javax.servlet.jsp</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>javax.servlet</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-hdfs</artifactId>

diff --git a/hudi-client/hudi-spark-client/pom.xml b/hudi-client/hudi-spark-client/pom.xml
index 0688fedacc2ae..20cccda5ea420 100644
--- a/hudi-client/hudi-spark-client/pom.xml
+++ b/hudi-client/hudi-spark-client/pom.xml
@@ -90,6 +90,12 @@

+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs-client</artifactId>
+      <version>${hadoop.version}</version>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>org.apache.hbase</groupId>
       <artifactId>hbase-testing-util</artifactId>

diff --git a/hudi-common/pom.xml b/hudi-common/pom.xml
index c18eac7af4372..b5b2a3a47a4ae 100644
--- a/hudi-common/pom.xml
+++ b/hudi-common/pom.xml
@@ -221,24 +221,6 @@
       <groupId>org.apache.hbase</groupId>
       <artifactId>hbase-client</artifactId>
       <version>${hbase.version}</version>
-      <exclusions>
-        <exclusion>
-          <groupId>javax.servlet</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>org.codehaus.jackson</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>org.mortbay.jetty</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>tomcat</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-      </exclusions>
     </dependency>

diff --git a/pom.xml b/pom.xml
index 2bba07c9692a5..300b935aa10a3 100644
--- a/pom.xml
+++ b/pom.xml
@@ -106,7 +106,7 @@
     2.17.0
     1.7.30
     2.9.9
-    2.7.3
+    2.10.1
    org.apache.hive
     2.3.1
     core
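Closing note on the comparator migration that runs through this series: the KeyValue.KVComparator type that HoodieHBaseKVComparator previously extended no longer exists in HBase 2.x, and HFile comparators are now CellComparator instances. A minimal sketch of the migrated shape follows; extending CellComparatorImpl is an assumption about how such a comparator is typically written, not a claim about the exact class in this patch, and Serializable is kept only because HFile writer configs can be serialized.

    import java.io.Serializable;
    import org.apache.hadoop.hbase.CellComparatorImpl;

    // Sketch of an HBase 2.x-compatible key comparator for Hudi HFiles.
    public class HoodieHBaseKVComparatorSketch extends CellComparatorImpl implements Serializable {
      // Inherits CellComparatorImpl's lexicographic row/family/qualifier ordering,
      // which matches how Hudi lays out record keys in its HFile base files.
    }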