diff --git a/pom.xml b/pom.xml index a0402109a003d..458030678d7ea 100644 --- a/pom.xml +++ b/pom.xml @@ -69,7 +69,7 @@ 2.3.1 0.19.0 2.3.1 - 0.9.0 + 0.10.0 1.15.1 org.apache.hudi - hudi-common + hudi-presto-bundle ${dep.hudi.version} - org.apache.hbase - hbase-server + com.esotericsoftware + kryo-shaded - org.apache.orc - orc-core + com.fasterxml.jackson.core + * - org.objenesis - objenesis + com.google.guava + guava - commons-logging - commons-logging + com.google.protobuf + protobuf-java - org.slf4j - jcl-over-slf4j + commons-lang + commons-lang - com.fasterxml.jackson.core - jackson-annotations + commons-logging + commons-logging - com.fasterxml.jackson.core - jackson-databind + log4j + log4j - org.apache.httpcomponents - httpclient + org.apache.avro + avro - org.apache.httpcomponents - fluent-hc + org.apache.hadoop + hadoop-common - org.rocksdb - rocksdbjni + org.apache.hadoop + hadoop-mapreduce-client-core - com.esotericsoftware - kryo-shaded + org.apache.hbase + hbase-shaded-server - - - - - org.apache.hudi - hudi-hadoop-mr - ${dep.hudi.version} - - org.apache.hbase - hbase-server + org.apache.htrace + htrace-core - org.apache.orc - orc-core + org.apache.httpcomponents + fluent-hc - org.objenesis - objenesis + org.apache.httpcomponents + httpclient - commons-logging - commons-logging + org.apache.hudi + hudi-common - org.slf4j - jcl-over-slf4j + org.apache.hudi + hudi-hadoop-mr-bundle - com.fasterxml.jackson.core - jackson-annotations + org.apache.orc + orc-core - com.fasterxml.jackson.core - jackson-databind + org.apache.parquet + parquet-avro - org.apache.httpcomponents - httpclient + org.codehaus.jackson + * - org.apache.httpcomponents - fluent-hc + org.lz4 + lz4-java + + + org.objenesis + objenesis org.rocksdb rocksdbjni - com.esotericsoftware - kryo-shaded + org.slf4j + slf4j-log4j12 + net.sf.opencsv diff --git a/presto-geospatial/pom.xml b/presto-geospatial/pom.xml index bc52d84e8ac9d..5edaf05c0bdd0 100644 --- a/presto-geospatial/pom.xml +++ 
b/presto-geospatial/pom.xml @@ -199,6 +199,8 @@ shaded.parquet.it.unimi.dsi.fastutil.* module-info + org.apache.htrace.* + org.apache.parquet.avro.* diff --git a/presto-hive-hadoop2/pom.xml b/presto-hive-hadoop2/pom.xml index d0be05c1dd440..d1ee66ae00f07 100644 --- a/presto-hive-hadoop2/pom.xml +++ b/presto-hive-hadoop2/pom.xml @@ -170,6 +170,8 @@ shaded.parquet.it.unimi.dsi.fastutil.* module-info + org.apache.htrace.* + org.apache.parquet.avro.* @@ -197,6 +199,11 @@ parquet.thrift about.html + + module-info + org.apache.htrace.* + org.apache.parquet.avro.* + @@ -224,6 +231,11 @@ parquet.thrift about.html + + module-info + org.apache.htrace.* + org.apache.parquet.avro.* + diff --git a/presto-hive/pom.xml b/presto-hive/pom.xml index 1cbbfda627ab3..cc5b9582fd1ca 100644 --- a/presto-hive/pom.xml +++ b/presto-hive/pom.xml @@ -49,12 +49,7 @@ org.apache.hudi - hudi-common - - - - org.apache.hudi - hudi-hadoop-mr + hudi-presto-bundle @@ -411,6 +406,9 @@ shaded.parquet.it.unimi.dsi.fastutil.* module-info + + org.apache.htrace.* + org.apache.parquet.avro.* diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HadoopDirectoryLister.java b/presto-hive/src/main/java/com/facebook/presto/hive/HadoopDirectoryLister.java index b5f65f28941a7..c6012fc865508 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HadoopDirectoryLister.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HadoopDirectoryLister.java @@ -45,7 +45,7 @@ public Iterator list( p -> new HadoopFileInfoIterator(fileSystem.listLocatedStatus(p)), namenodeStats, hiveDirectoryContext.getNestedDirectoryPolicy(), - pathFilter); + Optional.of(pathFilter)); } public static class HadoopFileInfoIterator diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java index 5e21de5df15f0..794d79c29fee8 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java +++ 
b/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java @@ -198,6 +198,7 @@ public class HiveClientConfig private boolean verboseRuntimeStatsEnabled; private boolean useRecordPageSourceForCustomSplit = true; + private boolean hudiMetadataEnabled; private boolean sizeBasedSplitWeightsEnabled = true; private double minimumAssignedSplitWeight = 0.05; @@ -1740,4 +1741,17 @@ public HiveClientConfig setUseRecordPageSourceForCustomSplit(boolean useRecordPa this.useRecordPageSourceForCustomSplit = useRecordPageSourceForCustomSplit; return this; } + + @Config("hive.hudi-metadata-enabled") + @ConfigDescription("For Hudi tables prefer to fetch the list of file names, sizes and other metadata from the internal metadata table rather than storage") + public HiveClientConfig setHudiMetadataEnabled(boolean hudiMetadataEnabled) + { + this.hudiMetadataEnabled = hudiMetadataEnabled; + return this; + } + + public boolean isHudiMetadataEnabled() + { + return this.hudiMetadataEnabled; + } } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java index c5d97ed310260..9bbb083ea0d52 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java @@ -133,6 +133,7 @@ public final class HiveSessionProperties public static final String SIZE_BASED_SPLIT_WEIGHTS_ENABLED = "size_based_split_weights_enabled"; public static final String MINIMUM_ASSIGNED_SPLIT_WEIGHT = "minimum_assigned_split_weight"; private static final String USE_RECORD_PAGE_SOURCE_FOR_CUSTOM_SPLIT = "use_record_page_source_for_custom_split"; + private static final String HUDI_METADATA_ENABLED = "hudi_metadata_enabled"; private final List> sessionProperties; @@ -651,6 +652,11 @@ public HiveSessionProperties(HiveClientConfig hiveClientConfig, OrcFileWriterCon 
USE_RECORD_PAGE_SOURCE_FOR_CUSTOM_SPLIT, "Use record page source for custom split", hiveClientConfig.isUseRecordPageSourceForCustomSplit(), + false), + booleanProperty( + HUDI_METADATA_ENABLED, + "For Hudi tables prefer to fetch the list of file names, sizes and other metadata from the internal metadata table rather than storage", + hiveClientConfig.isHudiMetadataEnabled(), false)); } @@ -1126,4 +1132,9 @@ public static boolean isUseRecordPageSourceForCustomSplit(ConnectorSession sessi { return session.getProperty(USE_RECORD_PAGE_SOURCE_FOR_CUSTOM_SPLIT, Boolean.class); } + + public static boolean isHudiMetadataEnabled(ConnectorSession session) + { + return session.getProperty(HUDI_METADATA_ENABLED, Boolean.class); + } } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveUtil.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveUtil.java index 0bf5976fd2e83..1398e8e71fddb 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveUtil.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveUtil.java @@ -384,14 +384,19 @@ static boolean shouldUseRecordReaderFromInputFormat(Configuration configuration, .anyMatch(USE_RECORD_READER_FROM_INPUT_FORMAT_ANNOTATION::equals); } - static boolean shouldUseFileSplitsFromInputFormat(InputFormat inputFormat, Configuration conf, String tablePath) + static boolean shouldUseFileSplitsFromInputFormat(InputFormat inputFormat, DirectoryLister directoryLister) { - boolean hasUseSplitsAnnotation = Arrays.stream(inputFormat.getClass().getAnnotations()) - .map(Annotation::annotationType) - .map(Class::getSimpleName) - .anyMatch(USE_FILE_SPLITS_FROM_INPUT_FORMAT_ANNOTATION::equals); + if (directoryLister instanceof HudiDirectoryLister) { + boolean hasUseSplitsAnnotation = Arrays.stream(inputFormat.getClass().getAnnotations()) + .map(Annotation::annotationType) + .map(Class::getSimpleName) + .anyMatch(USE_FILE_SPLITS_FROM_INPUT_FORMAT_ANNOTATION::equals); + + return hasUseSplitsAnnotation && + 
(!isHudiParquetInputFormat(inputFormat) || shouldUseFileSplitsForHudi(inputFormat, ((HudiDirectoryLister) directoryLister).getMetaClient())); + } - return hasUseSplitsAnnotation && (!isHudiParquetInputFormat(inputFormat) || shouldUseFileSplitsForHudi(inputFormat, conf, tablePath)); + return false; } static boolean isHudiParquetInputFormat(InputFormat inputFormat) @@ -399,14 +404,13 @@ static boolean isHudiParquetInputFormat(InputFormat inputFormat) return inputFormat instanceof HoodieParquetInputFormat; } - private static boolean shouldUseFileSplitsForHudi(InputFormat inputFormat, Configuration conf, String tablePath) + private static boolean shouldUseFileSplitsForHudi(InputFormat inputFormat, HoodieTableMetaClient metaClient) { if (inputFormat instanceof HoodieParquetRealtimeInputFormat) { return true; } - HoodieTableMetaClient hoodieTableMetaClient = HoodieTableMetaClient.builder().setConf(conf).setBasePath(tablePath).build(); - return hoodieTableMetaClient.getTableConfig().getBootstrapBasePath().isPresent(); + return metaClient.getTableConfig().getBootstrapBasePath().isPresent(); } public static long parseHiveDate(String value) diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HudiDirectoryLister.java b/presto-hive/src/main/java/com/facebook/presto/hive/HudiDirectoryLister.java new file mode 100644 index 0000000000000..c9baa024c94d9 --- /dev/null +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HudiDirectoryLister.java @@ -0,0 +1,135 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.hive; + +import com.facebook.airlift.log.Logger; +import com.facebook.presto.hive.filesystem.ExtendedFileSystem; +import com.facebook.presto.hive.metastore.Table; +import com.facebook.presto.hive.util.HiveFileIterator; +import com.facebook.presto.spi.ConnectorSession; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hudi.common.config.HoodieMetadataConfig; +import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.common.engine.HoodieLocalEngineContext; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieBaseFile; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.view.FileSystemViewManager; +import org.apache.hudi.common.table.view.HoodieTableFileSystemView; + +import java.io.IOException; +import java.util.Iterator; +import java.util.Optional; + +import static com.facebook.presto.hive.HiveFileInfo.createHiveFileInfo; +import static com.facebook.presto.hive.HiveSessionProperties.isHudiMetadataEnabled; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DEFAULT_PORT; + +public class HudiDirectoryLister + implements DirectoryLister +{ + private static final Logger log = Logger.get(HudiDirectoryLister.class); + + private final HoodieTableFileSystemView fileSystemView; + private final HoodieTableMetaClient metaClient; + private final boolean metadataEnabled; + + public HudiDirectoryLister(Configuration conf, ConnectorSession session, Table table) + { + log.info("Using Hudi Directory Lister."); + this.metadataEnabled = isHudiMetadataEnabled(session); + 
this.metaClient = HoodieTableMetaClient.builder() + .setConf(conf) + .setBasePath(table.getStorage().getLocation()) + .build(); + HoodieEngineContext engineContext = new HoodieLocalEngineContext(conf); + HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder() + .enable(metadataEnabled) + .build(); + this.fileSystemView = FileSystemViewManager.createInMemoryFileSystemView(engineContext, metaClient, metadataConfig); + } + + public HoodieTableMetaClient getMetaClient() + { + return metaClient; + } + + @Override + public Iterator list( + ExtendedFileSystem fileSystem, + Table table, + Path path, + NamenodeStats namenodeStats, + PathFilter pathFilter, + HiveDirectoryContext hiveDirectoryContext) + { + log.debug("Listing path using Hudi directory lister: %s", path.toString()); + return new HiveFileIterator( + path, + p -> new HudiFileInfoIterator( + fileSystemView, + metadataEnabled ? Optional.empty() : Optional.of(fileSystem.listStatus(p)), + table.getStorage().getLocation(), + p), + namenodeStats, + hiveDirectoryContext.getNestedDirectoryPolicy(), + Optional.empty()); + } + + public static class HudiFileInfoIterator + implements RemoteIterator + { + private final Iterator hoodieBaseFileIterator; + + public HudiFileInfoIterator( + HoodieTableFileSystemView fileSystemView, + Optional fileStatuses, + String tablePath, + Path directory) + { + String partition = FSUtils.getRelativePartitionPath(new Path(tablePath), directory); + if (fileStatuses.isPresent()) { + fileSystemView.addFilesToView(fileStatuses.get()); + this.hoodieBaseFileIterator = fileSystemView.fetchLatestBaseFiles(partition).iterator(); + } + else { + this.hoodieBaseFileIterator = fileSystemView.getLatestBaseFiles(partition).iterator(); + } + } + + @Override + public boolean hasNext() + { + return hoodieBaseFileIterator.hasNext(); + } + + @Override + public HiveFileInfo next() + throws IOException + { + FileStatus fileStatus = hoodieBaseFileIterator.next().getFileStatus(); + String[] name = 
new String[] {"localhost:" + DFS_DATANODE_DEFAULT_PORT}; + String[] host = new String[] {"localhost"}; + LocatedFileStatus hoodieFileStatus = new LocatedFileStatus(fileStatus, + new BlockLocation[] {new BlockLocation(name, host, 0L, fileStatus.getLen())}); + return createHiveFileInfo(hoodieFileStatus, Optional.empty()); + } + } +} diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/StoragePartitionLoader.java b/presto-hive/src/main/java/com/facebook/presto/hive/StoragePartitionLoader.java index 5ebceacfcdbbc..b42f5147968ed 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/StoragePartitionLoader.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/StoragePartitionLoader.java @@ -23,10 +23,6 @@ import com.facebook.presto.spi.ConnectorSession; import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.SchemaTableName; -import com.google.common.base.Function; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterators; @@ -44,7 +40,6 @@ import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.TextInputFormat; -import org.apache.hudi.hadoop.HoodieROTablePathFilter; import java.io.BufferedReader; import java.io.IOException; @@ -85,6 +80,7 @@ import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; +import static com.google.common.base.Strings.isNullOrEmpty; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.Maps.fromProperties; import static com.google.common.collect.Streams.stream; @@ -110,7 +106,6 @@ public class StoragePartitionLoader 
private final ConnectorSession session; private final Deque> fileIterators; private final boolean schedulerUsesHostAddresses; - private final LoadingCache hoodiePathFilterLoadingCache; private final boolean partialAggregationsPushedDown; public StoragePartitionLoader( @@ -132,15 +127,21 @@ public StoragePartitionLoader( this.session = requireNonNull(session, "session is null"); this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null"); this.namenodeStats = requireNonNull(namenodeStats, "namenodeStats is null"); - this.directoryLister = requireNonNull(directoryLister, "directoryLister is null"); this.recursiveDirWalkerEnabled = recursiveDirWalkerEnabled; this.hdfsContext = new HdfsContext(session, table.getDatabaseName(), table.getTableName(), table.getStorage().getLocation(), false); this.fileIterators = requireNonNull(fileIterators, "fileIterators is null"); this.schedulerUsesHostAddresses = schedulerUsesHostAddresses; - this.hoodiePathFilterLoadingCache = CacheBuilder.newBuilder() - .maximumSize(1000) - .build(CacheLoader.from((Function) HoodieROTablePathFilter::new)); this.partialAggregationsPushedDown = partialAggregationsPushedDown; + + Optional directoryListerOverride = Optional.empty(); + if (!isNullOrEmpty(table.getStorage().getLocation())) { + Configuration configuration = hdfsEnvironment.getConfiguration(hdfsContext, new Path(table.getStorage().getLocation())); + InputFormat inputFormat = getInputFormat(configuration, table.getStorage().getStorageFormat().getInputFormat(), false); + if (isHudiParquetInputFormat(inputFormat)) { + directoryListerOverride = Optional.of(new HudiDirectoryLister(configuration, session, table)); + } + } + this.directoryLister = directoryListerOverride.orElseGet(() -> requireNonNull(directoryLister, "directoryLister is null")); } @Override @@ -247,7 +248,7 @@ public ListenableFuture loadPartition(HivePartitionMetadata partition, HiveSp schedulerUsesHostAddresses, partition.getEncryptionInformation()); - if 
(shouldUseFileSplitsFromInputFormat(inputFormat, configuration, table.getStorage().getLocation())) { + if (shouldUseFileSplitsFromInputFormat(inputFormat, directoryLister)) { if (tableBucketInfo.isPresent()) { throw new PrestoException(NOT_SUPPORTED, "Presto cannot read bucketed partition in an input format with UseFileSplitsFromInputFormat annotation: " + inputFormat.getClass().getSimpleName()); } @@ -259,7 +260,7 @@ public ListenableFuture loadPartition(HivePartitionMetadata partition, HiveSp return addSplitsToSource(splits, splitFactory, hiveSplitSource, stopped); } - PathFilter pathFilter = isHudiParquetInputFormat(inputFormat) ? hoodiePathFilterLoadingCache.getUnchecked(configuration) : path1 -> true; + PathFilter pathFilter = path1 -> true; // S3 Select pushdown works at the granularity of individual S3 objects, // Partial aggregation pushdown works at the granularity of individual files // therefore we must not split files when either is enabled. diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/util/HiveFileIterator.java b/presto-hive/src/main/java/com/facebook/presto/hive/util/HiveFileIterator.java index 7749eb78148af..20fda0bc96ee7 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/util/HiveFileIterator.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/util/HiveFileIterator.java @@ -31,6 +31,7 @@ import java.util.Collections; import java.util.Deque; import java.util.Iterator; +import java.util.Optional; import static com.facebook.presto.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR; import static com.facebook.presto.hive.HiveErrorCode.HIVE_FILE_NOT_FOUND; @@ -44,7 +45,7 @@ public class HiveFileIterator private final ListDirectoryOperation listDirectoryOperation; private final NamenodeStats namenodeStats; private final NestedDirectoryPolicy nestedDirectoryPolicy; - private final PathFilter pathFilter; + private final Optional pathFilter; private Iterator remoteIterator = Collections.emptyIterator(); @@ -53,7 +54,7 @@ 
public HiveFileIterator( ListDirectoryOperation listDirectoryOperation, NamenodeStats namenodeStats, NestedDirectoryPolicy nestedDirectoryPolicy, - PathFilter pathFilter) + Optional pathFilter) { paths.addLast(requireNonNull(path, "path is null")); this.listDirectoryOperation = requireNonNull(listDirectoryOperation, "listDirectoryOperation is null"); @@ -97,10 +98,15 @@ protected HiveFileInfo computeNext() } } - private Iterator getLocatedFileStatusRemoteIterator(Path path, PathFilter pathFilter) + private Iterator getLocatedFileStatusRemoteIterator(Path path, Optional pathFilter) { try (TimeStat.BlockTimer ignored = namenodeStats.getListLocatedStatus().time()) { - return Iterators.filter(new FileStatusIterator(path, listDirectoryOperation, namenodeStats), input -> pathFilter.accept(input.getPath())); + FileStatusIterator statusIterator = new FileStatusIterator(path, listDirectoryOperation, namenodeStats); + if (!pathFilter.isPresent()) { + return statusIterator; + } + // Reuse the iterator built above instead of constructing a second one for the same path + return Iterators.filter(statusIterator, input -> pathFilter.get().accept(input.getPath())); } } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/util/HudiRealtimeBootstrapBaseFileSplitConverter.java b/presto-hive/src/main/java/com/facebook/presto/hive/util/HudiRealtimeBootstrapBaseFileSplitConverter.java index ba4f02ddf8b03..8c4f6119beacf 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/util/HudiRealtimeBootstrapBaseFileSplitConverter.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/util/HudiRealtimeBootstrapBaseFileSplitConverter.java @@ -16,6 +16,7 @@ import com.google.common.collect.ImmutableMap; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.FileSplit; +import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.hadoop.realtime.RealtimeBootstrapBaseFileSplit; import java.io.IOException; @@ -24,6 +25,7 @@ import java.util.List; import java.util.Map; import
java.util.Optional; +import java.util.stream.Collectors; import static com.facebook.presto.hive.HiveUtil.CUSTOM_FILE_SPLIT_CLASS_KEY; import static com.google.common.base.Strings.isNullOrEmpty; @@ -68,12 +70,13 @@ public Optional recreateFileSplitWithCustomInfo(FileSplit split, Map< if (!isNullOrEmpty(customFileSplitClass) && RealtimeBootstrapBaseFileSplit.class.getName().equals(customFileSplitClass)) { String deltaFilePaths = customSplitInfo.get(DELTA_FILE_PATHS_KEY); List deltaLogPaths = isNullOrEmpty(deltaFilePaths) ? Collections.emptyList() : Arrays.asList(deltaFilePaths.split(",")); + List deltaLogFiles = deltaLogPaths.stream().map(p -> new HoodieLogFile(new Path(p))).collect(Collectors.toList()); FileSplit bootstrapFileSplit = new FileSplit( new Path(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_PATH)), parseLong(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_START)), parseLong(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_LEN)), (String[]) null); - split = new RealtimeBootstrapBaseFileSplit(split, customSplitInfo.get(BASE_PATH_KEY), deltaLogPaths, + split = new RealtimeBootstrapBaseFileSplit(split, customSplitInfo.get(BASE_PATH_KEY), deltaLogFiles, customSplitInfo.get(MAX_COMMIT_TIME_KEY), bootstrapFileSplit); return Optional.of(split); } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/util/HudiRealtimeSplitConverter.java b/presto-hive/src/main/java/com/facebook/presto/hive/util/HudiRealtimeSplitConverter.java index 5edcbc8811575..5868ae1a9fbfd 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/util/HudiRealtimeSplitConverter.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/util/HudiRealtimeSplitConverter.java @@ -14,7 +14,9 @@ package com.facebook.presto.hive.util; import com.google.common.collect.ImmutableMap; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.FileSplit; +import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.util.Option; import 
org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit; @@ -23,6 +25,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.stream.Collectors; import static com.facebook.presto.hive.HiveUtil.CUSTOM_FILE_SPLIT_CLASS_KEY; import static java.util.Objects.requireNonNull; @@ -62,10 +65,11 @@ public Optional recreateFileSplitWithCustomInfo(FileSplit split, Map< if (HoodieRealtimeFileSplit.class.getName().equals(customSplitClass)) { requireNonNull(customSplitInfo.get(HUDI_DELTA_FILEPATHS_KEY), "HUDI_DELTA_FILEPATHS_KEY is missing"); List deltaLogPaths = Arrays.asList(customSplitInfo.get(HUDI_DELTA_FILEPATHS_KEY).split(",")); + List deltaLogFiles = deltaLogPaths.stream().map(p -> new HoodieLogFile(new Path(p))).collect(Collectors.toList()); return Optional.of(new HoodieRealtimeFileSplit( split, requireNonNull(customSplitInfo.get(HUDI_BASEPATH_KEY), "HUDI_BASEPATH_KEY is missing"), - deltaLogPaths, + deltaLogFiles, requireNonNull(customSplitInfo.get(HUDI_MAX_COMMIT_TIME_KEY), "HUDI_MAX_COMMIT_TIME_KEY is missing"), Option.empty())); } diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveClientConfig.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveClientConfig.java index 17afde2a55a34..abfbd24e4d7f3 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveClientConfig.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveClientConfig.java @@ -160,7 +160,8 @@ public void testDefaults() .setSizeBasedSplitWeightsEnabled(true) .setMinimumAssignedSplitWeight(0.05) .setUserDefinedTypeEncodingEnabled(false) - .setUseRecordPageSourceForCustomSplit(true)); + .setUseRecordPageSourceForCustomSplit(true) + .setHudiMetadataEnabled(false)); } @Test @@ -282,6 +283,7 @@ public void testExplicitPropertyMappings() .put("hive.user-defined-type-encoding-enabled", "true") .put("hive.minimum-assigned-split-weight", "1.0") .put("hive.use-record-page-source-for-custom-split", "false") + 
.put("hive.hudi-metadata-enabled", "true") .build(); HiveClientConfig expected = new HiveClientConfig() @@ -399,7 +401,8 @@ public void testExplicitPropertyMappings() .setSizeBasedSplitWeightsEnabled(false) .setMinimumAssignedSplitWeight(1.0) .setUserDefinedTypeEncodingEnabled(true) - .setUseRecordPageSourceForCustomSplit(false); + .setUseRecordPageSourceForCustomSplit(false) + .setHudiMetadataEnabled(true); ConfigAssertions.assertFullMapping(properties, expected); } diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHudiDirectoryLister.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHudiDirectoryLister.java new file mode 100644 index 0000000000000..0ac9a56e3da78 --- /dev/null +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHudiDirectoryLister.java @@ -0,0 +1,132 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.hive; + +import com.facebook.presto.hive.filesystem.ExtendedFileSystem; +import com.facebook.presto.hive.metastore.Storage; +import com.facebook.presto.hive.metastore.Table; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.exception.TableNotFoundException; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.io.IOException; +import java.util.Iterator; +import java.util.Optional; + +import static com.facebook.presto.hive.BucketFunctionType.HIVE_COMPATIBLE; +import static com.facebook.presto.hive.HiveStorageFormat.PARQUET; +import static com.facebook.presto.hive.HiveTestUtils.SESSION; +import static com.facebook.presto.hive.NestedDirectoryPolicy.IGNORED; +import static com.facebook.presto.hive.metastore.PrestoTableType.EXTERNAL_TABLE; +import static com.facebook.presto.hive.metastore.StorageFormat.fromHiveStorageFormat; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertThrows; +import static org.testng.Assert.assertTrue; + +public class TestHudiDirectoryLister +{ + private Configuration hadoopConf; + + @BeforeClass + private void setup() + { + hadoopConf = new Configuration(); + hadoopConf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); + hadoopConf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); + } + + @AfterClass(alwaysRun = true) + private void tearDown() + { + hadoopConf = null; + } + + @Test + public void testDirectoryListerForHudiTable() + throws IOException + { + Table mockTable = new Table( + "schema", + "hudi_non_part_cow", + "user", + EXTERNAL_TABLE, + new 
Storage(fromHiveStorageFormat(PARQUET), + getTableBasePath("hudi_non_part_cow"), + Optional.of(new HiveBucketProperty( + ImmutableList.of(), + 1, + ImmutableList.of(), + HIVE_COMPATIBLE, + Optional.empty())), + false, + ImmutableMap.of(), + ImmutableMap.of()), + ImmutableList.of(), + ImmutableList.of(), + ImmutableMap.of(), + Optional.empty(), + Optional.empty()); + + HudiDirectoryLister directoryLister = new HudiDirectoryLister(hadoopConf, SESSION, mockTable); + HoodieTableMetaClient metaClient = directoryLister.getMetaClient(); + assertEquals(metaClient.getBasePath(), mockTable.getStorage().getLocation()); + Path path = new Path(mockTable.getStorage().getLocation()); + ExtendedFileSystem fs = (ExtendedFileSystem) path.getFileSystem(hadoopConf); + PathFilter pathFilter = path1 -> true; + Iterator fileInfoIterator = directoryLister.list(fs, mockTable, path, new NamenodeStats(), pathFilter, new HiveDirectoryContext(IGNORED, false)); + assertTrue(fileInfoIterator.hasNext()); + HiveFileInfo fileInfo = fileInfoIterator.next(); + assertEquals(fileInfo.getPath().getName(), "d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet"); + } + + @Test + public void testDirectoryListerForNonHudiTable() + { + Table mockTable = new Table( + "schema", + "non_hudi_table", + "user", + EXTERNAL_TABLE, + new Storage(fromHiveStorageFormat(PARQUET), + getTableBasePath("non_hudi_table"), + Optional.of(new HiveBucketProperty( + ImmutableList.of(), + 1, + ImmutableList.of(), + HIVE_COMPATIBLE, + Optional.empty())), + false, + ImmutableMap.of(), + ImmutableMap.of()), + ImmutableList.of(), + ImmutableList.of(), + ImmutableMap.of(), + Optional.empty(), + Optional.empty()); + + assertThrows(TableNotFoundException.class, () -> new HudiDirectoryLister(hadoopConf, SESSION, mockTable)); + } + + private static String getTableBasePath(String tableName) + { + return TestHudiDirectoryLister.class.getClassLoader().getResource(tableName).toString(); + } +} diff --git 
a/presto-hive/src/test/java/com/facebook/presto/hive/util/TestCustomSplitConversionUtils.java b/presto-hive/src/test/java/com/facebook/presto/hive/util/TestCustomSplitConversionUtils.java index 9fef07716578b..fb65585e50b96 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/util/TestCustomSplitConversionUtils.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/util/TestCustomSplitConversionUtils.java @@ -15,6 +15,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.FileSplit; +import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.util.Option; import org.apache.hudi.hadoop.BootstrapBaseFileSplit; import org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit; @@ -25,6 +26,7 @@ import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import static org.testng.Assert.assertEquals; @@ -40,11 +42,12 @@ public class TestCustomSplitConversionUtils public void testHudiRealtimeSplitConverterRoundTrip() throws IOException { - List expectedDeltaLogPaths = Arrays.asList("test1", "test2", "test3"); + List deltaLogPaths = Arrays.asList("test1", "test2", "test3"); + List deltaLogFiles = deltaLogPaths.stream().map(p -> new HoodieLogFile(new Path(p))).collect(Collectors.toList()); String expectedMaxCommitTime = "max_commit_time"; FileSplit baseSplit = new FileSplit(FILE_PATH, SPLIT_START_POS, SPLIT_LENGTH, SPLIT_HOSTS); - FileSplit hudiSplit = new HoodieRealtimeFileSplit(baseSplit, BASE_PATH, expectedDeltaLogPaths, expectedMaxCommitTime, Option.empty()); + FileSplit hudiSplit = new HoodieRealtimeFileSplit(baseSplit, BASE_PATH, deltaLogFiles, expectedMaxCommitTime, Option.empty()); // Test conversion of HudiSplit -> customSplitInfo Map customSplitInfo = CustomSplitConversionUtils.extractCustomSplitInfo(hudiSplit); @@ -57,7 +60,7 @@ public void testHudiRealtimeSplitConverterRoundTrip() assertEquals(SPLIT_LENGTH, recreatedSplit.getLength()); assertEquals(SPLIT_HOSTS, 
recreatedSplit.getLocations()); assertEquals(BASE_PATH, recreatedSplit.getBasePath()); - assertEquals(expectedDeltaLogPaths, recreatedSplit.getDeltaLogPaths()); + assertEquals(deltaLogPaths, recreatedSplit.getDeltaLogPaths()); assertEquals(expectedMaxCommitTime, recreatedSplit.getMaxCommitTime()); } @@ -95,6 +98,7 @@ public void testHudiRealtimeBootstrapBaseFileSplitConverter() throws IOException { List deltaLogPaths = Arrays.asList("test1", "test2", "test3"); + List deltaLogFiles = deltaLogPaths.stream().map(p -> new HoodieLogFile(new Path(p))).collect(Collectors.toList()); String maxCommitTime = "max_commit_time"; Path bootstrapSourceFilePath = new Path("/test/source/test.parquet"); @@ -104,7 +108,7 @@ public void testHudiRealtimeBootstrapBaseFileSplitConverter() FileSplit baseSplit = new FileSplit(FILE_PATH, SPLIT_START_POS, SPLIT_LENGTH, SPLIT_HOSTS); FileSplit bootstrapSourceSplit = new FileSplit(bootstrapSourceFilePath, bootstrapSourceSplitStartPos, bootstrapSourceSplitLength, new String[0]); - FileSplit hudiSplit = new RealtimeBootstrapBaseFileSplit(baseSplit, BASE_PATH, deltaLogPaths, maxCommitTime, + FileSplit hudiSplit = new RealtimeBootstrapBaseFileSplit(baseSplit, BASE_PATH, deltaLogFiles, maxCommitTime, bootstrapSourceSplit); // Test conversion of HudiSplit -> customSplitInfo diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/util/TestHiveFileIterator.java b/presto-hive/src/test/java/com/facebook/presto/hive/util/TestHiveFileIterator.java index b1e332b43a758..a0b2a4c9b3f75 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/util/TestHiveFileIterator.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/util/TestHiveFileIterator.java @@ -29,6 +29,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.Optional; import static com.facebook.presto.hive.NestedDirectoryPolicy.IGNORED; import static com.facebook.presto.hive.NestedDirectoryPolicy.RECURSE; @@ -77,7 +78,7 @@ public 
void testDefaultPathFilterNoRecursion() createFiles(basePath, 5, false); Path rootPath = new Path("file://" + basePath + File.separator); PathFilter pathFilter = path -> true; - HiveFileIterator hiveFileIterator = new HiveFileIterator(rootPath, listDirectoryOperation, new NamenodeStats(), IGNORED, pathFilter); + HiveFileIterator hiveFileIterator = new HiveFileIterator(rootPath, listDirectoryOperation, new NamenodeStats(), IGNORED, Optional.of(pathFilter)); int actualCount = Iterators.size(hiveFileIterator); assertEquals(actualCount, 8); @@ -108,7 +109,7 @@ public void testDefaultPathFilterWithRecursion() createFiles(dir2, 4, false); Path rootPath = new Path("file://" + basePath + File.separator); PathFilter pathFilter = path -> true; - HiveFileIterator hiveFileIterator = new HiveFileIterator(rootPath, listDirectoryOperation, new NamenodeStats(), RECURSE, pathFilter); + HiveFileIterator hiveFileIterator = new HiveFileIterator(rootPath, listDirectoryOperation, new NamenodeStats(), RECURSE, Optional.of(pathFilter)); int actualCount = Iterators.size(hiveFileIterator); assertEquals(actualCount, 20); @@ -129,7 +130,7 @@ public void testPathFilterWithNoRecursion() createFiles(basePath, 5, false); Path rootPath = new Path("file://" + basePath + File.separator); PathFilter pathFilter = path -> path.getName().contains(PATH_FILTER_MATCHED_PREFIX); - HiveFileIterator hiveFileIterator = new HiveFileIterator(rootPath, listDirectoryOperation, new NamenodeStats(), IGNORED, pathFilter); + HiveFileIterator hiveFileIterator = new HiveFileIterator(rootPath, listDirectoryOperation, new NamenodeStats(), IGNORED, Optional.of(pathFilter)); int actualCount = Iterators.size(hiveFileIterator); assertEquals(actualCount, 3); @@ -160,7 +161,7 @@ public void testPathFilterWithRecursion() createFiles(dir2, 4, false); Path rootPath = new Path("file://" + basePath + File.separator); PathFilter pathFilter = path -> path.getName().contains(PATH_FILTER_MATCHED_PREFIX); - HiveFileIterator 
hiveFileIterator = new HiveFileIterator(rootPath, listDirectoryOperation, new NamenodeStats(), RECURSE, pathFilter); + HiveFileIterator hiveFileIterator = new HiveFileIterator(rootPath, listDirectoryOperation, new NamenodeStats(), RECURSE, Optional.of(pathFilter)); int actualCount = Iterators.size(hiveFileIterator); assertEquals(actualCount, 9); diff --git a/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/20211217110514527.commit b/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/20211217110514527.commit new file mode 100644 index 0000000000000..f77eeb137f026 --- /dev/null +++ b/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/20211217110514527.commit @@ -0,0 +1,50 @@ +{ + "partitionToWriteStats" : { + "" : [ { + "fileId" : "d0875d00-483d-4e8b-bbbe-c520366c47a0-0", + "path" : "d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet", + "prevCommit" : "null", + "numWrites" : 3, + "numDeletes" : 0, + "numUpdateWrites" : 0, + "numInserts" : 3, + "totalWriteBytes" : 436273, + "totalWriteErrors" : 0, + "tempPath" : null, + "partitionPath" : "", + "totalLogRecords" : 0, + "totalLogFilesCompacted" : 0, + "totalLogSizeCompacted" : 0, + "totalUpdatedRecordsCompacted" : 0, + "totalLogBlocks" : 0, + "totalCorruptLogBlock" : 0, + "totalRollbackBlocks" : 0, + "fileSizeInBytes" : 436273, + "minEventTime" : null, + "maxEventTime" : null + } ] + }, + "compacted" : false, + "extraMetadata" : { + "schema" : 
"{\"type\":\"record\",\"name\":\"hudi_non_part_cow_record\",\"namespace\":\"hoodie.hudi_non_part_cow\",\"fields\":[{\"name\":\"rowId\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"partitionId\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"preComb\",\"type\":[\"null\",\"long\"],\"default\":null},{\"name\":\"name\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"versionId\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"toBeDeletedStr\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"intToLong\",\"type\":[\"null\",\"int\"],\"default\":null},{\"name\":\"longToInt\",\"type\":[\"null\",\"long\"],\"default\":null}]}" + }, + "operationType" : "INSERT", + "writePartitionPaths" : [ "" ], + "fileIdAndRelativePaths" : { + "d0875d00-483d-4e8b-bbbe-c520366c47a0-0" : "d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet" + }, + "totalRecordsDeleted" : 0, + "totalLogRecordsCompacted" : 0, + "totalLogFilesCompacted" : 0, + "totalCompactedRecordsUpdated" : 0, + "totalLogFilesSize" : 0, + "totalScanTime" : 0, + "totalCreateTime" : 1743, + "totalUpsertTime" : 0, + "minAndMaxEventTime" : { + "Optional.empty" : { + "val" : null, + "present" : false + } + } +} \ No newline at end of file diff --git a/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/20211217110514527.commit.requested b/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/20211217110514527.commit.requested new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/20211217110514527.inflight b/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/20211217110514527.inflight new file mode 100644 index 0000000000000..6605bcaf9b36c --- /dev/null +++ b/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/20211217110514527.inflight @@ -0,0 +1,48 @@ +{ + "partitionToWriteStats" : { + "" : [ { + "fileId" : "", + "path" : null, + "prevCommit" : "null", + 
"numWrites" : 0, + "numDeletes" : 0, + "numUpdateWrites" : 0, + "numInserts" : 3, + "totalWriteBytes" : 0, + "totalWriteErrors" : 0, + "tempPath" : null, + "partitionPath" : null, + "totalLogRecords" : 0, + "totalLogFilesCompacted" : 0, + "totalLogSizeCompacted" : 0, + "totalUpdatedRecordsCompacted" : 0, + "totalLogBlocks" : 0, + "totalCorruptLogBlock" : 0, + "totalRollbackBlocks" : 0, + "fileSizeInBytes" : 0, + "minEventTime" : null, + "maxEventTime" : null + } ] + }, + "compacted" : false, + "extraMetadata" : { }, + "operationType" : "INSERT", + "writePartitionPaths" : [ "" ], + "fileIdAndRelativePaths" : { + "" : null + }, + "totalRecordsDeleted" : 0, + "totalLogRecordsCompacted" : 0, + "totalLogFilesCompacted" : 0, + "totalCompactedRecordsUpdated" : 0, + "totalLogFilesSize" : 0, + "totalScanTime" : 0, + "totalCreateTime" : 0, + "totalUpsertTime" : 0, + "minAndMaxEventTime" : { + "Optional.empty" : { + "val" : null, + "present" : false + } + } +} \ No newline at end of file diff --git a/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/hoodie.properties b/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/hoodie.properties new file mode 100644 index 0000000000000..3d03fa7915c39 --- /dev/null +++ b/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/hoodie.properties @@ -0,0 +1,14 @@ +#Properties saved on Fri Dec 17 11:05:14 UTC 2021 +#Fri Dec 17 11:05:14 UTC 2021 +hoodie.table.precombine.field=preComb +hoodie.table.partition.fields= +hoodie.table.type=COPY_ON_WRITE +hoodie.archivelog.folder=archived +hoodie.populate.meta.fields=true +hoodie.timeline.layout.version=1 +hoodie.table.version=3 +hoodie.table.recordkey.fields=rowId +hoodie.table.base.file.format=PARQUET +hoodie.table.keygenerator.class=org.apache.hudi.keygen.NonpartitionedKeyGenerator +hoodie.table.name=hudi_non_part_cow +hoodie.datasource.write.hive_style_partitioning=false diff --git a/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie_partition_metadata 
b/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie_partition_metadata new file mode 100644 index 0000000000000..f2149eb6cd5a3 --- /dev/null +++ b/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie_partition_metadata @@ -0,0 +1,4 @@ +#partition metadata +#Fri Dec 17 11:05:23 UTC 2021 +commitTime=20211217110514527 +partitionDepth=0 diff --git a/presto-hive/src/test/resources/hudi_non_part_cow/d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet b/presto-hive/src/test/resources/hudi_non_part_cow/d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet new file mode 100644 index 0000000000000..52de8719bf62d Binary files /dev/null and b/presto-hive/src/test/resources/hudi_non_part_cow/d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet differ diff --git a/presto-hive/src/test/resources/non_hudi_table/d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet b/presto-hive/src/test/resources/non_hudi_table/d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet new file mode 100644 index 0000000000000..52de8719bf62d Binary files /dev/null and b/presto-hive/src/test/resources/non_hudi_table/d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet differ diff --git a/presto-iceberg/pom.xml b/presto-iceberg/pom.xml index 99792c859b921..693fd3557d057 100644 --- a/presto-iceberg/pom.xml +++ b/presto-iceberg/pom.xml @@ -424,6 +424,7 @@ module-info org.apache.avro.* org.apache.parquet.* + org.apache.htrace.* diff --git a/presto-jdbc/pom.xml b/presto-jdbc/pom.xml index b2d81274da41e..72adfb10e41c0 100644 --- a/presto-jdbc/pom.xml +++ b/presto-jdbc/pom.xml @@ -207,6 +207,8 @@ shaded.parquet.it.unimi.dsi.fastutil.* module-info + org.apache.htrace.* + org.apache.parquet.avro.* diff --git a/presto-spark-base/pom.xml b/presto-spark-base/pom.xml index 58a2f2ed21127..1ec66fa0e3cc9 100644 --- a/presto-spark-base/pom.xml +++ b/presto-spark-base/pom.xml @@ -249,6 +249,9 @@ 
shaded.parquet.it.unimi.dsi.fastutil.* module-info + com.esotericsoftware.reflectasm.* + org.apache.htrace.* + org.apache.parquet.avro.* diff --git a/presto-spark-testing/pom.xml b/presto-spark-testing/pom.xml index 84c6b0c3be436..b468f7a62a48c 100644 --- a/presto-spark-testing/pom.xml +++ b/presto-spark-testing/pom.xml @@ -130,6 +130,8 @@ shaded.parquet.it.unimi.dsi.fastutil.* module-info + org.apache.htrace.* + org.apache.parquet.avro.*