diff --git a/pom.xml b/pom.xml
index a0402109a003d..458030678d7ea 100644
--- a/pom.xml
+++ b/pom.xml
@@ -69,7 +69,7 @@
2.3.1
0.19.0
2.3.1
- 0.9.0
+ 0.10.0
1.15.1
org.apache.hudi
- hudi-common
+ hudi-presto-bundle
${dep.hudi.version}
- org.apache.hbase
- hbase-server
+ com.esotericsoftware
+ kryo-shaded
- org.apache.orc
- orc-core
+ com.fasterxml.jackson.core
+ *
- org.objenesis
- objenesis
+ com.google.guava
+ guava
- commons-logging
- commons-logging
+ com.google.protobuf
+ protobuf-java
- org.slf4j
- jcl-over-slf4j
+ commons-lang
+ commons-lang
- com.fasterxml.jackson.core
- jackson-annotations
+ commons-logging
+ commons-logging
- com.fasterxml.jackson.core
- jackson-databind
+ log4j
+ log4j
- org.apache.httpcomponents
- httpclient
+ org.apache.avro
+ avro
- org.apache.httpcomponents
- fluent-hc
+ org.apache.hadoop
+ hadoop-common
- org.rocksdb
- rocksdbjni
+ org.apache.hadoop
+ hadoop-mapreduce-client-core
- com.esotericsoftware
- kryo-shaded
+ org.apache.hbase
+ hbase-shaded-server
-
-
-
-
- org.apache.hudi
- hudi-hadoop-mr
- ${dep.hudi.version}
-
- org.apache.hbase
- hbase-server
+ org.apache.htrace
+ htrace-core
- org.apache.orc
- orc-core
+ org.apache.httpcomponents
+ fluent-hc
- org.objenesis
- objenesis
+ org.apache.httpcomponents
+ httpclient
- commons-logging
- commons-logging
+ org.apache.hudi
+ hudi-common
- org.slf4j
- jcl-over-slf4j
+ org.apache.hudi
+ hudi-hadoop-mr-bundle
- com.fasterxml.jackson.core
- jackson-annotations
+ org.apache.orc
+ orc-core
- com.fasterxml.jackson.core
- jackson-databind
+ org.apache.parquet
+ parquet-avro
- org.apache.httpcomponents
- httpclient
+ org.codehaus.jackson
+ *
- org.apache.httpcomponents
- fluent-hc
+ org.lz4
+ lz4-java
+
+
+ org.objenesis
+ objenesis
org.rocksdb
rocksdbjni
- com.esotericsoftware
- kryo-shaded
+ org.slf4j
+ slf4j-log4j12
+
net.sf.opencsv
diff --git a/presto-geospatial/pom.xml b/presto-geospatial/pom.xml
index bc52d84e8ac9d..5edaf05c0bdd0 100644
--- a/presto-geospatial/pom.xml
+++ b/presto-geospatial/pom.xml
@@ -199,6 +199,8 @@
shaded.parquet.it.unimi.dsi.fastutil.*
module-info
+ org.apache.htrace.*
+ org.apache.parquet.avro.*
diff --git a/presto-hive-hadoop2/pom.xml b/presto-hive-hadoop2/pom.xml
index d0be05c1dd440..d1ee66ae00f07 100644
--- a/presto-hive-hadoop2/pom.xml
+++ b/presto-hive-hadoop2/pom.xml
@@ -170,6 +170,8 @@
shaded.parquet.it.unimi.dsi.fastutil.*
module-info
+ org.apache.htrace.*
+ org.apache.parquet.avro.*
@@ -197,6 +199,11 @@
parquet.thrift
about.html
+
+ module-info
+ org.apache.htrace.*
+ org.apache.parquet.avro.*
+
@@ -224,6 +231,11 @@
parquet.thrift
about.html
+
+ module-info
+ org.apache.htrace.*
+ org.apache.parquet.avro.*
+
diff --git a/presto-hive/pom.xml b/presto-hive/pom.xml
index 1cbbfda627ab3..cc5b9582fd1ca 100644
--- a/presto-hive/pom.xml
+++ b/presto-hive/pom.xml
@@ -49,12 +49,7 @@
org.apache.hudi
- hudi-common
-
-
-
- org.apache.hudi
- hudi-hadoop-mr
+ hudi-presto-bundle
@@ -411,6 +406,9 @@
shaded.parquet.it.unimi.dsi.fastutil.*
module-info
+
+ org.apache.htrace.*
+ org.apache.parquet.avro.*
diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HadoopDirectoryLister.java b/presto-hive/src/main/java/com/facebook/presto/hive/HadoopDirectoryLister.java
index b5f65f28941a7..c6012fc865508 100644
--- a/presto-hive/src/main/java/com/facebook/presto/hive/HadoopDirectoryLister.java
+++ b/presto-hive/src/main/java/com/facebook/presto/hive/HadoopDirectoryLister.java
@@ -45,7 +45,7 @@ public Iterator list(
p -> new HadoopFileInfoIterator(fileSystem.listLocatedStatus(p)),
namenodeStats,
hiveDirectoryContext.getNestedDirectoryPolicy(),
- pathFilter);
+ Optional.of(pathFilter));
}
public static class HadoopFileInfoIterator
diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java
index 5e21de5df15f0..794d79c29fee8 100644
--- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java
+++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java
@@ -198,6 +198,7 @@ public class HiveClientConfig
private boolean verboseRuntimeStatsEnabled;
private boolean useRecordPageSourceForCustomSplit = true;
+ private boolean hudiMetadataEnabled;
private boolean sizeBasedSplitWeightsEnabled = true;
private double minimumAssignedSplitWeight = 0.05;
@@ -1740,4 +1741,17 @@ public HiveClientConfig setUseRecordPageSourceForCustomSplit(boolean useRecordPa
this.useRecordPageSourceForCustomSplit = useRecordPageSourceForCustomSplit;
return this;
}
+
+ @Config("hive.hudi-metadata-enabled")
+ @ConfigDescription("For Hudi tables prefer to fetch the list of file names, sizes and other metadata from the internal metadata table rather than storage")
+ public HiveClientConfig setHudiMetadataEnabled(boolean hudiMetadataEnabled)
+ {
+ this.hudiMetadataEnabled = hudiMetadataEnabled;
+ return this;
+ }
+
+ public boolean isHudiMetadataEnabled()
+ {
+ return this.hudiMetadataEnabled;
+ }
}
diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java
index c5d97ed310260..9bbb083ea0d52 100644
--- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java
+++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java
@@ -133,6 +133,7 @@ public final class HiveSessionProperties
public static final String SIZE_BASED_SPLIT_WEIGHTS_ENABLED = "size_based_split_weights_enabled";
public static final String MINIMUM_ASSIGNED_SPLIT_WEIGHT = "minimum_assigned_split_weight";
private static final String USE_RECORD_PAGE_SOURCE_FOR_CUSTOM_SPLIT = "use_record_page_source_for_custom_split";
+ private static final String HUDI_METADATA_ENABLED = "hudi_metadata_enabled";
private final List> sessionProperties;
@@ -651,6 +652,11 @@ public HiveSessionProperties(HiveClientConfig hiveClientConfig, OrcFileWriterCon
USE_RECORD_PAGE_SOURCE_FOR_CUSTOM_SPLIT,
"Use record page source for custom split",
hiveClientConfig.isUseRecordPageSourceForCustomSplit(),
+ false),
+ booleanProperty(
+ HUDI_METADATA_ENABLED,
+ "For Hudi tables prefer to fetch the list of file names, sizes and other metadata from the internal metadata table rather than storage",
+ hiveClientConfig.isHudiMetadataEnabled(),
false));
}
@@ -1126,4 +1132,9 @@ public static boolean isUseRecordPageSourceForCustomSplit(ConnectorSession sessi
{
return session.getProperty(USE_RECORD_PAGE_SOURCE_FOR_CUSTOM_SPLIT, Boolean.class);
}
+
+ public static boolean isHudiMetadataEnabled(ConnectorSession session)
+ {
+ return session.getProperty(HUDI_METADATA_ENABLED, Boolean.class);
+ }
}
diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveUtil.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveUtil.java
index 0bf5976fd2e83..1398e8e71fddb 100644
--- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveUtil.java
+++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveUtil.java
@@ -384,14 +384,19 @@ static boolean shouldUseRecordReaderFromInputFormat(Configuration configuration,
.anyMatch(USE_RECORD_READER_FROM_INPUT_FORMAT_ANNOTATION::equals);
}
- static boolean shouldUseFileSplitsFromInputFormat(InputFormat, ?> inputFormat, Configuration conf, String tablePath)
+ static boolean shouldUseFileSplitsFromInputFormat(InputFormat, ?> inputFormat, DirectoryLister directoryLister)
{
- boolean hasUseSplitsAnnotation = Arrays.stream(inputFormat.getClass().getAnnotations())
- .map(Annotation::annotationType)
- .map(Class::getSimpleName)
- .anyMatch(USE_FILE_SPLITS_FROM_INPUT_FORMAT_ANNOTATION::equals);
+ if (directoryLister instanceof HudiDirectoryLister) {
+ boolean hasUseSplitsAnnotation = Arrays.stream(inputFormat.getClass().getAnnotations())
+ .map(Annotation::annotationType)
+ .map(Class::getSimpleName)
+ .anyMatch(USE_FILE_SPLITS_FROM_INPUT_FORMAT_ANNOTATION::equals);
+
+ return hasUseSplitsAnnotation &&
+ (!isHudiParquetInputFormat(inputFormat) || shouldUseFileSplitsForHudi(inputFormat, ((HudiDirectoryLister) directoryLister).getMetaClient()));
+ }
- return hasUseSplitsAnnotation && (!isHudiParquetInputFormat(inputFormat) || shouldUseFileSplitsForHudi(inputFormat, conf, tablePath));
+ return false;
}
static boolean isHudiParquetInputFormat(InputFormat, ?> inputFormat)
@@ -399,14 +404,13 @@ static boolean isHudiParquetInputFormat(InputFormat, ?> inputFormat)
return inputFormat instanceof HoodieParquetInputFormat;
}
- private static boolean shouldUseFileSplitsForHudi(InputFormat, ?> inputFormat, Configuration conf, String tablePath)
+ private static boolean shouldUseFileSplitsForHudi(InputFormat, ?> inputFormat, HoodieTableMetaClient metaClient)
{
if (inputFormat instanceof HoodieParquetRealtimeInputFormat) {
return true;
}
- HoodieTableMetaClient hoodieTableMetaClient = HoodieTableMetaClient.builder().setConf(conf).setBasePath(tablePath).build();
- return hoodieTableMetaClient.getTableConfig().getBootstrapBasePath().isPresent();
+ return metaClient.getTableConfig().getBootstrapBasePath().isPresent();
}
public static long parseHiveDate(String value)
diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HudiDirectoryLister.java b/presto-hive/src/main/java/com/facebook/presto/hive/HudiDirectoryLister.java
new file mode 100644
index 0000000000000..c9baa024c94d9
--- /dev/null
+++ b/presto-hive/src/main/java/com/facebook/presto/hive/HudiDirectoryLister.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.hive;
+
+import com.facebook.airlift.log.Logger;
+import com.facebook.presto.hive.filesystem.ExtendedFileSystem;
+import com.facebook.presto.hive.metastore.Table;
+import com.facebook.presto.hive.util.HiveFileIterator;
+import com.facebook.presto.spi.ConnectorSession;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hudi.common.config.HoodieMetadataConfig;
+import org.apache.hudi.common.engine.HoodieEngineContext;
+import org.apache.hudi.common.engine.HoodieLocalEngineContext;
+import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.common.model.HoodieBaseFile;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.view.FileSystemViewManager;
+import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Optional;
+
+import static com.facebook.presto.hive.HiveFileInfo.createHiveFileInfo;
+import static com.facebook.presto.hive.HiveSessionProperties.isHudiMetadataEnabled;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DEFAULT_PORT;
+
+public class HudiDirectoryLister
+ implements DirectoryLister
+{
+ private static final Logger log = Logger.get(HudiDirectoryLister.class);
+
+ private final HoodieTableFileSystemView fileSystemView;
+ private final HoodieTableMetaClient metaClient;
+ private final boolean metadataEnabled;
+
+ public HudiDirectoryLister(Configuration conf, ConnectorSession session, Table table)
+ {
+ log.info("Using Hudi Directory Lister.");
+ this.metadataEnabled = isHudiMetadataEnabled(session);
+ this.metaClient = HoodieTableMetaClient.builder()
+ .setConf(conf)
+ .setBasePath(table.getStorage().getLocation())
+ .build();
+ HoodieEngineContext engineContext = new HoodieLocalEngineContext(conf);
+ HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder()
+ .enable(metadataEnabled)
+ .build();
+ this.fileSystemView = FileSystemViewManager.createInMemoryFileSystemView(engineContext, metaClient, metadataConfig);
+ }
+
+ public HoodieTableMetaClient getMetaClient()
+ {
+ return metaClient;
+ }
+
+ @Override
+ public Iterator list(
+ ExtendedFileSystem fileSystem,
+ Table table,
+ Path path,
+ NamenodeStats namenodeStats,
+ PathFilter pathFilter,
+ HiveDirectoryContext hiveDirectoryContext)
+ {
+ log.debug("Listing path using Hudi directory lister: %s", path.toString());
+ return new HiveFileIterator(
+ path,
+ p -> new HudiFileInfoIterator(
+ fileSystemView,
+ metadataEnabled ? Optional.empty() : Optional.of(fileSystem.listStatus(p)),
+ table.getStorage().getLocation(),
+ p),
+ namenodeStats,
+ hiveDirectoryContext.getNestedDirectoryPolicy(),
+ Optional.empty());
+ }
+
+ public static class HudiFileInfoIterator
+ implements RemoteIterator
+ {
+ private final Iterator hoodieBaseFileIterator;
+
+ public HudiFileInfoIterator(
+ HoodieTableFileSystemView fileSystemView,
+ Optional fileStatuses,
+ String tablePath,
+ Path directory)
+ {
+ String partition = FSUtils.getRelativePartitionPath(new Path(tablePath), directory);
+ if (fileStatuses.isPresent()) {
+ fileSystemView.addFilesToView(fileStatuses.get());
+ this.hoodieBaseFileIterator = fileSystemView.fetchLatestBaseFiles(partition).iterator();
+ }
+ else {
+ this.hoodieBaseFileIterator = fileSystemView.getLatestBaseFiles(partition).iterator();
+ }
+ }
+
+ @Override
+ public boolean hasNext()
+ {
+ return hoodieBaseFileIterator.hasNext();
+ }
+
+ @Override
+ public HiveFileInfo next()
+ throws IOException
+ {
+ FileStatus fileStatus = hoodieBaseFileIterator.next().getFileStatus();
+ String[] name = new String[] {"localhost:" + DFS_DATANODE_DEFAULT_PORT};
+ String[] host = new String[] {"localhost"};
+ LocatedFileStatus hoodieFileStatus = new LocatedFileStatus(fileStatus,
+ new BlockLocation[] {new BlockLocation(name, host, 0L, fileStatus.getLen())});
+ return createHiveFileInfo(hoodieFileStatus, Optional.empty());
+ }
+ }
+}
diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/StoragePartitionLoader.java b/presto-hive/src/main/java/com/facebook/presto/hive/StoragePartitionLoader.java
index 5ebceacfcdbbc..b42f5147968ed 100644
--- a/presto-hive/src/main/java/com/facebook/presto/hive/StoragePartitionLoader.java
+++ b/presto-hive/src/main/java/com/facebook/presto/hive/StoragePartitionLoader.java
@@ -23,10 +23,6 @@
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.SchemaTableName;
-import com.google.common.base.Function;
-import com.google.common.cache.CacheBuilder;
-import com.google.common.cache.CacheLoader;
-import com.google.common.cache.LoadingCache;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterators;
@@ -44,7 +40,6 @@
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
-import org.apache.hudi.hadoop.HoodieROTablePathFilter;
import java.io.BufferedReader;
import java.io.IOException;
@@ -85,6 +80,7 @@
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
+import static com.google.common.base.Strings.isNullOrEmpty;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.Maps.fromProperties;
import static com.google.common.collect.Streams.stream;
@@ -110,7 +106,6 @@ public class StoragePartitionLoader
private final ConnectorSession session;
private final Deque> fileIterators;
private final boolean schedulerUsesHostAddresses;
- private final LoadingCache hoodiePathFilterLoadingCache;
private final boolean partialAggregationsPushedDown;
public StoragePartitionLoader(
@@ -132,15 +127,21 @@ public StoragePartitionLoader(
this.session = requireNonNull(session, "session is null");
this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
this.namenodeStats = requireNonNull(namenodeStats, "namenodeStats is null");
- this.directoryLister = requireNonNull(directoryLister, "directoryLister is null");
this.recursiveDirWalkerEnabled = recursiveDirWalkerEnabled;
this.hdfsContext = new HdfsContext(session, table.getDatabaseName(), table.getTableName(), table.getStorage().getLocation(), false);
this.fileIterators = requireNonNull(fileIterators, "fileIterators is null");
this.schedulerUsesHostAddresses = schedulerUsesHostAddresses;
- this.hoodiePathFilterLoadingCache = CacheBuilder.newBuilder()
- .maximumSize(1000)
- .build(CacheLoader.from((Function) HoodieROTablePathFilter::new));
this.partialAggregationsPushedDown = partialAggregationsPushedDown;
+
+ Optional directoryListerOverride = Optional.empty();
+ if (!isNullOrEmpty(table.getStorage().getLocation())) {
+ Configuration configuration = hdfsEnvironment.getConfiguration(hdfsContext, new Path(table.getStorage().getLocation()));
+ InputFormat, ?> inputFormat = getInputFormat(configuration, table.getStorage().getStorageFormat().getInputFormat(), false);
+ if (isHudiParquetInputFormat(inputFormat)) {
+ directoryListerOverride = Optional.of(new HudiDirectoryLister(configuration, session, table));
+ }
+ }
+ this.directoryLister = directoryListerOverride.orElseGet(() -> requireNonNull(directoryLister, "directoryLister is null"));
}
@Override
@@ -247,7 +248,7 @@ public ListenableFuture> loadPartition(HivePartitionMetadata partition, HiveSp
schedulerUsesHostAddresses,
partition.getEncryptionInformation());
- if (shouldUseFileSplitsFromInputFormat(inputFormat, configuration, table.getStorage().getLocation())) {
+ if (shouldUseFileSplitsFromInputFormat(inputFormat, directoryLister)) {
if (tableBucketInfo.isPresent()) {
throw new PrestoException(NOT_SUPPORTED, "Presto cannot read bucketed partition in an input format with UseFileSplitsFromInputFormat annotation: " + inputFormat.getClass().getSimpleName());
}
@@ -259,7 +260,7 @@ public ListenableFuture> loadPartition(HivePartitionMetadata partition, HiveSp
return addSplitsToSource(splits, splitFactory, hiveSplitSource, stopped);
}
- PathFilter pathFilter = isHudiParquetInputFormat(inputFormat) ? hoodiePathFilterLoadingCache.getUnchecked(configuration) : path1 -> true;
+ PathFilter pathFilter = path1 -> true;
// S3 Select pushdown works at the granularity of individual S3 objects,
// Partial aggregation pushdown works at the granularity of individual files
// therefore we must not split files when either is enabled.
diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/util/HiveFileIterator.java b/presto-hive/src/main/java/com/facebook/presto/hive/util/HiveFileIterator.java
index 7749eb78148af..20fda0bc96ee7 100644
--- a/presto-hive/src/main/java/com/facebook/presto/hive/util/HiveFileIterator.java
+++ b/presto-hive/src/main/java/com/facebook/presto/hive/util/HiveFileIterator.java
@@ -31,6 +31,7 @@
import java.util.Collections;
import java.util.Deque;
import java.util.Iterator;
+import java.util.Optional;
import static com.facebook.presto.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR;
import static com.facebook.presto.hive.HiveErrorCode.HIVE_FILE_NOT_FOUND;
@@ -44,7 +45,7 @@ public class HiveFileIterator
private final ListDirectoryOperation listDirectoryOperation;
private final NamenodeStats namenodeStats;
private final NestedDirectoryPolicy nestedDirectoryPolicy;
- private final PathFilter pathFilter;
+ private final Optional pathFilter;
private Iterator remoteIterator = Collections.emptyIterator();
@@ -53,7 +54,7 @@ public HiveFileIterator(
ListDirectoryOperation listDirectoryOperation,
NamenodeStats namenodeStats,
NestedDirectoryPolicy nestedDirectoryPolicy,
- PathFilter pathFilter)
+ Optional pathFilter)
{
paths.addLast(requireNonNull(path, "path is null"));
this.listDirectoryOperation = requireNonNull(listDirectoryOperation, "listDirectoryOperation is null");
@@ -97,10 +98,14 @@ protected HiveFileInfo computeNext()
}
}
- private Iterator getLocatedFileStatusRemoteIterator(Path path, PathFilter pathFilter)
+ private Iterator getLocatedFileStatusRemoteIterator(Path path, Optional pathFilter)
{
try (TimeStat.BlockTimer ignored = namenodeStats.getListLocatedStatus().time()) {
- return Iterators.filter(new FileStatusIterator(path, listDirectoryOperation, namenodeStats), input -> pathFilter.accept(input.getPath()));
+ FileStatusIterator statusIterator = new FileStatusIterator(path, listDirectoryOperation, namenodeStats);
+ if (!pathFilter.isPresent()) {
+ return statusIterator;
+ }
+ return Iterators.filter(statusIterator, input -> pathFilter.get().accept(input.getPath()));
}
}
diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/util/HudiRealtimeBootstrapBaseFileSplitConverter.java b/presto-hive/src/main/java/com/facebook/presto/hive/util/HudiRealtimeBootstrapBaseFileSplitConverter.java
index ba4f02ddf8b03..8c4f6119beacf 100644
--- a/presto-hive/src/main/java/com/facebook/presto/hive/util/HudiRealtimeBootstrapBaseFileSplitConverter.java
+++ b/presto-hive/src/main/java/com/facebook/presto/hive/util/HudiRealtimeBootstrapBaseFileSplitConverter.java
@@ -16,6 +16,7 @@
import com.google.common.collect.ImmutableMap;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.hadoop.realtime.RealtimeBootstrapBaseFileSplit;
import java.io.IOException;
@@ -24,6 +25,7 @@
import java.util.List;
import java.util.Map;
import java.util.Optional;
+import java.util.stream.Collectors;
import static com.facebook.presto.hive.HiveUtil.CUSTOM_FILE_SPLIT_CLASS_KEY;
import static com.google.common.base.Strings.isNullOrEmpty;
@@ -68,12 +70,13 @@ public Optional recreateFileSplitWithCustomInfo(FileSplit split, Map<
if (!isNullOrEmpty(customFileSplitClass) && RealtimeBootstrapBaseFileSplit.class.getName().equals(customFileSplitClass)) {
String deltaFilePaths = customSplitInfo.get(DELTA_FILE_PATHS_KEY);
List deltaLogPaths = isNullOrEmpty(deltaFilePaths) ? Collections.emptyList() : Arrays.asList(deltaFilePaths.split(","));
+ List deltaLogFiles = deltaLogPaths.stream().map(p -> new HoodieLogFile(new Path(p))).collect(Collectors.toList());
FileSplit bootstrapFileSplit = new FileSplit(
new Path(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_PATH)),
parseLong(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_START)),
parseLong(customSplitInfo.get(BOOTSTRAP_FILE_SPLIT_LEN)),
(String[]) null);
- split = new RealtimeBootstrapBaseFileSplit(split, customSplitInfo.get(BASE_PATH_KEY), deltaLogPaths,
+ split = new RealtimeBootstrapBaseFileSplit(split, customSplitInfo.get(BASE_PATH_KEY), deltaLogFiles,
customSplitInfo.get(MAX_COMMIT_TIME_KEY), bootstrapFileSplit);
return Optional.of(split);
}
diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/util/HudiRealtimeSplitConverter.java b/presto-hive/src/main/java/com/facebook/presto/hive/util/HudiRealtimeSplitConverter.java
index 5edcbc8811575..5868ae1a9fbfd 100644
--- a/presto-hive/src/main/java/com/facebook/presto/hive/util/HudiRealtimeSplitConverter.java
+++ b/presto-hive/src/main/java/com/facebook/presto/hive/util/HudiRealtimeSplitConverter.java
@@ -14,7 +14,9 @@
package com.facebook.presto.hive.util;
import com.google.common.collect.ImmutableMap;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit;
@@ -23,6 +25,7 @@
import java.util.List;
import java.util.Map;
import java.util.Optional;
+import java.util.stream.Collectors;
import static com.facebook.presto.hive.HiveUtil.CUSTOM_FILE_SPLIT_CLASS_KEY;
import static java.util.Objects.requireNonNull;
@@ -62,10 +65,11 @@ public Optional recreateFileSplitWithCustomInfo(FileSplit split, Map<
if (HoodieRealtimeFileSplit.class.getName().equals(customSplitClass)) {
requireNonNull(customSplitInfo.get(HUDI_DELTA_FILEPATHS_KEY), "HUDI_DELTA_FILEPATHS_KEY is missing");
List deltaLogPaths = Arrays.asList(customSplitInfo.get(HUDI_DELTA_FILEPATHS_KEY).split(","));
+ List deltaLogFiles = deltaLogPaths.stream().map(p -> new HoodieLogFile(new Path(p))).collect(Collectors.toList());
return Optional.of(new HoodieRealtimeFileSplit(
split,
requireNonNull(customSplitInfo.get(HUDI_BASEPATH_KEY), "HUDI_BASEPATH_KEY is missing"),
- deltaLogPaths,
+ deltaLogFiles,
requireNonNull(customSplitInfo.get(HUDI_MAX_COMMIT_TIME_KEY), "HUDI_MAX_COMMIT_TIME_KEY is missing"),
Option.empty()));
}
diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveClientConfig.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveClientConfig.java
index 17afde2a55a34..abfbd24e4d7f3 100644
--- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveClientConfig.java
+++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveClientConfig.java
@@ -160,7 +160,8 @@ public void testDefaults()
.setSizeBasedSplitWeightsEnabled(true)
.setMinimumAssignedSplitWeight(0.05)
.setUserDefinedTypeEncodingEnabled(false)
- .setUseRecordPageSourceForCustomSplit(true));
+ .setUseRecordPageSourceForCustomSplit(true)
+ .setHudiMetadataEnabled(false));
}
@Test
@@ -282,6 +283,7 @@ public void testExplicitPropertyMappings()
.put("hive.user-defined-type-encoding-enabled", "true")
.put("hive.minimum-assigned-split-weight", "1.0")
.put("hive.use-record-page-source-for-custom-split", "false")
+ .put("hive.hudi-metadata-enabled", "true")
.build();
HiveClientConfig expected = new HiveClientConfig()
@@ -399,7 +401,8 @@ public void testExplicitPropertyMappings()
.setSizeBasedSplitWeightsEnabled(false)
.setMinimumAssignedSplitWeight(1.0)
.setUserDefinedTypeEncodingEnabled(true)
- .setUseRecordPageSourceForCustomSplit(false);
+ .setUseRecordPageSourceForCustomSplit(false)
+ .setHudiMetadataEnabled(true);
ConfigAssertions.assertFullMapping(properties, expected);
}
diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHudiDirectoryLister.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHudiDirectoryLister.java
new file mode 100644
index 0000000000000..0ac9a56e3da78
--- /dev/null
+++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHudiDirectoryLister.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.hive;
+
+import com.facebook.presto.hive.filesystem.ExtendedFileSystem;
+import com.facebook.presto.hive.metastore.Storage;
+import com.facebook.presto.hive.metastore.Table;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.exception.TableNotFoundException;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Optional;
+
+import static com.facebook.presto.hive.BucketFunctionType.HIVE_COMPATIBLE;
+import static com.facebook.presto.hive.HiveStorageFormat.PARQUET;
+import static com.facebook.presto.hive.HiveTestUtils.SESSION;
+import static com.facebook.presto.hive.NestedDirectoryPolicy.IGNORED;
+import static com.facebook.presto.hive.metastore.PrestoTableType.EXTERNAL_TABLE;
+import static com.facebook.presto.hive.metastore.StorageFormat.fromHiveStorageFormat;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertThrows;
+import static org.testng.Assert.assertTrue;
+
+public class TestHudiDirectoryLister
+{
+ private Configuration hadoopConf;
+
+ @BeforeClass
+ private void setup()
+ {
+ hadoopConf = new Configuration();
+ hadoopConf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
+ hadoopConf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
+ }
+
+ @AfterClass(alwaysRun = true)
+ private void tearDown()
+ {
+ hadoopConf = null;
+ }
+
+ @Test
+ public void testDirectoryListerForHudiTable()
+ throws IOException
+ {
+ Table mockTable = new Table(
+ "schema",
+ "hudi_non_part_cow",
+ "user",
+ EXTERNAL_TABLE,
+ new Storage(fromHiveStorageFormat(PARQUET),
+ getTableBasePath("hudi_non_part_cow"),
+ Optional.of(new HiveBucketProperty(
+ ImmutableList.of(),
+ 1,
+ ImmutableList.of(),
+ HIVE_COMPATIBLE,
+ Optional.empty())),
+ false,
+ ImmutableMap.of(),
+ ImmutableMap.of()),
+ ImmutableList.of(),
+ ImmutableList.of(),
+ ImmutableMap.of(),
+ Optional.empty(),
+ Optional.empty());
+
+ HudiDirectoryLister directoryLister = new HudiDirectoryLister(hadoopConf, SESSION, mockTable);
+ HoodieTableMetaClient metaClient = directoryLister.getMetaClient();
+ assertEquals(metaClient.getBasePath(), mockTable.getStorage().getLocation());
+ Path path = new Path(mockTable.getStorage().getLocation());
+ ExtendedFileSystem fs = (ExtendedFileSystem) path.getFileSystem(hadoopConf);
+ PathFilter pathFilter = path1 -> true;
+ Iterator fileInfoIterator = directoryLister.list(fs, mockTable, path, new NamenodeStats(), pathFilter, new HiveDirectoryContext(IGNORED, false));
+ assertTrue(fileInfoIterator.hasNext());
+ HiveFileInfo fileInfo = fileInfoIterator.next();
+ assertEquals(fileInfo.getPath().getName(), "d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet");
+ }
+
+ @Test
+ public void testDirectoryListerForNonHudiTable()
+ {
+ Table mockTable = new Table(
+ "schema",
+ "non_hudi_table",
+ "user",
+ EXTERNAL_TABLE,
+ new Storage(fromHiveStorageFormat(PARQUET),
+ getTableBasePath("non_hudi_table"),
+ Optional.of(new HiveBucketProperty(
+ ImmutableList.of(),
+ 1,
+ ImmutableList.of(),
+ HIVE_COMPATIBLE,
+ Optional.empty())),
+ false,
+ ImmutableMap.of(),
+ ImmutableMap.of()),
+ ImmutableList.of(),
+ ImmutableList.of(),
+ ImmutableMap.of(),
+ Optional.empty(),
+ Optional.empty());
+
+ assertThrows(TableNotFoundException.class, () -> new HudiDirectoryLister(hadoopConf, SESSION, mockTable));
+ }
+
+ private static String getTableBasePath(String tableName)
+ {
+ return TestHudiDirectoryLister.class.getClassLoader().getResource(tableName).toString();
+ }
+}
diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/util/TestCustomSplitConversionUtils.java b/presto-hive/src/test/java/com/facebook/presto/hive/util/TestCustomSplitConversionUtils.java
index 9fef07716578b..fb65585e50b96 100644
--- a/presto-hive/src/test/java/com/facebook/presto/hive/util/TestCustomSplitConversionUtils.java
+++ b/presto-hive/src/test/java/com/facebook/presto/hive/util/TestCustomSplitConversionUtils.java
@@ -15,6 +15,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.hadoop.BootstrapBaseFileSplit;
import org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit;
@@ -25,6 +26,7 @@
import java.util.Arrays;
import java.util.List;
import java.util.Map;
+import java.util.stream.Collectors;
import static org.testng.Assert.assertEquals;
@@ -40,11 +42,12 @@ public class TestCustomSplitConversionUtils
public void testHudiRealtimeSplitConverterRoundTrip()
throws IOException
{
- List expectedDeltaLogPaths = Arrays.asList("test1", "test2", "test3");
+ List deltaLogPaths = Arrays.asList("test1", "test2", "test3");
+ List deltaLogFiles = deltaLogPaths.stream().map(p -> new HoodieLogFile(new Path(p))).collect(Collectors.toList());
String expectedMaxCommitTime = "max_commit_time";
FileSplit baseSplit = new FileSplit(FILE_PATH, SPLIT_START_POS, SPLIT_LENGTH, SPLIT_HOSTS);
- FileSplit hudiSplit = new HoodieRealtimeFileSplit(baseSplit, BASE_PATH, expectedDeltaLogPaths, expectedMaxCommitTime, Option.empty());
+ FileSplit hudiSplit = new HoodieRealtimeFileSplit(baseSplit, BASE_PATH, deltaLogFiles, expectedMaxCommitTime, Option.empty());
// Test conversion of HudiSplit -> customSplitInfo
Map customSplitInfo = CustomSplitConversionUtils.extractCustomSplitInfo(hudiSplit);
@@ -57,7 +60,7 @@ public void testHudiRealtimeSplitConverterRoundTrip()
assertEquals(SPLIT_LENGTH, recreatedSplit.getLength());
assertEquals(SPLIT_HOSTS, recreatedSplit.getLocations());
assertEquals(BASE_PATH, recreatedSplit.getBasePath());
- assertEquals(expectedDeltaLogPaths, recreatedSplit.getDeltaLogPaths());
+ assertEquals(deltaLogPaths, recreatedSplit.getDeltaLogPaths());
assertEquals(expectedMaxCommitTime, recreatedSplit.getMaxCommitTime());
}
@@ -95,6 +98,7 @@ public void testHudiRealtimeBootstrapBaseFileSplitConverter()
throws IOException
{
List deltaLogPaths = Arrays.asList("test1", "test2", "test3");
+ List deltaLogFiles = deltaLogPaths.stream().map(p -> new HoodieLogFile(new Path(p))).collect(Collectors.toList());
String maxCommitTime = "max_commit_time";
Path bootstrapSourceFilePath = new Path("/test/source/test.parquet");
@@ -104,7 +108,7 @@ public void testHudiRealtimeBootstrapBaseFileSplitConverter()
FileSplit baseSplit = new FileSplit(FILE_PATH, SPLIT_START_POS, SPLIT_LENGTH, SPLIT_HOSTS);
FileSplit bootstrapSourceSplit = new FileSplit(bootstrapSourceFilePath, bootstrapSourceSplitStartPos, bootstrapSourceSplitLength,
new String[0]);
- FileSplit hudiSplit = new RealtimeBootstrapBaseFileSplit(baseSplit, BASE_PATH, deltaLogPaths, maxCommitTime,
+ FileSplit hudiSplit = new RealtimeBootstrapBaseFileSplit(baseSplit, BASE_PATH, deltaLogFiles, maxCommitTime,
bootstrapSourceSplit);
// Test conversion of HudiSplit -> customSplitInfo
diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/util/TestHiveFileIterator.java b/presto-hive/src/test/java/com/facebook/presto/hive/util/TestHiveFileIterator.java
index b1e332b43a758..a0b2a4c9b3f75 100644
--- a/presto-hive/src/test/java/com/facebook/presto/hive/util/TestHiveFileIterator.java
+++ b/presto-hive/src/test/java/com/facebook/presto/hive/util/TestHiveFileIterator.java
@@ -29,6 +29,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import java.util.Optional;
import static com.facebook.presto.hive.NestedDirectoryPolicy.IGNORED;
import static com.facebook.presto.hive.NestedDirectoryPolicy.RECURSE;
@@ -77,7 +78,7 @@ public void testDefaultPathFilterNoRecursion()
createFiles(basePath, 5, false);
Path rootPath = new Path("file://" + basePath + File.separator);
PathFilter pathFilter = path -> true;
- HiveFileIterator hiveFileIterator = new HiveFileIterator(rootPath, listDirectoryOperation, new NamenodeStats(), IGNORED, pathFilter);
+ HiveFileIterator hiveFileIterator = new HiveFileIterator(rootPath, listDirectoryOperation, new NamenodeStats(), IGNORED, Optional.of(pathFilter));
int actualCount = Iterators.size(hiveFileIterator);
assertEquals(actualCount, 8);
@@ -108,7 +109,7 @@ public void testDefaultPathFilterWithRecursion()
createFiles(dir2, 4, false);
Path rootPath = new Path("file://" + basePath + File.separator);
PathFilter pathFilter = path -> true;
- HiveFileIterator hiveFileIterator = new HiveFileIterator(rootPath, listDirectoryOperation, new NamenodeStats(), RECURSE, pathFilter);
+ HiveFileIterator hiveFileIterator = new HiveFileIterator(rootPath, listDirectoryOperation, new NamenodeStats(), RECURSE, Optional.of(pathFilter));
int actualCount = Iterators.size(hiveFileIterator);
assertEquals(actualCount, 20);
@@ -129,7 +130,7 @@ public void testPathFilterWithNoRecursion()
createFiles(basePath, 5, false);
Path rootPath = new Path("file://" + basePath + File.separator);
PathFilter pathFilter = path -> path.getName().contains(PATH_FILTER_MATCHED_PREFIX);
- HiveFileIterator hiveFileIterator = new HiveFileIterator(rootPath, listDirectoryOperation, new NamenodeStats(), IGNORED, pathFilter);
+ HiveFileIterator hiveFileIterator = new HiveFileIterator(rootPath, listDirectoryOperation, new NamenodeStats(), IGNORED, Optional.of(pathFilter));
int actualCount = Iterators.size(hiveFileIterator);
assertEquals(actualCount, 3);
@@ -160,7 +161,7 @@ public void testPathFilterWithRecursion()
createFiles(dir2, 4, false);
Path rootPath = new Path("file://" + basePath + File.separator);
PathFilter pathFilter = path -> path.getName().contains(PATH_FILTER_MATCHED_PREFIX);
- HiveFileIterator hiveFileIterator = new HiveFileIterator(rootPath, listDirectoryOperation, new NamenodeStats(), RECURSE, pathFilter);
+ HiveFileIterator hiveFileIterator = new HiveFileIterator(rootPath, listDirectoryOperation, new NamenodeStats(), RECURSE, Optional.of(pathFilter));
int actualCount = Iterators.size(hiveFileIterator);
assertEquals(actualCount, 9);
diff --git a/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/20211217110514527.commit b/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/20211217110514527.commit
new file mode 100644
index 0000000000000..f77eeb137f026
--- /dev/null
+++ b/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/20211217110514527.commit
@@ -0,0 +1,50 @@
+{
+ "partitionToWriteStats" : {
+ "" : [ {
+ "fileId" : "d0875d00-483d-4e8b-bbbe-c520366c47a0-0",
+ "path" : "d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet",
+ "prevCommit" : "null",
+ "numWrites" : 3,
+ "numDeletes" : 0,
+ "numUpdateWrites" : 0,
+ "numInserts" : 3,
+ "totalWriteBytes" : 436273,
+ "totalWriteErrors" : 0,
+ "tempPath" : null,
+ "partitionPath" : "",
+ "totalLogRecords" : 0,
+ "totalLogFilesCompacted" : 0,
+ "totalLogSizeCompacted" : 0,
+ "totalUpdatedRecordsCompacted" : 0,
+ "totalLogBlocks" : 0,
+ "totalCorruptLogBlock" : 0,
+ "totalRollbackBlocks" : 0,
+ "fileSizeInBytes" : 436273,
+ "minEventTime" : null,
+ "maxEventTime" : null
+ } ]
+ },
+ "compacted" : false,
+ "extraMetadata" : {
+ "schema" : "{\"type\":\"record\",\"name\":\"hudi_non_part_cow_record\",\"namespace\":\"hoodie.hudi_non_part_cow\",\"fields\":[{\"name\":\"rowId\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"partitionId\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"preComb\",\"type\":[\"null\",\"long\"],\"default\":null},{\"name\":\"name\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"versionId\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"toBeDeletedStr\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"intToLong\",\"type\":[\"null\",\"int\"],\"default\":null},{\"name\":\"longToInt\",\"type\":[\"null\",\"long\"],\"default\":null}]}"
+ },
+ "operationType" : "INSERT",
+ "writePartitionPaths" : [ "" ],
+ "fileIdAndRelativePaths" : {
+ "d0875d00-483d-4e8b-bbbe-c520366c47a0-0" : "d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet"
+ },
+ "totalRecordsDeleted" : 0,
+ "totalLogRecordsCompacted" : 0,
+ "totalLogFilesCompacted" : 0,
+ "totalCompactedRecordsUpdated" : 0,
+ "totalLogFilesSize" : 0,
+ "totalScanTime" : 0,
+ "totalCreateTime" : 1743,
+ "totalUpsertTime" : 0,
+ "minAndMaxEventTime" : {
+ "Optional.empty" : {
+ "val" : null,
+ "present" : false
+ }
+ }
+}
\ No newline at end of file
diff --git a/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/20211217110514527.commit.requested b/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/20211217110514527.commit.requested
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/20211217110514527.inflight b/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/20211217110514527.inflight
new file mode 100644
index 0000000000000..6605bcaf9b36c
--- /dev/null
+++ b/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/20211217110514527.inflight
@@ -0,0 +1,48 @@
+{
+ "partitionToWriteStats" : {
+ "" : [ {
+ "fileId" : "",
+ "path" : null,
+ "prevCommit" : "null",
+ "numWrites" : 0,
+ "numDeletes" : 0,
+ "numUpdateWrites" : 0,
+ "numInserts" : 3,
+ "totalWriteBytes" : 0,
+ "totalWriteErrors" : 0,
+ "tempPath" : null,
+ "partitionPath" : null,
+ "totalLogRecords" : 0,
+ "totalLogFilesCompacted" : 0,
+ "totalLogSizeCompacted" : 0,
+ "totalUpdatedRecordsCompacted" : 0,
+ "totalLogBlocks" : 0,
+ "totalCorruptLogBlock" : 0,
+ "totalRollbackBlocks" : 0,
+ "fileSizeInBytes" : 0,
+ "minEventTime" : null,
+ "maxEventTime" : null
+ } ]
+ },
+ "compacted" : false,
+ "extraMetadata" : { },
+ "operationType" : "INSERT",
+ "writePartitionPaths" : [ "" ],
+ "fileIdAndRelativePaths" : {
+ "" : null
+ },
+ "totalRecordsDeleted" : 0,
+ "totalLogRecordsCompacted" : 0,
+ "totalLogFilesCompacted" : 0,
+ "totalCompactedRecordsUpdated" : 0,
+ "totalLogFilesSize" : 0,
+ "totalScanTime" : 0,
+ "totalCreateTime" : 0,
+ "totalUpsertTime" : 0,
+ "minAndMaxEventTime" : {
+ "Optional.empty" : {
+ "val" : null,
+ "present" : false
+ }
+ }
+}
\ No newline at end of file
diff --git a/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/hoodie.properties b/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/hoodie.properties
new file mode 100644
index 0000000000000..3d03fa7915c39
--- /dev/null
+++ b/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie/hoodie.properties
@@ -0,0 +1,14 @@
+#Properties saved on Fri Dec 17 11:05:14 UTC 2021
+#Fri Dec 17 11:05:14 UTC 2021
+hoodie.table.precombine.field=preComb
+hoodie.table.partition.fields=
+hoodie.table.type=COPY_ON_WRITE
+hoodie.archivelog.folder=archived
+hoodie.populate.meta.fields=true
+hoodie.timeline.layout.version=1
+hoodie.table.version=3
+hoodie.table.recordkey.fields=rowId
+hoodie.table.base.file.format=PARQUET
+hoodie.table.keygenerator.class=org.apache.hudi.keygen.NonpartitionedKeyGenerator
+hoodie.table.name=hudi_non_part_cow
+hoodie.datasource.write.hive_style_partitioning=false
diff --git a/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie_partition_metadata b/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie_partition_metadata
new file mode 100644
index 0000000000000..f2149eb6cd5a3
--- /dev/null
+++ b/presto-hive/src/test/resources/hudi_non_part_cow/.hoodie_partition_metadata
@@ -0,0 +1,4 @@
+#partition metadata
+#Fri Dec 17 11:05:23 UTC 2021
+commitTime=20211217110514527
+partitionDepth=0
diff --git a/presto-hive/src/test/resources/hudi_non_part_cow/d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet b/presto-hive/src/test/resources/hudi_non_part_cow/d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet
new file mode 100644
index 0000000000000..52de8719bf62d
Binary files /dev/null and b/presto-hive/src/test/resources/hudi_non_part_cow/d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet differ
diff --git a/presto-hive/src/test/resources/non_hudi_table/d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet b/presto-hive/src/test/resources/non_hudi_table/d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet
new file mode 100644
index 0000000000000..52de8719bf62d
Binary files /dev/null and b/presto-hive/src/test/resources/non_hudi_table/d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet differ
diff --git a/presto-iceberg/pom.xml b/presto-iceberg/pom.xml
index 99792c859b921..693fd3557d057 100644
--- a/presto-iceberg/pom.xml
+++ b/presto-iceberg/pom.xml
@@ -424,6 +424,7 @@
module-info
org.apache.avro.*
org.apache.parquet.*
+ org.apache.htrace.*
diff --git a/presto-jdbc/pom.xml b/presto-jdbc/pom.xml
index b2d81274da41e..72adfb10e41c0 100644
--- a/presto-jdbc/pom.xml
+++ b/presto-jdbc/pom.xml
@@ -207,6 +207,8 @@
shaded.parquet.it.unimi.dsi.fastutil.*
module-info
+ org.apache.htrace.*
+ org.apache.parquet.avro.*
diff --git a/presto-spark-base/pom.xml b/presto-spark-base/pom.xml
index 58a2f2ed21127..1ec66fa0e3cc9 100644
--- a/presto-spark-base/pom.xml
+++ b/presto-spark-base/pom.xml
@@ -249,6 +249,9 @@
shaded.parquet.it.unimi.dsi.fastutil.*
module-info
+ com.esotericsoftware.reflectasm.*
+ org.apache.htrace.*
+ org.apache.parquet.avro.*
diff --git a/presto-spark-testing/pom.xml b/presto-spark-testing/pom.xml
index 84c6b0c3be436..b468f7a62a48c 100644
--- a/presto-spark-testing/pom.xml
+++ b/presto-spark-testing/pom.xml
@@ -130,6 +130,8 @@
shaded.parquet.it.unimi.dsi.fastutil.*
module-info
+ org.apache.htrace.*
+ org.apache.parquet.avro.*