diff --git a/plugin/trino-hudi/pom.xml b/plugin/trino-hudi/pom.xml
index 850104555cfe..4084ab27374f 100644
--- a/plugin/trino-hudi/pom.xml
+++ b/plugin/trino-hudi/pom.xml
@@ -49,11 +49,6 @@
             <artifactId>trino-plugin-toolkit</artifactId>
         </dependency>

-        <dependency>
-            <groupId>io.trino.hadoop</groupId>
-            <artifactId>hadoop-apache</artifactId>
-        </dependency>
-
         <dependency>
             <groupId>io.trino.hive</groupId>
             <artifactId>hive-apache</artifactId>
@@ -94,6 +89,11 @@
             <artifactId>units</artifactId>
         </dependency>

+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+        </dependency>
+
         <dependency>
             <groupId>com.google.code.findbugs</groupId>
             <artifactId>jsr305</artifactId>
@@ -110,6 +110,12 @@
             <artifactId>guice</artifactId>
         </dependency>

+        <dependency>
+            <groupId>com.linkedin.calcite</groupId>
+            <artifactId>calcite-core</artifactId>
+            <classifier>shaded</classifier>
+        </dependency>
+
         <dependency>
             <groupId>javax.annotation</groupId>
             <artifactId>javax.annotation-api</artifactId>
@@ -131,91 +137,8 @@
         </dependency>

-        <dependency>
-            <groupId>org.apache.hudi</groupId>
-            <artifactId>hudi-common</artifactId>
-            <version>${dep.hudi.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.apache.hbase</groupId>
-                    <artifactId>hbase-server</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.hbase</groupId>
-                    <artifactId>hbase-client</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.osgi</groupId>
-                    <artifactId>org.osgi.core</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.orc</groupId>
-                    <artifactId>orc-core</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>com.fasterxml.jackson.core</groupId>
-                    <artifactId>jackson-annotations</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>com.fasterxml.jackson.core</groupId>
-                    <artifactId>jackson-databind</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.httpcomponents</groupId>
-                    <artifactId>httpclient</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.httpcomponents</groupId>
-                    <artifactId>fluent-hc</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.rocksdb</groupId>
-                    <artifactId>rocksdbjni</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>com.esotericsoftware</groupId>
-                    <artifactId>kryo-shaded</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.hadoop</groupId>
-                    <artifactId>hadoop-client</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.hadoop</groupId>
-                    <artifactId>hadoop-hdfs</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.httpcomponents</groupId>
-                    <artifactId>httpcore</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.hive</groupId>
-                    <artifactId>hive-exec</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.apache.hive</groupId>
-                    <artifactId>hive-jdbc</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>com.github.ben-manes.caffeine</groupId>
-                    <artifactId>caffeine</artifactId>
-                </exclusion>
-                <exclusion>
-                    <groupId>org.lz4</groupId>
-                    <artifactId>lz4-java</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-
-        <dependency>
-            <groupId>org.apache.hudi</groupId>
-            <artifactId>hudi-hadoop-mr</artifactId>
-            <version>${dep.hudi.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>*</groupId>
-                    <artifactId>*</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
+        <dependency>
+            <groupId>org.apache.avro</groupId>
+            <artifactId>avro</artifactId>
+        </dependency>
@@ -231,14 +154,14 @@
         </dependency>

         <dependency>
-            <groupId>io.airlift</groupId>
-            <artifactId>log-manager</artifactId>
+            <groupId>io.trino.hadoop</groupId>
+            <artifactId>hadoop-apache</artifactId>
             <scope>runtime</scope>
         </dependency>

         <dependency>
-            <groupId>org.apache.avro</groupId>
-            <artifactId>avro</artifactId>
+            <groupId>io.airlift</groupId>
+            <artifactId>log-manager</artifactId>
             <scope>runtime</scope>
         </dependency>
@@ -424,6 +347,19 @@
         </dependency>

+        <dependency>
+            <groupId>org.apache.hudi</groupId>
+            <artifactId>hudi-common</artifactId>
+            <version>${dep.hudi.version}</version>
+            <scope>test</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>*</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
         <dependency>
             <groupId>org.apache.hudi</groupId>
             <artifactId>hudi-java-client</artifactId>
             <version>${dep.hudi.version}</version>
diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiErrorCode.java b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiErrorCode.java
index e2eca0c4e61e..68e50edba22f 100644
--- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiErrorCode.java
+++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiErrorCode.java
@@ -30,7 +30,8 @@ public enum HudiErrorCode
HUDI_UNSUPPORTED_FILE_FORMAT(5, EXTERNAL),
HUDI_CURSOR_ERROR(6, EXTERNAL),
HUDI_FILESYSTEM_ERROR(7, EXTERNAL),
- HUDI_PARTITION_NOT_FOUND(8, EXTERNAL);
+ HUDI_PARTITION_NOT_FOUND(8, EXTERNAL),
+ HUDI_UNSUPPORTED_TABLE_TYPE(9, EXTERNAL);
private final ErrorCode errorCode;
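The new code gives the connector a dedicated error for rejecting table types it cannot read, instead of reusing a generic code. A minimal sketch of a call site; the guard method and message are illustrative assumptions, not code from this change:

    import static io.trino.plugin.hudi.HudiErrorCode.HUDI_UNSUPPORTED_TABLE_TYPE;
    import static io.trino.plugin.hudi.model.HudiTableType.COPY_ON_WRITE;
    import static java.lang.String.format;

    import io.trino.spi.TrinoException;

    // Hypothetical guard, not part of this diff: reject anything but copy-on-write.
    static void checkTableTypeSupported(HudiTableHandle tableHandle)
    {
        if (tableHandle.getTableType() != COPY_ON_WRITE) {
            throw new TrinoException(HUDI_UNSUPPORTED_TABLE_TYPE,
                    format("Table type %s is not supported", tableHandle.getTableType()));
        }
    }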
diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiFileStatus.java b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiFileStatus.java
index ded7389110cf..56d585db8772 100644
--- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiFileStatus.java
+++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiFileStatus.java
@@ -13,14 +13,14 @@
*/
package io.trino.plugin.hudi;
-import org.apache.hadoop.fs.Path;
+import io.trino.filesystem.Location;
import static java.util.Objects.requireNonNull;
-public record HudiFileStatus(Path path, boolean isDirectory, long length, long modificationTime, long blockSize)
+public record HudiFileStatus(Location location, boolean isDirectory, long length, long modificationTime, long blockSize)
{
public HudiFileStatus
{
- requireNonNull(path, "path is null");
+ requireNonNull(location, "location is null");
}
}
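With the record now carrying an io.trino.filesystem.Location instead of a Hadoop Path, a directory lister can populate it straight from a TrinoFileSystem listing. A sketch under that assumption (the method name is hypothetical, and FileEntry exposes no block size, so 0 stands in as a placeholder):

    import java.io.IOException;

    import io.trino.filesystem.FileEntry;
    import io.trino.filesystem.FileIterator;
    import io.trino.filesystem.Location;
    import io.trino.filesystem.TrinoFileSystem;

    static void listDataFiles(TrinoFileSystem fileSystem, Location partitionLocation)
            throws IOException
    {
        FileIterator iterator = fileSystem.listFiles(partitionLocation);
        while (iterator.hasNext()) {
            FileEntry entry = iterator.next();
            // listFiles yields only files, hence isDirectory = false
            HudiFileStatus status = new HudiFileStatus(
                    entry.location(),
                    false,
                    entry.length(),
                    entry.lastModified().toEpochMilli(),
                    0);
            // ... hand the status off to split generation
        }
    }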
diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiMetadata.java b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiMetadata.java
index 32f953d10aa7..43520a67157b 100644
--- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiMetadata.java
+++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiMetadata.java
@@ -16,11 +16,8 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.airlift.log.Logger;
-import io.trino.filesystem.FileIterator;
import io.trino.filesystem.Location;
-import io.trino.filesystem.TrinoFileSystem;
import io.trino.filesystem.TrinoFileSystemFactory;
-import io.trino.hdfs.HdfsEnvironment;
import io.trino.plugin.base.classloader.ClassLoaderSafeSystemTable;
import io.trino.plugin.hive.HiveColumnHandle;
import io.trino.plugin.hive.metastore.Column;
@@ -43,7 +40,6 @@
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.type.TypeManager;
-import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
@@ -60,18 +56,17 @@
import static io.trino.plugin.hive.util.HiveUtil.columnMetadataGetter;
import static io.trino.plugin.hive.util.HiveUtil.hiveColumnHandles;
import static io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema;
-import static io.trino.plugin.hudi.HudiErrorCode.HUDI_FILESYSTEM_ERROR;
import static io.trino.plugin.hudi.HudiSessionProperties.getColumnsToHide;
import static io.trino.plugin.hudi.HudiTableProperties.LOCATION_PROPERTY;
import static io.trino.plugin.hudi.HudiTableProperties.PARTITIONED_BY_PROPERTY;
-import static io.trino.plugin.hudi.model.HoodieTableType.COPY_ON_WRITE;
+import static io.trino.plugin.hudi.HudiUtil.isHudiTable;
+import static io.trino.plugin.hudi.model.HudiTableType.COPY_ON_WRITE;
import static io.trino.spi.StandardErrorCode.UNSUPPORTED_TABLE_TYPE;
import static io.trino.spi.connector.SchemaTableName.schemaTableName;
import static java.lang.String.format;
import static java.util.Collections.singletonList;
import static java.util.Objects.requireNonNull;
import static java.util.function.Function.identity;
-import static org.apache.hudi.common.table.HoodieTableMetaClient.METAFOLDER_NAME;
public class HudiMetadata
implements ConnectorMetadata
@@ -79,14 +74,12 @@ public class HudiMetadata
public static final Logger log = Logger.get(HudiMetadata.class);
private final HiveMetastore metastore;
- private final HdfsEnvironment hdfsEnvironment;
private final TrinoFileSystemFactory fileSystemFactory;
private final TypeManager typeManager;
- public HudiMetadata(HiveMetastore metastore, HdfsEnvironment hdfsEnvironment, TrinoFileSystemFactory fileSystemFactory, TypeManager typeManager)
+ public HudiMetadata(HiveMetastore metastore, TrinoFileSystemFactory fileSystemFactory, TypeManager typeManager)
{
this.metastore = requireNonNull(metastore, "metastore is null");
- this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null");
this.typeManager = requireNonNull(typeManager, "typeManager is null");
}
@@ -109,7 +102,7 @@ public HudiTableHandle getTableHandle(ConnectorSession session, SchemaTableName
if (table.isEmpty()) {
return null;
}
- if (!isHudiTable(session, table.get())) {
+ if (!isHudiTable(fileSystemFactory.create(session), Location.of(table.get().getStorage().getLocation()))) {
throw new TrinoException(UNSUPPORTED_TABLE_TYPE, format("Not a Hudi table: %s", tableName));
}
return new HudiTableHandle(
@@ -124,11 +117,11 @@ public HudiTableHandle getTableHandle(ConnectorSession session, SchemaTableName
@Override
public Optional<SystemTable> getSystemTable(ConnectorSession session, SchemaTableName tableName)
{
- return getRawSystemTable(tableName)
+ return getRawSystemTable(tableName, session)
.map(systemTable -> new ClassLoaderSafeSystemTable(systemTable, getClass().getClassLoader()));
}
- private Optional<SystemTable> getRawSystemTable(SchemaTableName tableName)
+ private Optional<SystemTable> getRawSystemTable(SchemaTableName tableName, ConnectorSession session)
{
HudiTableName name = HudiTableName.from(tableName.getTableName());
if (name.getTableType() == TableType.DATA) {
@@ -144,7 +137,7 @@ private Optional getRawSystemTable(SchemaTableName tableName)
break;
case TIMELINE:
SchemaTableName systemTableName = new SchemaTableName(tableName.getSchemaName(), name.getTableNameWithType());
- return Optional.of(new TimelineTable(hdfsEnvironment, systemTableName, tableOptional.get()));
+ return Optional.of(new TimelineTable(fileSystemFactory.create(session), systemTableName, tableOptional.get()));
}
return Optional.empty();
}
@@ -227,31 +220,6 @@ HiveMetastore getMetastore()
return metastore;
}
- private boolean isHudiTable(ConnectorSession session, Table table)
- {
- String basePath = table.getStorage().getLocation();
- try {
- Location baseLocation = Location.of(basePath);
- Location metaLocation = baseLocation.appendPath(METAFOLDER_NAME);
-
- TrinoFileSystem trinoFileSystem = fileSystemFactory.create(session);
- FileIterator iterator = trinoFileSystem.listFiles(metaLocation);
- // If there is at least one file in the .hoodie directory, it's a valid Hudi table
- if (!iterator.hasNext()) {
- log.warn("Could not find Hudi table at path '%s'.", basePath);
- return false;
- }
- }
- catch (IllegalArgumentException e) {
- log.warn("Could not find Hudi table at path '%s'. Error: %s", basePath, e.getMessage());
- return false;
- }
- catch (IOException e) {
- throw new TrinoException(HUDI_FILESYSTEM_ERROR, format("Could not check if %s is a valid table", basePath), e);
- }
- return true;
- }
-
private Optional<TableColumnsMetadata> getTableColumnMetadata(ConnectorSession session, SchemaTableName table)
{
try {
diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiMetadataFactory.java b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiMetadataFactory.java
index 6ec7ec71564e..7f855394a4c9 100644
--- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiMetadataFactory.java
+++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiMetadataFactory.java
@@ -14,7 +14,6 @@
package io.trino.plugin.hudi;
import io.trino.filesystem.TrinoFileSystemFactory;
-import io.trino.hdfs.HdfsEnvironment;
import io.trino.plugin.hive.metastore.HiveMetastore;
import io.trino.plugin.hive.metastore.HiveMetastoreFactory;
import io.trino.spi.security.ConnectorIdentity;
@@ -29,15 +28,13 @@
public class HudiMetadataFactory
{
private final HiveMetastoreFactory metastoreFactory;
- private final HdfsEnvironment hdfsEnvironment;
private final TrinoFileSystemFactory fileSystemFactory;
private final TypeManager typeManager;
@Inject
- public HudiMetadataFactory(HiveMetastoreFactory metastoreFactory, HdfsEnvironment hdfsEnvironment, TrinoFileSystemFactory fileSystemFactory, TypeManager typeManager)
+ public HudiMetadataFactory(HiveMetastoreFactory metastoreFactory, TrinoFileSystemFactory fileSystemFactory, TypeManager typeManager)
{
this.metastoreFactory = requireNonNull(metastoreFactory, "metastore is null");
- this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null");
this.typeManager = requireNonNull(typeManager, "typeManager is null");
}
@@ -45,6 +42,6 @@ public HudiMetadataFactory(HiveMetastoreFactory metastoreFactory, HdfsEnvironmen
public HudiMetadata create(ConnectorIdentity identity)
{
HiveMetastore metastore = metastoreFactory.createMetastore(Optional.of(identity));
- return new HudiMetadata(metastore, hdfsEnvironment, fileSystemFactory, typeManager);
+ return new HudiMetadata(metastore, fileSystemFactory, typeManager);
}
}
diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiPageSourceProvider.java b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiPageSourceProvider.java
index 43cd48d70f07..beab6f5bdd54 100644
--- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiPageSourceProvider.java
+++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiPageSourceProvider.java
@@ -31,7 +31,7 @@
import io.trino.plugin.hive.ReaderColumns;
import io.trino.plugin.hive.parquet.ParquetReaderConfig;
import io.trino.plugin.hive.parquet.TrinoParquetDataSource;
-import io.trino.plugin.hudi.model.HoodieFileFormat;
+import io.trino.plugin.hudi.model.HudiFileFormat;
import io.trino.spi.TrinoException;
import io.trino.spi.block.Block;
import io.trino.spi.connector.ColumnHandle;
@@ -146,9 +146,9 @@ public ConnectorPageSource createPageSource(
DynamicFilter dynamicFilter)
{
HudiSplit split = (HudiSplit) connectorSplit;
- String path = split.getPath();
- HoodieFileFormat hudiFileFormat = getHudiFileFormat(path);
- if (!HoodieFileFormat.PARQUET.equals(hudiFileFormat)) {
+ String path = split.getLocation();
+ HudiFileFormat hudiFileFormat = getHudiFileFormat(path);
+ if (!HudiFileFormat.PARQUET.equals(hudiFileFormat)) {
throw new TrinoException(HUDI_UNSUPPORTED_FILE_FORMAT, format("File format %s not supported", hudiFileFormat));
}
@@ -185,7 +185,7 @@ private static ConnectorPageSource createPageSource(
{
ParquetDataSource dataSource = null;
boolean useColumnNames = shouldUseParquetColumnNames(session);
- String path = hudiSplit.getPath();
+ String path = hudiSplit.getLocation();
long start = hudiSplit.getStart();
long length = hudiSplit.getLength();
try {
diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiSplit.java b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiSplit.java
index 0081e35fa7d1..5c752221e500 100644
--- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiSplit.java
+++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiSplit.java
@@ -33,7 +33,7 @@
public class HudiSplit
implements ConnectorSplit
{
- private final String path;
+ private final String location;
private final long start;
private final long length;
private final long fileSize;
@@ -45,7 +45,7 @@ public class HudiSplit
@JsonCreator
public HudiSplit(
- @JsonProperty("path") String path,
+ @JsonProperty("location") String location,
@JsonProperty("start") long start,
@JsonProperty("length") long length,
@JsonProperty("fileSize") long fileSize,
@@ -59,7 +59,7 @@ public HudiSplit(
checkArgument(length >= 0, "length must be positive");
checkArgument(start + length <= fileSize, "fileSize must be at least start + length");
- this.path = requireNonNull(path, "path is null");
+ this.location = requireNonNull(location, "location is null");
this.start = start;
this.length = length;
this.fileSize = fileSize;
public List<HostAddress> getAddresses()
public Object getInfo()
{
return ImmutableMap.builder()
- .put("path", path)
+ .put("location", location)
.put("start", start)
.put("length", length)
.put("fileSize", fileSize)
@@ -103,9 +103,9 @@ public SplitWeight getSplitWeight()
}
@JsonProperty
- public String getPath()
+ public String getLocation()
{
- return path;
+ return location;
}
@JsonProperty
@@ -148,7 +148,7 @@ public List<HivePartitionKey> getPartitionKeys()
public String toString()
{
return toStringHelper(this)
- .addValue(path)
+ .addValue(location)
.addValue(start)
.addValue(length)
.addValue(fileSize)
diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiSplitManager.java b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiSplitManager.java
index 3b1006e2c3e4..c9a25c3db15f 100644
--- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiSplitManager.java
+++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiSplitManager.java
@@ -13,7 +13,7 @@
*/
package io.trino.plugin.hudi;
-import io.trino.hdfs.HdfsEnvironment;
+import io.trino.filesystem.TrinoFileSystemFactory;
import io.trino.plugin.base.classloader.ClassLoaderSafeConnectorSplitSource;
import io.trino.plugin.hive.HiveColumnHandle;
import io.trino.plugin.hive.HiveTransactionHandle;
@@ -46,7 +46,7 @@ public class HudiSplitManager
{
private final HudiTransactionManager transactionManager;
private final BiFunction<ConnectorIdentity, HiveTransactionHandle, HiveMetastore> metastoreProvider;
- private final HdfsEnvironment hdfsEnvironment;
+ private final TrinoFileSystemFactory fileSystemFactory;
private final ExecutorService executor;
private final int maxSplitsPerSecond;
private final int maxOutstandingSplits;
@@ -55,14 +55,14 @@ public class HudiSplitManager
public HudiSplitManager(
HudiTransactionManager transactionManager,
BiFunction<ConnectorIdentity, HiveTransactionHandle, HiveMetastore> metastoreProvider,
- HdfsEnvironment hdfsEnvironment,
@ForHudiSplitManager ExecutorService executor,
+ TrinoFileSystemFactory fileSystemFactory,
HudiConfig hudiConfig)
{
this.transactionManager = requireNonNull(transactionManager, "transactionManager is null");
this.metastoreProvider = requireNonNull(metastoreProvider, "metastoreProvider is null");
- this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
this.executor = requireNonNull(executor, "executor is null");
+ this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null");
this.maxSplitsPerSecond = requireNonNull(hudiConfig, "hudiConfig is null").getMaxSplitsPerSecond();
this.maxOutstandingSplits = hudiConfig.getMaxOutstandingSplits();
}
@@ -95,7 +95,7 @@ public ConnectorSplitSource getSplits(
metastore,
table,
hudiTableHandle,
- hdfsEnvironment,
+ fileSystemFactory,
partitionColumnHandles,
executor,
maxSplitsPerSecond,
diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiSplitSource.java b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiSplitSource.java
index 6b367071c984..edeb4c2f0403 100644
--- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiSplitSource.java
+++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiSplitSource.java
@@ -15,8 +15,7 @@
import com.google.common.util.concurrent.Futures;
import io.airlift.units.DataSize;
-import io.trino.hdfs.HdfsContext;
-import io.trino.hdfs.HdfsEnvironment;
+import io.trino.filesystem.TrinoFileSystemFactory;
import io.trino.plugin.hive.HiveColumnHandle;
import io.trino.plugin.hive.metastore.HiveMetastore;
import io.trino.plugin.hive.metastore.Table;
@@ -27,15 +26,11 @@
import io.trino.plugin.hudi.split.HudiBackgroundSplitLoader;
import io.trino.plugin.hudi.split.HudiSplitWeightProvider;
import io.trino.plugin.hudi.split.SizeBasedSplitWeightProvider;
+import io.trino.plugin.hudi.table.HudiTableMetaClient;
import io.trino.spi.TrinoException;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.ConnectorSplit;
import io.trino.spi.connector.ConnectorSplitSource;
-import org.apache.hadoop.fs.Path;
-import org.apache.hudi.common.config.HoodieMetadataConfig;
-import org.apache.hudi.common.engine.HoodieEngineContext;
-import org.apache.hudi.common.engine.HoodieLocalEngineContext;
-import org.apache.hudi.common.table.HoodieTableMetaClient;
import java.util.List;
import java.util.Map;
@@ -47,7 +42,6 @@
import static io.airlift.concurrent.MoreFutures.toCompletableFuture;
import static io.trino.plugin.hudi.HudiSessionProperties.getMinimumAssignedSplitWeight;
import static io.trino.plugin.hudi.HudiSessionProperties.getStandardSplitWeightSize;
-import static io.trino.plugin.hudi.HudiSessionProperties.isHudiMetadataEnabled;
import static io.trino.plugin.hudi.HudiSessionProperties.isSizeBasedSplitWeightsEnabled;
import static io.trino.plugin.hudi.HudiUtil.buildTableMetaClient;
import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR;
@@ -64,24 +58,17 @@ public HudiSplitSource(
HiveMetastore metastore,
Table table,
HudiTableHandle tableHandle,
- HdfsEnvironment hdfsEnvironment,
+ TrinoFileSystemFactory fileSystemFactory,
Map<String, HiveColumnHandle> partitionColumnHandleMap,
ExecutorService executor,
int maxSplitsPerSecond,
int maxOutstandingSplits)
{
- boolean metadataEnabled = isHudiMetadataEnabled(session);
- HoodieTableMetaClient metaClient = buildTableMetaClient(hdfsEnvironment, session, tableHandle.getBasePath());
- HoodieEngineContext engineContext = new HoodieLocalEngineContext(hdfsEnvironment.getConfiguration(new HdfsContext(session), new Path(tableHandle.getBasePath())));
- HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder()
- .enable(metadataEnabled)
- .build();
+ HudiTableMetaClient metaClient = buildTableMetaClient(fileSystemFactory.create(session), tableHandle.getBasePath());
List<HiveColumnHandle> partitionColumnHandles = table.getPartitionColumns().stream()
.map(column -> partitionColumnHandleMap.get(column.getName())).collect(toList());
HudiDirectoryLister hudiDirectoryLister = new HudiReadOptimizedDirectoryLister(
- metadataConfig,
- engineContext,
tableHandle,
metaClient,
metastore,
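The meta client is now built purely from the Trino filesystem abstraction: no Hadoop Configuration or engine context is involved, and the Hudi metadata-table session toggle no longer applies on this path. A sketch of the construction, mirroring the builder shown in HudiUtil below (the session and table-handle names are illustrative):

    TrinoFileSystem fileSystem = fileSystemFactory.create(session);
    HudiTableMetaClient metaClient = HudiTableMetaClient.builder()
            .setTrinoFileSystem(fileSystem)
            .setBasePath(Location.of(tableHandle.getBasePath()))
            .build();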
diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiTableHandle.java b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiTableHandle.java
index 54f6dc38ae6c..0da9f2d897a7 100644
--- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiTableHandle.java
+++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiTableHandle.java
@@ -16,7 +16,7 @@
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.trino.plugin.hive.HiveColumnHandle;
-import io.trino.plugin.hudi.model.HoodieTableType;
+import io.trino.plugin.hudi.model.HudiTableType;
import io.trino.spi.connector.ConnectorTableHandle;
import io.trino.spi.connector.SchemaTableName;
import io.trino.spi.predicate.TupleDomain;
@@ -30,7 +30,7 @@ public class HudiTableHandle
private final String schemaName;
private final String tableName;
private final String basePath;
- private final HoodieTableType tableType;
+ private final HudiTableType tableType;
private final TupleDomain<HiveColumnHandle> partitionPredicates;
private final TupleDomain<HiveColumnHandle> regularPredicates;
@@ -39,7 +39,7 @@ public HudiTableHandle(
@JsonProperty("schemaName") String schemaName,
@JsonProperty("tableName") String tableName,
@JsonProperty("basePath") String basePath,
- @JsonProperty("tableType") HoodieTableType tableType,
+ @JsonProperty("tableType") HudiTableType tableType,
@JsonProperty("partitionPredicates") TupleDomain partitionPredicates,
@JsonProperty("regularPredicates") TupleDomain regularPredicates)
{
@@ -70,7 +70,7 @@ public String getBasePath()
}
@JsonProperty
- public HoodieTableType getTableType()
+ public HudiTableType getTableType()
{
return tableType;
}
diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiUtil.java b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiUtil.java
index 5a94618a2b4b..88ef46c2704c 100644
--- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiUtil.java
+++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiUtil.java
@@ -15,53 +15,54 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
+import io.trino.filesystem.FileIterator;
import io.trino.filesystem.Location;
-import io.trino.hdfs.HdfsContext;
-import io.trino.hdfs.HdfsEnvironment;
+import io.trino.filesystem.TrinoFileSystem;
import io.trino.plugin.hive.HiveColumnHandle;
import io.trino.plugin.hive.HivePartition;
import io.trino.plugin.hive.HivePartitionKey;
import io.trino.plugin.hive.HivePartitionManager;
import io.trino.plugin.hive.metastore.Column;
-import io.trino.plugin.hudi.model.HoodieFileFormat;
+import io.trino.plugin.hudi.model.HudiFileFormat;
+import io.trino.plugin.hudi.table.HudiTableMetaClient;
import io.trino.spi.TrinoException;
import io.trino.spi.connector.ColumnHandle;
-import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.SchemaTableName;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.NullableValue;
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.type.Type;
-import org.apache.hadoop.fs.Path;
-import org.apache.hudi.common.table.HoodieTableMetaClient;
+import java.io.IOException;
import java.util.List;
import java.util.Map;
import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA;
import static io.trino.plugin.hive.util.HiveUtil.checkCondition;
import static io.trino.plugin.hive.util.HiveUtil.parsePartitionValue;
+import static io.trino.plugin.hudi.HudiErrorCode.HUDI_FILESYSTEM_ERROR;
import static io.trino.plugin.hudi.HudiErrorCode.HUDI_UNSUPPORTED_FILE_FORMAT;
+import static io.trino.plugin.hudi.table.HudiTableMetaClient.METAFOLDER_NAME;
import static java.util.stream.Collectors.toList;
public final class HudiUtil
{
private HudiUtil() {}
- public static HoodieFileFormat getHudiFileFormat(String path)
+ public static HudiFileFormat getHudiFileFormat(String path)
{
String extension = getFileExtension(path);
- if (extension.equals(HoodieFileFormat.PARQUET.getFileExtension())) {
- return HoodieFileFormat.PARQUET;
+ if (extension.equals(HudiFileFormat.PARQUET.getFileExtension())) {
+ return HudiFileFormat.PARQUET;
}
- if (extension.equals(HoodieFileFormat.HOODIE_LOG.getFileExtension())) {
- return HoodieFileFormat.HOODIE_LOG;
+ if (extension.equals(HudiFileFormat.HOODIE_LOG.getFileExtension())) {
+ return HudiFileFormat.HOODIE_LOG;
}
- if (extension.equals(HoodieFileFormat.ORC.getFileExtension())) {
- return HoodieFileFormat.ORC;
+ if (extension.equals(HudiFileFormat.ORC.getFileExtension())) {
+ return HudiFileFormat.ORC;
}
- if (extension.equals(HoodieFileFormat.HFILE.getFileExtension())) {
- return HoodieFileFormat.HFILE;
+ if (extension.equals(HudiFileFormat.HFILE.getFileExtension())) {
+ return HudiFileFormat.HFILE;
}
throw new TrinoException(HUDI_UNSUPPORTED_FILE_FORMAT, "Hoodie InputFormat not implemented for base file of type " + extension);
}
@@ -73,6 +74,22 @@ private static String getFileExtension(String fullName)
return dotIndex == -1 ? "" : fileName.substring(dotIndex);
}
+ public static boolean isHudiTable(TrinoFileSystem trinoFileSystem, Location baseLocation)
+ {
+ try {
+ Location metaLocation = baseLocation.appendPath(METAFOLDER_NAME);
+ FileIterator iterator = trinoFileSystem.listFiles(metaLocation);
+ // If there is at least one file in the .hoodie directory, it's a valid Hudi table
+ if (!iterator.hasNext()) {
+ return false;
+ }
+ }
+ catch (IOException e) {
+ throw new TrinoException(HUDI_FILESYSTEM_ERROR, "Failed to check for Hudi table at location: " + baseLocation, e);
+ }
+ return true;
+ }
+
public static boolean partitionMatchesPredicates(
SchemaTableName tableName,
String hivePartitionName,
@@ -149,11 +166,13 @@ public static List<HivePartitionKey> buildPartitionKeys(List<Column> keys, List<String> values)
return partitionKeys.build();
}
- public static HoodieTableMetaClient buildTableMetaClient(HdfsEnvironment hdfsEnvironment, ConnectorSession session, String basePath)
+ public static HudiTableMetaClient buildTableMetaClient(
+ TrinoFileSystem fileSystem,
+ String basePath)
{
- HoodieTableMetaClient client = HoodieTableMetaClient.builder().setConf(hdfsEnvironment.getConfiguration(new HdfsContext(session), new Path(basePath))).setBasePath(basePath).build();
- // Do not load the bootstrap index, will not read bootstrap base data or a mapping index defined
- client.getTableConfig().setValue("hoodie.bootstrap.index.enable", "false");
- return client;
+ return HudiTableMetaClient.builder()
+ .setTrinoFileSystem(fileSystem)
+ .setBasePath(Location.of(basePath))
+ .build();
}
}
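Taken together, the two helpers let callers validate and open a table with nothing but a TrinoFileSystem. A usage sketch matching the call sites in HudiMetadata above (variable names are illustrative):

    TrinoFileSystem fileSystem = fileSystemFactory.create(session);
    Location baseLocation = Location.of(table.getStorage().getLocation());
    if (!isHudiTable(fileSystem, baseLocation)) {
        throw new TrinoException(UNSUPPORTED_TABLE_TYPE, format("Not a Hudi table: %s", tableName));
    }
    HudiTableMetaClient metaClient = buildTableMetaClient(fileSystem, baseLocation.toString());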
diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/TimelineTable.java b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/TimelineTable.java
index 42a005a0a476..da6a56e1ff0b 100644
--- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/TimelineTable.java
+++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/TimelineTable.java
@@ -14,8 +14,10 @@
package io.trino.plugin.hudi;
import com.google.common.collect.ImmutableList;
-import io.trino.hdfs.HdfsEnvironment;
+import io.trino.filesystem.TrinoFileSystem;
import io.trino.plugin.hive.metastore.Table;
+import io.trino.plugin.hudi.model.HudiInstant;
+import io.trino.plugin.hudi.table.HudiTableMetaClient;
import io.trino.spi.connector.ColumnMetadata;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.ConnectorTableMetadata;
@@ -26,8 +28,6 @@
import io.trino.spi.connector.SystemTable;
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.type.Type;
-import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.timeline.HoodieInstant;
import java.util.ArrayList;
import java.util.List;
@@ -43,10 +43,10 @@ public class TimelineTable
{
private final ConnectorTableMetadata tableMetadata;
private final List<Type> types;
- private final HdfsEnvironment hdfsEnvironment;
+ private final TrinoFileSystem fileSystem;
private final String location;
- public TimelineTable(HdfsEnvironment hdfsEnvironment, SchemaTableName tableName, Table hudiTable)
+ public TimelineTable(TrinoFileSystem fileSystem, SchemaTableName tableName, Table hudiTable)
{
this.tableMetadata = new ConnectorTableMetadata(requireNonNull(tableName, "tableName is null"),
ImmutableList.<ColumnMetadata>builder()
@@ -55,7 +55,7 @@ public TimelineTable(HdfsEnvironment hdfsEnvironment, SchemaTableName tableName,
.add(new ColumnMetadata("state", VARCHAR))
.build());
this.types = tableMetadata.getColumns().stream().map(ColumnMetadata::getType).collect(toImmutableList());
- this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
+ this.fileSystem = requireNonNull(fileSystem, "fileSystem is null");
this.location = requireNonNull(hudiTable.getStorage().getLocation(), "location is null");
}
@@ -74,12 +74,12 @@ public ConnectorTableMetadata getTableMetadata()
@Override
public RecordCursor cursor(ConnectorTransactionHandle transactionHandle, ConnectorSession session, TupleDomain<Integer> constraint)
{
- HoodieTableMetaClient metaClient = buildTableMetaClient(hdfsEnvironment, session, location);
+ HudiTableMetaClient metaClient = buildTableMetaClient(fileSystem, location);
Iterable<List<Object>> records = () -> metaClient.getCommitsTimeline().getInstants().map(this::getRecord).iterator();
return new InMemoryRecordSet(types, records).cursor();
}
- private List<Object> getRecord(HoodieInstant hudiInstant)