Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -188,9 +188,9 @@ public final class HiveUtil

// Input formats class names are listed below as String due to hudi-hadoop-mr dependency is not in the context of trino-hive plugin
public static final String HUDI_PARQUET_INPUT_FORMAT = "org.apache.hudi.hadoop.HoodieParquetInputFormat";
private static final String HUDI_PARQUET_REALTIME_INPUT_FORMAT = "org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat";
private static final String HUDI_INPUT_FORMAT = "com.uber.hoodie.hadoop.HoodieInputFormat";
private static final String HUDI_REALTIME_INPUT_FORMAT = "com.uber.hoodie.hadoop.realtime.HoodieRealtimeInputFormat";
public static final String HUDI_PARQUET_REALTIME_INPUT_FORMAT = "org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat";
public static final String HUDI_INPUT_FORMAT = "com.uber.hoodie.hadoop.HoodieInputFormat";
public static final String HUDI_REALTIME_INPUT_FORMAT = "com.uber.hoodie.hadoop.realtime.HoodieRealtimeInputFormat";

private static final HexFormat HEX_UPPER_FORMAT = HexFormat.of().withUpperCase();

Expand Down
113 changes: 94 additions & 19 deletions plugin/trino-hudi/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@
<artifactId>guice</artifactId>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>aircompressor</artifactId>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>bootstrap</artifactId>
Expand Down Expand Up @@ -110,6 +115,11 @@
<artifactId>trino-plugin-toolkit</artifactId>
</dependency>

<dependency>
<groupId>io.trino.hadoop</groupId>
<artifactId>hadoop-apache</artifactId>
</dependency>

<dependency>
<groupId>io.trino.hive</groupId>
<artifactId>hive-apache</artifactId>
Expand All @@ -135,6 +145,90 @@
<artifactId>avro</artifactId>
</dependency>

<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-common</artifactId>
<version>${dep.hudi.version}</version>
<exclusions>
<exclusion>
<groupId>com.esotericsoftware</groupId>
<artifactId>kryo-shaded</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>fluent-hc</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.orc</groupId>
<artifactId>orc-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.lz4</groupId>
<artifactId>lz4-java</artifactId>
</exclusion>
<exclusion>
<groupId>org.osgi</groupId>
<artifactId>org.osgi.core</artifactId>
</exclusion>
<exclusion>
<groupId>org.rocksdb</groupId>
<artifactId>rocksdbjni</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-hadoop-mr</artifactId>
<version>${dep.hudi.version}</version>
<exclusions>
<exclusion>
<groupId>*</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>org.weakref</groupId>
<artifactId>jmxutils</artifactId>
Expand Down Expand Up @@ -188,12 +282,6 @@
<scope>runtime</scope>
</dependency>

<dependency>
<groupId>io.trino.hadoop</groupId>
<artifactId>hadoop-apache</artifactId>
<scope>runtime</scope>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>testing</artifactId>
Expand Down Expand Up @@ -332,19 +420,6 @@
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-common</artifactId>
<version>${dep.hudi.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>*</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-java-client</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ public enum HudiErrorCode
HUDI_CURSOR_ERROR(6, EXTERNAL),
HUDI_FILESYSTEM_ERROR(7, EXTERNAL),
HUDI_PARTITION_NOT_FOUND(8, EXTERNAL),
// HUDI_UNSUPPORTED_TABLE_TYPE(9, EXTERNAL), // Unused. Could be mistaken with HUDI_UNKNOWN_TABLE_TYPE.

HUDI_UNSUPPORTED_TABLE_TYPE(9, EXTERNAL),
HUDI_NO_VALID_COMMIT(10, EXTERNAL)
/**/;

private final ErrorCode errorCode;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,82 @@
*/
package io.trino.plugin.hudi;

import io.trino.filesystem.Location;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;

import static com.google.common.base.Preconditions.checkArgument;
import static java.util.Objects.requireNonNull;

public record HudiFileStatus(Location location, boolean isDirectory, long length, long modificationTime, long blockSize)
public class HudiFileStatus
{
public HudiFileStatus
private final boolean isDirectory;
private final String fileName;
private final String location;
private final long blockSize;
private final long length;
private final long offset;
private final long modificationTime;

@JsonCreator
public HudiFileStatus(@JsonProperty("isDirectory") boolean isDirectory,
@JsonProperty("fileName") String fileName,
@JsonProperty("location") String location,
@JsonProperty("length") long length,
@JsonProperty("modificationTime") long modificationTime,
@JsonProperty("blockSize") long blockSize,
@JsonProperty("offset") long offset)
{
checkArgument(blockSize > 0, "blockSize myst be positive");
checkArgument(length > 0, "length myst be positive");
checkArgument(offset >= 0, "offset myst be positive");
this.isDirectory = isDirectory;
this.fileName = requireNonNull(fileName, "fileName is null");
this.location = requireNonNull(location, "location is null");
this.blockSize = blockSize;
this.length = length;
this.offset = offset;
this.modificationTime = modificationTime;
}

@JsonProperty
public boolean isDirectory()
{
return isDirectory;
}

@JsonProperty
public String getFileName()
{
return fileName;
}

@JsonProperty
public String getLocation()
{
return location;
}

@JsonProperty
public long getBlockSize()
{
return blockSize;
}

@JsonProperty
public long getLength()
{
return length;
}

@JsonProperty
public long getOffset()
{
return offset;
}

@JsonProperty
public long getModificationTime()
{
requireNonNull(location, "location is null");
return modificationTime;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import io.trino.plugin.hive.metastore.Column;
import io.trino.plugin.hive.metastore.HiveMetastore;
import io.trino.plugin.hive.metastore.Table;
import io.trino.plugin.hudi.model.HudiTableType;
import io.trino.spi.TrinoException;
import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.connector.ColumnMetadata;
Expand Down Expand Up @@ -52,6 +53,7 @@
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static io.trino.plugin.hive.HiveMetadata.TABLE_COMMENT;
import static io.trino.plugin.hive.HiveTimestampPrecision.NANOSECONDS;
import static io.trino.plugin.hive.metastore.MetastoreUtil.getHiveSchema;
import static io.trino.plugin.hive.util.HiveUtil.columnMetadataGetter;
import static io.trino.plugin.hive.util.HiveUtil.hiveColumnHandles;
import static io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema;
Expand All @@ -61,7 +63,6 @@
import static io.trino.plugin.hudi.HudiTableProperties.LOCATION_PROPERTY;
import static io.trino.plugin.hudi.HudiTableProperties.PARTITIONED_BY_PROPERTY;
import static io.trino.plugin.hudi.HudiUtil.hudiMetadataExists;
import static io.trino.plugin.hudi.model.HudiTableType.COPY_ON_WRITE;
import static io.trino.spi.StandardErrorCode.UNSUPPORTED_TABLE_TYPE;
import static io.trino.spi.connector.SchemaTableName.schemaTableName;
import static java.lang.String.format;
Expand Down Expand Up @@ -109,13 +110,17 @@ public HudiTableHandle getTableHandle(ConnectorSession session, SchemaTableName
throw new TrinoException(HUDI_BAD_DATA, "Location of table %s does not contain Hudi table metadata: %s".formatted(tableName, location));
}

String inputFormat = table.get().getStorage().getStorageFormat().getInputFormat();
HudiTableType hudiTableType = HudiTableType.fromInputFormat(inputFormat);

return new HudiTableHandle(
tableName.getSchemaName(),
tableName.getTableName(),
table.get().getStorage().getLocation(),
COPY_ON_WRITE,
hudiTableType,
TupleDomain.all(),
TupleDomain.all(),
TupleDomain.all());
getHiveSchema(table.get()));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import com.google.inject.Provides;
import com.google.inject.Scopes;
import com.google.inject.Singleton;
import io.trino.hdfs.HdfsEnvironment;
import io.trino.plugin.base.session.SessionPropertiesProvider;
import io.trino.plugin.hive.FileFormatDataSourceStats;
import io.trino.plugin.hive.HiveNodePartitioningProvider;
Expand Down Expand Up @@ -59,6 +60,7 @@ public void configure(Binder binder)
newSetBinder(binder, SessionPropertiesProvider.class).addBinding().to(HudiSessionProperties.class).in(Scopes.SINGLETON);
binder.bind(HudiTableProperties.class).in(Scopes.SINGLETON);

binder.bind(HdfsEnvironment.class).in(Scopes.SINGLETON);
binder.bind(ConnectorSplitManager.class).to(HudiSplitManager.class).in(Scopes.SINGLETON);
binder.bind(ConnectorPageSourceProvider.class).to(HudiPageSourceProvider.class).in(Scopes.SINGLETON);
binder.bind(ConnectorNodePartitioningProvider.class).to(HiveNodePartitioningProvider.class).in(Scopes.SINGLETON);
Expand Down
Loading