diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/HdfsInputFile.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/HdfsInputFile.java
deleted file mode 100644
index 838901c0d01e..000000000000
--- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/HdfsInputFile.java
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package io.trino.plugin.iceberg;
-
-import io.trino.plugin.hive.HdfsEnvironment;
-import io.trino.plugin.hive.HdfsEnvironment.HdfsContext;
-import io.trino.spi.TrinoException;
-import io.trino.spi.security.ConnectorIdentity;
-import org.apache.hadoop.fs.Path;
-import org.apache.iceberg.hadoop.HadoopInputFile;
-import org.apache.iceberg.io.InputFile;
-import org.apache.iceberg.io.SeekableInputStream;
-
-import java.io.IOException;
-
-import static com.google.common.base.MoreObjects.toStringHelper;
-import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_FILESYSTEM_ERROR;
-import static java.util.Objects.requireNonNull;
-
-public class HdfsInputFile
-        implements InputFile
-{
-    private final InputFile delegate;
-    private final HdfsEnvironment environment;
-    private final ConnectorIdentity identity;
-
-    public HdfsInputFile(Path path, HdfsEnvironment environment, HdfsContext context)
-    {
-        requireNonNull(path, "path is null");
-        this.environment = requireNonNull(environment, "environment is null");
-        requireNonNull(context, "context is null");
-        try {
-            this.delegate = HadoopInputFile.fromPath(path, environment.getFileSystem(context, path), environment.getConfiguration(context, path));
-        }
-        catch (IOException e) {
-            throw new TrinoException(ICEBERG_FILESYSTEM_ERROR, "Failed to create input file: " + path, e);
-        }
-        this.identity = context.getIdentity();
-    }
-
-    @Override
-    public long getLength()
-    {
-        return environment.doAs(identity, delegate::getLength);
-    }
-
-    @Override
-    public SeekableInputStream newStream()
-    {
-        // Hack: this wrapping is required to circumvent https://github.com/trinodb/trino/issues/5201
-        return new HdfsInputStream(environment.doAs(identity, delegate::newStream));
-    }
-
-    @Override
-    public String location()
-    {
-        return delegate.location();
-    }
-
-    @Override
-    public boolean exists()
-    {
-        return environment.doAs(identity, delegate::exists);
-    }
-
-    @Override
-    public String toString()
-    {
-        return toStringHelper(this)
-                .add("delegate", delegate)
-                .add("identity", identity)
-                .toString();
-    }
-
-    private static class HdfsInputStream
-            extends SeekableInputStream
-    {
-        private final SeekableInputStream delegate;
-
-        public HdfsInputStream(SeekableInputStream delegate)
-        {
-            this.delegate = requireNonNull(delegate, "delegate is null");
-        }
-
-        @Override
-        public int read()
-                throws IOException
-        {
-            return delegate.read();
-        }
-
-        @Override
-        public int read(byte[] b, int off, int len)
-                throws IOException
-        {
-            return delegate.read(b, off, len);
-        }
-
-        @Override
-        public long getPos()
-                throws IOException
-        {
-            return delegate.getPos();
-        }
-
-        @Override
-        public void seek(long newPos)
-                throws IOException
-        {
-            delegate.seek(newPos);
-        }
-
-        @Override
-        public void close()
-                throws IOException
-        {
-            delegate.close();
-        }
-    }
-}
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/HdfsOutputFile.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/HdfsOutputFile.java
deleted file mode 100644
index 818f8a50226a..000000000000
--- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/HdfsOutputFile.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package io.trino.plugin.iceberg;
-
-import io.trino.plugin.hive.HdfsEnvironment;
-import io.trino.plugin.hive.HdfsEnvironment.HdfsContext;
-import io.trino.spi.TrinoException;
-import org.apache.hadoop.fs.Path;
-import org.apache.iceberg.hadoop.HadoopOutputFile;
-import org.apache.iceberg.io.InputFile;
-import org.apache.iceberg.io.OutputFile;
-import org.apache.iceberg.io.PositionOutputStream;
-
-import java.io.IOException;
-
-import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_FILESYSTEM_ERROR;
-import static java.util.Objects.requireNonNull;
-
-public class HdfsOutputFile
-        implements OutputFile
-{
-    private final OutputFile delegate;
-    private final Path path;
-    private final HdfsEnvironment environment;
-    private final HdfsContext context;
-
-    public HdfsOutputFile(Path path, HdfsEnvironment environment, HdfsContext context)
-    {
-        this.path = requireNonNull(path, "path is null");
-        this.environment = requireNonNull(environment, "environment is null");
-        this.context = requireNonNull(context, "context is null");
-        try {
-            this.delegate = HadoopOutputFile.fromPath(path, environment.getFileSystem(context, path), environment.getConfiguration(context, path));
-        }
-        catch (IOException e) {
-            throw new TrinoException(ICEBERG_FILESYSTEM_ERROR, "Failed to create output file: " + path.toString(), e);
-        }
-    }
-
-    @Override
-    public PositionOutputStream create()
-    {
-        return environment.doAs(context.getIdentity(), delegate::create);
-    }
-
-    @Override
-    public PositionOutputStream createOrOverwrite()
-    {
-        return environment.doAs(context.getIdentity(), delegate::createOrOverwrite);
-    }
-
-    @Override
-    public String location()
-    {
-        return delegate.location();
-    }
-
-    @Override
-    public InputFile toInputFile()
-    {
-        return new HdfsInputFile(path, environment, context);
-    }
-}
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergAvroFileWriter.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergAvroFileWriter.java
index a84991fb633e..14227e34d749 100644
--- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergAvroFileWriter.java
+++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergAvroFileWriter.java
@@ -18,14 +18,13 @@
 import io.trino.spi.Page;
 import io.trino.spi.TrinoException;
 import io.trino.spi.type.Type;
-import org.apache.hadoop.fs.Path;
 import org.apache.iceberg.Metrics;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.avro.Avro;
 import org.apache.iceberg.data.Record;
 import org.apache.iceberg.data.avro.DataWriter;
 import org.apache.iceberg.io.FileAppender;
-import org.apache.iceberg.io.FileIO;
+import org.apache.iceberg.io.OutputFile;
 import org.openjdk.jol.info.ClassLayout;
 
 import java.io.IOException;
@@ -53,8 +52,7 @@ public class IcebergAvroFileWriter
     private final Callable<Void> rollbackAction;
 
     public IcebergAvroFileWriter(
-            FileIO fileIo,
-            Path path,
+            OutputFile file,
             Callable<Void> rollbackAction,
             Schema icebergSchema,
             List<Type> types,
@@ -65,7 +63,7 @@ public IcebergAvroFileWriter(
         this.types = ImmutableList.copyOf(requireNonNull(types, "types is null"));
 
         try {
-            avroWriter = Avro.write(fileIo.newOutputFile(path.toString()))
+            avroWriter = Avro.write(file)
                     .schema(icebergSchema)
                     .createWriterFunc(DataWriter::create)
                     .named(AVRO_TABLE_NAME)
@@ -73,7 +71,7 @@ public IcebergAvroFileWriter(
                     .build();
         }
         catch (IOException e) {
-            throw new TrinoException(ICEBERG_WRITER_OPEN_ERROR, "Error creating Avro file: " + path, e);
+            throw new TrinoException(ICEBERG_WRITER_OPEN_ERROR, "Error creating Avro file: " + file.location(), e);
         }
     }
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergAvroPageSource.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergAvroPageSource.java
index f09a30c8ac6c..3add0c0ceef6 100644
--- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergAvroPageSource.java
+++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergAvroPageSource.java
@@ -25,7 +25,7 @@
 import org.apache.iceberg.data.Record;
 import org.apache.iceberg.data.avro.DataReader;
 import org.apache.iceberg.io.CloseableIterator;
-import org.apache.iceberg.io.FileIO;
+import org.apache.iceberg.io.InputFile;
 import org.apache.iceberg.mapping.NameMapping;
 import org.apache.iceberg.types.Types;
 
@@ -61,8 +61,7 @@ public class IcebergAvroPageSource
     private long readTimeNanos;
 
     public IcebergAvroPageSource(
-            FileIO fileIo,
-            String path,
+            InputFile file,
             long start,
             long length,
             Schema fileSchema,
@@ -72,9 +71,6 @@ public IcebergAvroPageSource(
             List<Boolean> rowIndexLocations,
             AggregatedMemoryContext memoryUsage)
     {
-        requireNonNull(fileIo, "fileIo is null");
-        requireNonNull(path, "path is null");
-        requireNonNull(fileSchema, "fileSchema is null");
         this.columnNames = ImmutableList.copyOf(requireNonNull(columnNames, "columnNames is null"));
         this.columnTypes = ImmutableList.copyOf(requireNonNull(columnTypes, "columnTypes is null"));
         this.rowIndexLocations = ImmutableList.copyOf(requireNonNull(rowIndexLocations, "rowIndexLocations is null"));
@@ -85,7 +81,7 @@ public IcebergAvroPageSource(
 
         // The column orders in the generated schema might be different from the original order
         Schema readSchema = fileSchema.select(columnNames);
-        Avro.ReadBuilder builder = Avro.read(fileIo.newInputFile(path))
+        Avro.ReadBuilder builder = Avro.read(file)
                 .project(readSchema)
                 .createReaderFunc(DataReader::create)
                 .split(start, length);
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergFileWriterFactory.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergFileWriterFactory.java
index f01a2670b228..fbd48f4546e2 100644
--- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergFileWriterFactory.java
+++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergFileWriterFactory.java
@@ -19,25 +19,21 @@
 import io.airlift.units.DataSize;
 import io.trino.orc.OrcDataSink;
 import io.trino.orc.OrcDataSource;
-import io.trino.orc.OrcDataSourceId;
 import io.trino.orc.OrcReaderOptions;
 import io.trino.orc.OrcWriterOptions;
 import io.trino.orc.OrcWriterStats;
 import io.trino.orc.OutputStreamOrcDataSink;
 import io.trino.parquet.writer.ParquetWriterOptions;
 import io.trino.plugin.hive.FileFormatDataSourceStats;
-import io.trino.plugin.hive.HdfsEnvironment;
-import io.trino.plugin.hive.HdfsEnvironment.HdfsContext;
 import io.trino.plugin.hive.NodeVersion;
-import io.trino.plugin.hive.orc.HdfsOrcDataSource;
 import io.trino.plugin.hive.orc.OrcWriterConfig;
+import io.trino.plugin.iceberg.io.TrinoFileSystem;
+import io.trino.plugin.iceberg.io.TrinoInputFile;
+import io.trino.plugin.iceberg.io.TrinoOrcDataSource;
 import io.trino.spi.TrinoException;
 import io.trino.spi.connector.ConnectorSession;
 import io.trino.spi.type.Type;
 import io.trino.spi.type.TypeManager;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.JobConf;
 import org.apache.iceberg.MetricsConfig;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.io.FileIO;
@@ -47,6 +43,7 @@
 import javax.inject.Inject;
 
 import java.io.IOException;
+import java.io.OutputStream;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -92,7 +89,6 @@ public class IcebergFileWriterFactory
     private static final MetricsConfig FULL_METRICS_CONFIG = MetricsConfig.fromProperties(ImmutableMap.of(DEFAULT_WRITE_METRICS_MODE, "full"));
     private static final Splitter COLUMN_NAMES_SPLITTER = Splitter.on(',').trimResults().omitEmptyStrings();
 
-    private final HdfsEnvironment hdfsEnvironment;
     private final TypeManager typeManager;
     private final NodeVersion nodeVersion;
     private final FileFormatDataSourceStats readStats;
@@ -101,14 +97,12 @@ public class IcebergFileWriterFactory
 
     @Inject
     public IcebergFileWriterFactory(
-            HdfsEnvironment hdfsEnvironment,
             TypeManager typeManager,
             NodeVersion nodeVersion,
             FileFormatDataSourceStats readStats,
             OrcWriterConfig orcWriterConfig)
     {
         checkArgument(!requireNonNull(orcWriterConfig, "orcWriterConfig is null").isUseLegacyVersion(), "the ORC writer shouldn't be configured to use a legacy version");
-        this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
         this.typeManager = requireNonNull(typeManager, "typeManager is null");
         this.nodeVersion = requireNonNull(nodeVersion, "nodeVersion is null");
         this.readStats = requireNonNull(readStats, "readStats is null");
@@ -122,12 +116,10 @@ public OrcWriterStats getOrcWriterStats()
     }
 
     public IcebergFileWriter createDataFileWriter(
-            Path outputPath,
+            TrinoFileSystem fileSystem,
+            String outputPath,
             Schema icebergSchema,
-            JobConf jobConf,
             ConnectorSession session,
-            HdfsContext hdfsContext,
-            FileIoProvider fileIoProvider,
             IcebergFileFormat fileFormat,
             MetricsConfig metricsConfig,
             Map<String, String> storageProperties)
@@ -135,32 +127,30 @@ public IcebergFileWriter createDataFileWriter(
     {
         switch (fileFormat) {
             case PARQUET:
                 // TODO use metricsConfig https://github.com/trinodb/trino/issues/9791
-                return createParquetWriter(MetricsConfig.getDefault(), outputPath, icebergSchema, jobConf, session, hdfsContext);
+                return createParquetWriter(MetricsConfig.getDefault(), fileSystem, outputPath, icebergSchema, session);
             case ORC:
-                return createOrcWriter(metricsConfig, outputPath, icebergSchema, jobConf, session, storageProperties, getOrcStringStatisticsLimit(session));
+                return createOrcWriter(metricsConfig, fileSystem, outputPath, icebergSchema, session, storageProperties, getOrcStringStatisticsLimit(session));
             case AVRO:
-                return createAvroWriter(fileIoProvider.createFileIo(hdfsContext, session.getQueryId()), outputPath, icebergSchema, jobConf, session);
+                return createAvroWriter(fileSystem.toFileIo(), outputPath, icebergSchema, session);
             default:
                 throw new TrinoException(NOT_SUPPORTED, "File format not supported: " + fileFormat);
         }
     }
 
     public IcebergFileWriter createPositionDeleteWriter(
-            Path outputPath,
-            JobConf jobConf,
+            TrinoFileSystem fileSystem,
+            String outputPath,
             ConnectorSession session,
-            HdfsContext hdfsContext,
-            FileIoProvider fileIoProvider,
             IcebergFileFormat fileFormat,
             Map<String, String> storageProperties)
     {
         switch (fileFormat) {
             case PARQUET:
-                return createParquetWriter(FULL_METRICS_CONFIG, outputPath, POSITION_DELETE_SCHEMA, jobConf, session, hdfsContext);
+                return createParquetWriter(FULL_METRICS_CONFIG, fileSystem, outputPath, POSITION_DELETE_SCHEMA, session);
             case ORC:
-                return createOrcWriter(FULL_METRICS_CONFIG, outputPath, POSITION_DELETE_SCHEMA, jobConf, session, storageProperties, DataSize.ofBytes(Integer.MAX_VALUE));
+                return createOrcWriter(FULL_METRICS_CONFIG, fileSystem, outputPath, POSITION_DELETE_SCHEMA, session, storageProperties, DataSize.ofBytes(Integer.MAX_VALUE));
             case AVRO:
-                return createAvroWriter(fileIoProvider.createFileIo(hdfsContext, session.getQueryId()), outputPath, POSITION_DELETE_SCHEMA, jobConf, session);
+                return createAvroWriter(fileSystem.toFileIo(), outputPath, POSITION_DELETE_SCHEMA, session);
             default:
                 throw new TrinoException(NOT_SUPPORTED, "File format not supported: " + fileFormat);
         }
     }
@@ -168,11 +158,10 @@ public IcebergFileWriter createPositionDeleteWriter(
 
     private IcebergFileWriter createParquetWriter(
             MetricsConfig metricsConfig,
-            Path outputPath,
+            TrinoFileSystem fileSystem,
+            String outputPath,
             Schema icebergSchema,
-            JobConf jobConf,
-            ConnectorSession session,
-            HdfsContext hdfsContext)
+            ConnectorSession session)
     {
         List<String> fileColumnNames = icebergSchema.columns().stream()
                 .map(Types.NestedField::name)
                 .collect(toImmutableList());
@@ -182,10 +171,10 @@ private IcebergFileWriter createParquetWriter(
                 .map(column -> toTrinoType(column.type(), typeManager))
                 .collect(toImmutableList());
 
         try {
-            FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), outputPath, jobConf);
+            OutputStream outputStream = fileSystem.newOutputFile(outputPath).create();
 
             Callable<Void> rollbackAction = () -> {
-                fileSystem.delete(outputPath, false);
+                fileSystem.deleteFile(outputPath);
                 return null;
             };
@@ -197,7 +186,7 @@ private IcebergFileWriter createParquetWriter(
 
             return new IcebergParquetFileWriter(
                     metricsConfig,
-                    hdfsEnvironment.doAs(session.getIdentity(), () -> fileSystem.create(outputPath)),
+                    outputStream,
                     rollbackAction,
                     fileColumnTypes,
                     convert(icebergSchema, "table"),
@@ -207,8 +196,7 @@ private IcebergFileWriter createParquetWriter(
                     getCompressionCodec(session).getParquetCompressionCodec(),
                     nodeVersion.toString(),
                     outputPath,
-                    hdfsEnvironment,
-                    hdfsContext);
+                    fileSystem);
         }
         catch (IOException e) {
             throw new TrinoException(ICEBERG_WRITER_OPEN_ERROR, "Error creating Parquet file", e);
@@ -217,18 +205,18 @@ private IcebergFileWriter createParquetWriter(
 
     private IcebergFileWriter createOrcWriter(
             MetricsConfig metricsConfig,
-            Path outputPath,
+            TrinoFileSystem fileSystem,
+            String outputPath,
             Schema icebergSchema,
-            JobConf jobConf,
             ConnectorSession session,
             Map<String, String> storageProperties,
             DataSize stringStatisticsLimit)
     {
         try {
-            FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), outputPath, jobConf);
-            OrcDataSink orcDataSink = hdfsEnvironment.doAs(session.getIdentity(), () -> new OutputStreamOrcDataSink(fileSystem.create(outputPath)));
+            OrcDataSink orcDataSink = new OutputStreamOrcDataSink(fileSystem.newOutputFile(outputPath).create());
+
             Callable<Void> rollbackAction = () -> {
-                hdfsEnvironment.doAs(session.getIdentity(), () -> fileSystem.delete(outputPath, false));
+                fileSystem.deleteFile(outputPath);
                 return null;
             };
@@ -245,12 +233,8 @@ private IcebergFileWriter createOrcWriter(
             if (isOrcWriterValidate(session)) {
                 validationInputFactory = Optional.of(() -> {
                     try {
-                        return new HdfsOrcDataSource(
-                                new OrcDataSourceId(outputPath.toString()),
-                                hdfsEnvironment.doAs(session.getIdentity(), () -> fileSystem.getFileStatus(outputPath).getLen()),
-                                new OrcReaderOptions(),
-                                hdfsEnvironment.doAs(session.getIdentity(), () -> fileSystem.open(outputPath)),
-                                readStats);
+                        TrinoInputFile inputFile = fileSystem.newInputFile(outputPath);
+                        return new TrinoOrcDataSource(inputFile, new OrcReaderOptions(), readStats);
                     }
                     catch (IOException e) {
                         throw new TrinoException(ICEBERG_WRITE_VALIDATION_FAILED, e);
@@ -309,33 +293,24 @@ public static OrcWriterOptions withBloomFilterOptions(OrcWriterOptions orcWriter
 
     private IcebergFileWriter createAvroWriter(
             FileIO fileIo,
-            Path outputPath,
+            String outputPath,
             Schema icebergSchema,
-            JobConf jobConf,
             ConnectorSession session)
     {
-        try {
-            FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), outputPath, jobConf);
+        Callable<Void> rollbackAction = () -> {
+            fileIo.deleteFile(outputPath);
+            return null;
+        };
 
-            Callable<Void> rollbackAction = () -> {
-                fileSystem.delete(outputPath, false);
-                return null;
-            };
-
-            List<Type> columnTypes = icebergSchema.columns().stream()
-                    .map(column -> toTrinoType(column.type(), typeManager))
-                    .collect(toImmutableList());
+        List<Type> columnTypes = icebergSchema.columns().stream()
+                .map(column -> toTrinoType(column.type(), typeManager))
+                .collect(toImmutableList());
 
-            return new IcebergAvroFileWriter(
-                    fileIo,
-                    outputPath,
-                    rollbackAction,
-                    icebergSchema,
-                    columnTypes,
-                    getCompressionCodec(session));
-        }
-        catch (IOException e) {
-            throw new TrinoException(ICEBERG_WRITER_OPEN_ERROR, "Error creating AVRO file", e);
-        }
+        return new IcebergAvroFileWriter(
+                fileIo.newOutputFile(outputPath),
+                rollbackAction,
+                icebergSchema,
+                columnTypes,
+                getCompressionCodec(session));
     }
 }
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMergeSink.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMergeSink.java
index a0b26d2a1ea8..85aa83e2268a 100644
--- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMergeSink.java
+++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMergeSink.java
@@ -17,9 +17,8 @@
 import com.google.common.collect.ImmutableMap;
 import io.airlift.json.JsonCodec;
 import io.airlift.slice.Slice;
-import io.trino.plugin.hive.HdfsEnvironment;
-import io.trino.plugin.hive.HdfsEnvironment.HdfsContext;
 import io.trino.plugin.iceberg.delete.IcebergPositionDeletePageSink;
+import io.trino.plugin.iceberg.io.TrinoFileSystem;
 import io.trino.spi.Page;
 import io.trino.spi.PageBuilder;
 import io.trino.spi.block.ColumnarRow;
@@ -58,8 +57,7 @@ public class IcebergMergeSink
 {
     private final LocationProvider locationProvider;
     private final IcebergFileWriterFactory fileWriterFactory;
-    private final HdfsEnvironment hdfsEnvironment;
-    private final FileIoProvider fileIoProvider;
+    private final TrinoFileSystem fileSystem;
     private final JsonCodec<CommitTaskData> jsonCodec;
     private final ConnectorSession session;
     private final IcebergFileFormat fileFormat;
@@ -73,8 +71,7 @@ public class IcebergMergeSink
     public IcebergMergeSink(
             LocationProvider locationProvider,
             IcebergFileWriterFactory fileWriterFactory,
-            HdfsEnvironment hdfsEnvironment,
-            FileIoProvider fileIoProvider,
+            TrinoFileSystem fileSystem,
             JsonCodec<CommitTaskData> jsonCodec,
             ConnectorSession session,
             IcebergFileFormat fileFormat,
@@ -86,8 +83,7 @@ public IcebergMergeSink(
     {
         this.locationProvider = requireNonNull(locationProvider, "locationProvider is null");
         this.fileWriterFactory = requireNonNull(fileWriterFactory, "fileWriterFactory is null");
-        this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
-        this.fileIoProvider = requireNonNull(fileIoProvider, "fileIoProvider is null");
+        this.fileSystem = requireNonNull(fileSystem, "fileSystem is null");
         this.jsonCodec = requireNonNull(jsonCodec, "jsonCodec is null");
         this.session = requireNonNull(session, "session is null");
         this.fileFormat = requireNonNull(fileFormat, "fileFormat is null");
@@ -163,9 +159,7 @@ private ConnectorPageSink createPositionDeletePageSink(String dataFilePath, Part
                 partitionData,
                 locationProvider,
                 fileWriterFactory,
-                hdfsEnvironment,
-                new HdfsContext(session),
-                fileIoProvider,
+                fileSystem,
                 jsonCodec,
                 session,
                 fileFormat,
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java
index ccff8c56c13b..95909dcc51a6 100644
--- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java
+++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java
@@ -26,12 +26,14 @@
 import io.airlift.units.DataSize;
 import io.airlift.units.Duration;
 import io.trino.plugin.base.classloader.ClassLoaderSafeSystemTable;
-import io.trino.plugin.hive.HdfsEnvironment;
-import io.trino.plugin.hive.HdfsEnvironment.HdfsContext;
 import io.trino.plugin.hive.HiveApplyProjectionUtil;
 import io.trino.plugin.hive.HiveApplyProjectionUtil.ProjectedColumnRepresentation;
 import io.trino.plugin.hive.HiveWrittenPartitions;
 import io.trino.plugin.iceberg.catalog.TrinoCatalog;
+import io.trino.plugin.iceberg.io.FileEntry;
+import io.trino.plugin.iceberg.io.FileIterator;
+import io.trino.plugin.iceberg.io.TrinoFileSystem;
+import io.trino.plugin.iceberg.io.TrinoFileSystemFactory;
 import io.trino.plugin.iceberg.procedure.IcebergExpireSnapshotsHandle;
 import io.trino.plugin.iceberg.procedure.IcebergOptimizeHandle;
 import io.trino.plugin.iceberg.procedure.IcebergRemoveOrphanFilesHandle;
@@ -85,10 +87,6 @@
 import io.trino.spi.type.TimestampWithTimeZoneType;
 import io.trino.spi.type.TypeManager;
 import io.trino.spi.type.TypeOperators;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocatedFileStatus;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.iceberg.AppendFiles;
 import org.apache.iceberg.BaseTable;
 import org.apache.iceberg.DataFile;
@@ -126,7 +124,6 @@
 
 import java.io.IOException;
 import java.io.UncheckedIOException;
-import java.net.URI;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -148,10 +145,12 @@
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
+import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.base.Verify.verify;
 import static com.google.common.collect.ImmutableList.toImmutableList;
 import static com.google.common.collect.ImmutableMap.toImmutableMap;
 import static com.google.common.collect.ImmutableSet.toImmutableSet;
+import static com.google.common.collect.Iterables.getLast;
 import static com.google.common.collect.Maps.transformValues;
 import static com.google.common.collect.Sets.difference;
 import static com.google.common.collect.Sets.union;
@@ -246,7 +245,7 @@ public class IcebergMetadata
     private final TypeOperators typeOperators;
     private final JsonCodec<CommitTaskData> commitTaskCodec;
     private final TrinoCatalog catalog;
-    private final HdfsEnvironment hdfsEnvironment;
+    private final TrinoFileSystemFactory fileSystemFactory;
 
     private final Map<String, Long> snapshotIds = new ConcurrentHashMap<>();
 
@@ -257,13 +256,13 @@ public IcebergMetadata(
             TypeManager typeManager,
             TypeOperators typeOperators,
             JsonCodec<CommitTaskData> commitTaskCodec,
             TrinoCatalog catalog,
-            HdfsEnvironment hdfsEnvironment)
+            TrinoFileSystemFactory fileSystemFactory)
     {
         this.typeManager = requireNonNull(typeManager, "typeManager is null");
         this.typeOperators = requireNonNull(typeOperators, "typeOperators is null");
         this.commitTaskCodec = requireNonNull(commitTaskCodec, "commitTaskCodec is null");
         this.catalog = requireNonNull(catalog, "catalog is null");
-        this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
+        this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null");
     }
 
     @Override
@@ -664,11 +663,9 @@ public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, Con
         verify(transaction == null, "transaction already set");
         transaction = newCreateTableTransaction(catalog, tableMetadata, session);
         String location = transaction.table().location();
-        HdfsContext hdfsContext = new HdfsContext(session);
+        TrinoFileSystem fileSystem = fileSystemFactory.create(session);
         try {
-            Path path = new Path(location);
-            FileSystem fileSystem = hdfsEnvironment.getFileSystem(hdfsContext, path);
-            if (fileSystem.exists(path) && fileSystem.listFiles(path, true).hasNext()) {
+            if (fileSystem.listFiles(location).hasNext()) {
                 throw new TrinoException(ICEBERG_FILESYSTEM_ERROR, format("" +
                         "Cannot create a table on a non-empty location: %s, set 'iceberg.unique-table-location=true' in your Iceberg catalog properties " +
                         "to use unique table locations for every table.", location));
@@ -807,23 +804,20 @@ public Optional<ConnectorOutputMetadata> finishInsert(ConnectorSession session,
                 .collect(toImmutableList())));
     }
 
-    private void cleanExtraOutputFiles(HdfsContext hdfsContext, String queryId, String location, Set<String> filesToKeep)
+    private static void cleanExtraOutputFiles(TrinoFileSystem fileSystem, String queryId, String location, Set<String> filesToKeep)
     {
+        checkArgument(!queryId.contains("-"), "query ID should not contain hyphens: %s", queryId);
+
         Deque<String> filesToDelete = new ArrayDeque<>();
         try {
             log.debug("Deleting failed attempt files from %s for query %s", location, queryId);
-            FileSystem fileSystem = hdfsEnvironment.getFileSystem(hdfsContext, new Path(location));
-            if (!fileSystem.exists(new Path(location))) {
-                // directory may not exist if no files were actually written
-                return;
-            }
-
-            // files within given partition are written flat into location; we need to list recursively
-            RemoteIterator<LocatedFileStatus> iterator = fileSystem.listFiles(new Path(location), false);
+            FileIterator iterator = fileSystem.listFiles(location);
             while (iterator.hasNext()) {
-                Path file = iterator.next().getPath();
-                if (isFileCreatedByQuery(file.getName(), queryId) && !filesToKeep.contains(location + "/" + file.getName())) {
-                    filesToDelete.add(file.getName());
+                FileEntry entry = iterator.next();
+                String name = getLast(Splitter.on('/').splitToList(entry.path()));
+                if (name.startsWith(queryId + "-") && !filesToKeep.contains(location + "/" + name)) {
+                    filesToDelete.add(name);
                 }
             }
@@ -837,7 +831,7 @@ private void cleanExtraOutputFiles(HdfsContext hdfsContext, String queryId, Stri
             while (filesToDeleteIterator.hasNext()) {
                 String fileName = filesToDeleteIterator.next();
                 log.debug("Deleting failed attempt file %s/%s for query %s", location, fileName, queryId);
-                fileSystem.delete(new Path(location, fileName), false);
+                fileSystem.deleteFile(location + "/" + fileName);
                 deletedFilesBuilder.add(fileName);
                 filesToDeleteIterator.remove();
             }
@@ -853,12 +847,6 @@ private void cleanExtraOutputFiles(HdfsContext hdfsContext, String queryId, Stri
         }
     }
 
-    private static boolean isFileCreatedByQuery(String fileName, String queryId)
-    {
-        verify(!queryId.contains("-"), "queryId(%s) should not contain hyphens", queryId);
-        return fileName.startsWith(queryId + "-");
-    }
-
     private static Set<String> getOutputFilesLocations(Set<String> writtenFiles)
     {
         return writtenFiles.stream()
@@ -1201,13 +1189,12 @@ private void removeOrphanFiles(Table table, ConnectorSession session, SchemaTabl
         Set<String> validDataFilePaths = stream(table.snapshots())
                 .map(Snapshot::snapshotId)
                 .flatMap(snapshotId -> stream(table.newScan().useSnapshot(snapshotId).planFiles()))
-                // compare only paths not to delete too many files, see https://github.com/apache/iceberg/pull/2890
-                .map(fileScanTask -> URI.create(fileScanTask.file().path().toString()).getPath())
+                .map(fileScanTask -> fileName(fileScanTask.file().path().toString()))
                 .collect(toImmutableSet());
         Set<String> validDeleteFilePaths = stream(table.snapshots())
                 .map(Snapshot::snapshotId)
                 .flatMap(snapshotId -> stream(table.newScan().useSnapshot(snapshotId).planFiles()))
-                .flatMap(fileScanTask -> fileScanTask.deletes().stream().map(deleteFile -> URI.create(deleteFile.path().toString()).getPath()))
+                .flatMap(fileScanTask -> fileScanTask.deletes().stream().map(file -> fileName(file.path().toString())))
                 .collect(Collectors.toUnmodifiableSet());
         scanAndDeleteInvalidFiles(table, session, schemaTableName, expireTimestamp, union(validDataFilePaths, validDeleteFilePaths), "/data");
@@ -1224,27 +1211,24 @@ private void removeOrphanMetadataFiles(Table table, ConnectorSession session, Sc
                 Stream.of(versionHintLocation(table)))
                 .collect(toImmutableList());
         Set<String> validMetadataFiles = concat(manifests.stream(), manifestLists.stream(), otherMetadataFiles.stream())
-                .map(path -> URI.create(path).getPath())
+                .map(IcebergMetadata::fileName)
                 .collect(toImmutableSet());
-        scanAndDeleteInvalidFiles(table, session, schemaTableName, expireTimestamp, validMetadataFiles, "/metadata");
+        scanAndDeleteInvalidFiles(table, session, schemaTableName, expireTimestamp, validMetadataFiles, "metadata");
     }
 
     private void scanAndDeleteInvalidFiles(Table table, ConnectorSession session, SchemaTableName schemaTableName, long expireTimestamp, Set<String> validFiles, String subfolder)
     {
         try {
-            FileSystem fileSystem = hdfsEnvironment.getFileSystem(new HdfsEnvironment.HdfsContext(session), new Path(table.location()));
-            RemoteIterator<LocatedFileStatus> allFiles = fileSystem.listFiles(new Path(table.location() + subfolder), true);
+            TrinoFileSystem fileSystem = fileSystemFactory.create(session);
+            FileIterator allFiles = fileSystem.listFiles(table.location() + "/" + subfolder);
             while (allFiles.hasNext()) {
-                LocatedFileStatus file = allFiles.next();
-                if (file.isFile()) {
-                    String normalizedPath = file.getPath().toUri().getPath();
-                    if (file.getModificationTime() < expireTimestamp && !validFiles.contains(normalizedPath)) {
-                        log.debug("Deleting %s file while removing orphan files %s", file.getPath().toString(), schemaTableName.getTableName());
-                        fileSystem.delete(file.getPath(), false);
-                    }
-                    else {
-                        log.debug("%s file retained while removing orphan files %s", file.getPath().toString(), schemaTableName.getTableName());
-                    }
+                FileEntry entry = allFiles.next();
+                if (entry.lastModified() < expireTimestamp && !validFiles.contains(fileName(entry.path()))) {
+                    log.debug("Deleting %s file while removing orphan files %s", entry.path(), schemaTableName.getTableName());
+                    fileSystem.deleteFile(entry.path());
+                }
+                else {
+                    log.debug("%s file retained while removing orphan files %s", entry.path(), schemaTableName.getTableName());
                 }
             }
@@ -1253,6 +1237,11 @@ private void scanAndDeleteInvalidFiles(Table table, ConnectorSession session, Sc
         }
     }
 
+    private static String fileName(String path)
+    {
+        return path.substring(path.lastIndexOf('/') + 1);
+    }
+
     @Override
     public Optional<Object> getInfo(ConnectorTableHandle tableHandle)
     {
@@ -2047,10 +2036,10 @@ public Optional<ConnectorOutputMetadata> finishRefreshMaterializedView(
     private void cleanExtraOutputFiles(ConnectorSession session, Set<String> writtenFiles)
     {
-        HdfsContext hdfsContext = new HdfsContext(session);
+        TrinoFileSystem fileSystem = fileSystemFactory.create(session);
         Set<String> locations = getOutputFilesLocations(writtenFiles);
         for (String location : locations) {
-            cleanExtraOutputFiles(hdfsContext, session.getQueryId(), location, writtenFiles);
+            cleanExtraOutputFiles(fileSystem, session.getQueryId(), location, writtenFiles);
         }
     }
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadataFactory.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadataFactory.java
index 3be12c8c16e1..aa214231fd52 100644
--- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadataFactory.java
+++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadataFactory.java
@@ -14,8 +14,8 @@
 package io.trino.plugin.iceberg;
 
 import io.airlift.json.JsonCodec;
-import io.trino.plugin.hive.HdfsEnvironment;
 import io.trino.plugin.iceberg.catalog.TrinoCatalogFactory;
+import io.trino.plugin.iceberg.io.TrinoFileSystemFactory;
 import io.trino.spi.security.ConnectorIdentity;
 import io.trino.spi.type.TypeManager;
 import io.trino.spi.type.TypeOperators;
@@ -30,25 +30,25 @@ public class IcebergMetadataFactory
     private final TypeOperators typeOperators;
     private final JsonCodec<CommitTaskData> commitTaskCodec;
     private final TrinoCatalogFactory catalogFactory;
-    private final HdfsEnvironment hdfsEnvironment;
+    private final TrinoFileSystemFactory fileSystemFactory;
 
     @Inject
     public IcebergMetadataFactory(
             TypeManager typeManager,
             JsonCodec<CommitTaskData> commitTaskCodec,
             TrinoCatalogFactory catalogFactory,
-            HdfsEnvironment hdfsEnvironment)
+            TrinoFileSystemFactory fileSystemFactory)
     {
         this.typeManager = requireNonNull(typeManager, "typeManager is null");
         // TODO consider providing TypeOperators in ConnectorContext to increase cache reuse
         this.typeOperators = new TypeOperators();
         this.commitTaskCodec = requireNonNull(commitTaskCodec, "commitTaskCodec is null");
         this.catalogFactory = requireNonNull(catalogFactory, "catalogFactory is null");
-        this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
+        this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null");
     }
 
     public IcebergMetadata create(ConnectorIdentity identity)
     {
-        return new IcebergMetadata(typeManager, typeOperators, commitTaskCodec, catalogFactory.create(identity), hdfsEnvironment);
+        return new IcebergMetadata(typeManager, typeOperators, commitTaskCodec, catalogFactory.create(identity), fileSystemFactory);
     }
 }
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSink.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSink.java
index 87c4cadaab8e..0a4137f8e3fb 100644
--- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSink.java
+++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSink.java
@@ -17,9 +17,8 @@
 import com.google.common.collect.Iterables;
 import io.airlift.json.JsonCodec;
 import io.airlift.slice.Slice;
-import io.trino.plugin.hive.HdfsEnvironment;
-import io.trino.plugin.hive.HdfsEnvironment.HdfsContext;
 import io.trino.plugin.iceberg.PartitionTransforms.ColumnTransform;
+import io.trino.plugin.iceberg.io.TrinoFileSystem;
 import io.trino.spi.Page;
 import io.trino.spi.PageIndexer;
 import io.trino.spi.PageIndexerFactory;
@@ -40,8 +39,6 @@
 import io.trino.spi.type.UuidType;
 import io.trino.spi.type.VarbinaryType;
 import io.trino.spi.type.VarcharType;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.JobConf;
 import org.apache.iceberg.MetricsConfig;
 import org.apache.iceberg.PartitionField;
 import org.apache.iceberg.PartitionSpec;
@@ -63,7 +60,6 @@
 import static com.google.common.base.Verify.verify;
 import static com.google.common.collect.ImmutableList.toImmutableList;
 import static io.airlift.slice.Slices.wrappedBuffer;
-import static io.trino.plugin.hive.util.ConfigurationUtils.toJobConf;
 import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_TOO_MANY_OPEN_PARTITIONS;
 import static io.trino.plugin.iceberg.PartitionTransforms.getColumnTransform;
 import static io.trino.plugin.iceberg.util.Timestamps.getTimestampTz;
@@ -92,10 +88,7 @@ public class IcebergPageSink
     private final PartitionSpec partitionSpec;
     private final LocationProvider locationProvider;
     private final IcebergFileWriterFactory fileWriterFactory;
-    private final HdfsEnvironment hdfsEnvironment;
-    private final HdfsContext hdfsContext;
-    private final FileIoProvider fileIoProvider;
-    private final JobConf jobConf;
+    private final TrinoFileSystem fileSystem;
     private final JsonCodec<CommitTaskData> jsonCodec;
     private final ConnectorSession session;
     private final IcebergFileFormat fileFormat;
@@ -118,9 +111,7 @@ public IcebergPageSink(
             LocationProvider locationProvider,
             IcebergFileWriterFactory fileWriterFactory,
             PageIndexerFactory pageIndexerFactory,
-            HdfsEnvironment hdfsEnvironment,
-            HdfsContext hdfsContext,
-            FileIoProvider fileIoProvider,
+            TrinoFileSystem fileSystem,
             List<IcebergColumnHandle> inputColumns,
             JsonCodec<CommitTaskData> jsonCodec,
             ConnectorSession session,
@@ -133,10 +124,7 @@ public IcebergPageSink(
         this.partitionSpec = requireNonNull(partitionSpec, "partitionSpec is null");
         this.locationProvider = requireNonNull(locationProvider, "locationProvider is null");
         this.fileWriterFactory = requireNonNull(fileWriterFactory, "fileWriterFactory is null");
-        this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
-        this.hdfsContext = requireNonNull(hdfsContext, "hdfsContext is null");
-        this.fileIoProvider = requireNonNull(fileIoProvider, "fileIoProvider is null");
-        this.jobConf = toJobConf(hdfsEnvironment.getConfiguration(hdfsContext, new Path(locationProvider.newDataLocation("data-file"))));
+        this.fileSystem = requireNonNull(fileSystem, "fileSystem is null");
         this.jsonCodec = requireNonNull(jsonCodec, "jsonCodec is null");
         this.session = requireNonNull(session, "session is null");
         this.fileFormat = requireNonNull(fileFormat, "fileFormat is null");
@@ -168,8 +156,7 @@ public long getValidationCpuNanos()
     @Override
     public CompletableFuture<?> appendPage(Page page)
     {
-        hdfsEnvironment.doAs(session.getIdentity(), () -> doAppend(page));
-
+        doAppend(page);
         return NOT_BLOCKED;
     }
@@ -337,16 +324,15 @@ private WriteContext createWriter(Optional<PartitionData> partitionData)
         // prepend query id to a file name so we can determine which files were written by which query. This is needed for opportunistic cleanup of extra files
         // which may be present for successfully completing query in presence of failure recovery mechanisms.
         String fileName = fileFormat.toIceberg().addExtension(session.getQueryId() + "-" + randomUUID());
-        Path outputPath = partitionData.map(partition -> new Path(locationProvider.newDataLocation(partitionSpec, partition, fileName)))
-                .orElse(new Path(locationProvider.newDataLocation(fileName)));
+        String outputPath = partitionData
+                .map(partition -> locationProvider.newDataLocation(partitionSpec, partition, fileName))
+                .orElseGet(() -> locationProvider.newDataLocation(fileName));
 
         IcebergFileWriter writer = fileWriterFactory.createDataFileWriter(
+                fileSystem,
                 outputPath,
                 outputSchema,
-                jobConf,
                 session,
-                hdfsContext,
-                fileIoProvider,
                 fileFormat,
                 metricsConfig,
                 storageProperties);
@@ -442,10 +428,10 @@ private static List<PartitionColumn> toPartitionColumns(List
     private static class WriteContext
     {
         private final IcebergFileWriter writer;
-        private final Path path;
+        private final String path;
         private final Optional<PartitionData> partitionData;
 
-        public WriteContext(IcebergFileWriter writer, Path path, Optional<PartitionData> partitionData)
+        public WriteContext(IcebergFileWriter writer, String path, Optional<PartitionData> partitionData)
         {
             this.writer = requireNonNull(writer, "writer is null");
             this.path = requireNonNull(path, "path is null");
@@ -457,7 +443,7 @@ public IcebergFileWriter getWriter()
             return writer;
         }
 
-        public Path getPath()
+        public String getPath()
         {
             return path;
         }
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSinkProvider.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSinkProvider.java
index cf62fca1ca3e..5254d5c52a43 100644
--- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSinkProvider.java
+++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSinkProvider.java
@@ -14,8 +14,7 @@
 package io.trino.plugin.iceberg;
 
 import io.airlift.json.JsonCodec;
-import io.trino.plugin.hive.HdfsEnvironment;
-import io.trino.plugin.hive.HdfsEnvironment.HdfsContext;
+import io.trino.plugin.iceberg.io.TrinoFileSystemFactory;
 import io.trino.plugin.iceberg.procedure.IcebergOptimizeHandle;
 import io.trino.plugin.iceberg.procedure.IcebergTableExecuteHandle;
 import io.trino.spi.PageIndexerFactory;
@@ -45,27 +44,24 @@ public class IcebergPageSinkProvider
         implements ConnectorPageSinkProvider
 {
-    private final HdfsEnvironment hdfsEnvironment;
+    private final TrinoFileSystemFactory fileSystemFactory;
     private final JsonCodec<CommitTaskData> jsonCodec;
     private final IcebergFileWriterFactory fileWriterFactory;
     private final PageIndexerFactory pageIndexerFactory;
-    private final FileIoProvider fileIoProvider;
     private final int maxOpenPartitions;
 
     @Inject
     public IcebergPageSinkProvider(
-            HdfsEnvironment hdfsEnvironment,
+            TrinoFileSystemFactory fileSystemFactory,
            JsonCodec<CommitTaskData> jsonCodec,
             IcebergFileWriterFactory fileWriterFactory,
             PageIndexerFactory pageIndexerFactory,
-            FileIoProvider fileIoProvider,
             IcebergConfig config)
     {
-        this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
+        this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null");
         this.jsonCodec = requireNonNull(jsonCodec, "jsonCodec is null");
         this.fileWriterFactory = requireNonNull(fileWriterFactory, "fileWriterFactory is null");
         this.pageIndexerFactory = requireNonNull(pageIndexerFactory, "pageIndexerFactory is null");
-        this.fileIoProvider = requireNonNull(fileIoProvider, "fileIoProvider is null");
         requireNonNull(config, "config is null");
         this.maxOpenPartitions = config.getMaxPartitionsPerWriter();
     }
@@ -84,7 +80,6 @@ public ConnectorPageSink createPageSink(ConnectorTransactionHandle transactionHa
 
     private ConnectorPageSink createPageSink(ConnectorSession session, IcebergWritableTableHandle tableHandle)
     {
-        HdfsContext hdfsContext = new HdfsContext(session);
         Schema schema = SchemaParser.fromJson(tableHandle.getSchemaAsJson());
         String partitionSpecJson = tableHandle.getPartitionsSpecsAsJson().get(tableHandle.getPartitionSpecId());
         PartitionSpec partitionSpec = PartitionSpecParser.fromJson(schema, partitionSpecJson);
@@ -95,9 +90,7 @@ private ConnectorPageSink createPageSink(ConnectorSession session, IcebergWritab
                 locationProvider,
                 fileWriterFactory,
                 pageIndexerFactory,
-                hdfsEnvironment,
-                hdfsContext,
-                fileIoProvider,
+                fileSystemFactory.create(session),
                 tableHandle.getInputColumns(),
                 jsonCodec,
                 session,
@@ -112,7 +105,6 @@ public ConnectorPageSink createPageSink(ConnectorTransactionHandle transactionHa
         IcebergTableExecuteHandle executeHandle = (IcebergTableExecuteHandle) tableExecuteHandle;
         switch (executeHandle.getProcedureId()) {
             case OPTIMIZE:
-                HdfsContext hdfsContext = new HdfsContext(session);
                 IcebergOptimizeHandle optimizeHandle = (IcebergOptimizeHandle) executeHandle.getProcedureHandle();
                 Schema schema = SchemaParser.fromJson(optimizeHandle.getSchemaAsJson());
                 PartitionSpec partitionSpec = PartitionSpecParser.fromJson(schema, optimizeHandle.getPartitionSpecAsJson());
@@ -124,9 +116,7 @@ public ConnectorPageSink createPageSink(ConnectorTransactionHandle transactionHa
                         locationProvider,
                         fileWriterFactory,
                         pageIndexerFactory,
-                        hdfsEnvironment,
-                        hdfsContext,
-                        fileIoProvider,
+                        fileSystemFactory.create(session),
                         optimizeHandle.getTableColumns(),
                         jsonCodec,
                         session,
@@ -153,8 +143,7 @@ public ConnectorMergeSink createMergeSink(ConnectorTransactionHandle transaction
         return new IcebergMergeSink(
                 locationProvider,
                 fileWriterFactory,
-                hdfsEnvironment,
-                fileIoProvider,
+                fileSystemFactory.create(session),
                 jsonCodec,
                 session,
                 tableHandle.getFileFormat(),
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java
index 69bf0f9322b9..5946bbd59a98 100644
--- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java
+++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java
@@ -35,7 +35,6 @@
 import io.trino.parquet.Field;
 import io.trino.parquet.ParquetCorruptionException;
 import io.trino.parquet.ParquetDataSource;
-import io.trino.parquet.ParquetDataSourceId;
 import io.trino.parquet.ParquetReaderOptions;
 import io.trino.parquet.RichColumnDescriptor;
 import io.trino.parquet.predicate.Predicate;
@@ -43,16 +42,12 @@
 import io.trino.parquet.reader.ParquetReader;
 import io.trino.plugin.base.classloader.ClassLoaderSafeUpdatablePageSource;
 import io.trino.plugin.hive.FileFormatDataSourceStats;
-import io.trino.plugin.hive.HdfsEnvironment;
-import io.trino.plugin.hive.HdfsEnvironment.HdfsContext;
 import io.trino.plugin.hive.ReaderColumns;
 import io.trino.plugin.hive.ReaderPageSource;
import io.trino.plugin.hive.ReaderProjectionsAdapter; -import io.trino.plugin.hive.orc.HdfsOrcDataSource; import io.trino.plugin.hive.orc.OrcPageSource; import io.trino.plugin.hive.orc.OrcPageSource.ColumnAdaptation; import io.trino.plugin.hive.orc.OrcReaderConfig; -import io.trino.plugin.hive.parquet.HdfsParquetDataSource; import io.trino.plugin.hive.parquet.ParquetPageSource; import io.trino.plugin.hive.parquet.ParquetReaderConfig; import io.trino.plugin.iceberg.IcebergParquetColumnIOConverter.FieldContext; @@ -61,6 +56,11 @@ import io.trino.plugin.iceberg.delete.IcebergPositionDeletePageSink; import io.trino.plugin.iceberg.delete.PositionDeleteFilter; import io.trino.plugin.iceberg.delete.RowPredicate; +import io.trino.plugin.iceberg.io.TrinoFileSystem; +import io.trino.plugin.iceberg.io.TrinoFileSystemFactory; +import io.trino.plugin.iceberg.io.TrinoInputFile; +import io.trino.plugin.iceberg.io.TrinoOrcDataSource; +import io.trino.plugin.iceberg.io.TrinoParquetDataSource; import io.trino.spi.PageIndexerFactory; import io.trino.spi.TrinoException; import io.trino.spi.connector.ColumnHandle; @@ -77,7 +77,6 @@ import io.trino.spi.predicate.Range; import io.trino.spi.predicate.TupleDomain; import io.trino.spi.predicate.ValueSet; -import io.trino.spi.security.ConnectorIdentity; import io.trino.spi.type.ArrayType; import io.trino.spi.type.MapType; import io.trino.spi.type.RowType; @@ -86,12 +85,6 @@ import io.trino.spi.type.TypeManager; import org.apache.avro.file.DataFileStream; import org.apache.avro.generic.GenericDatumReader; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.BlockMissingException; import org.apache.iceberg.MetadataColumns; import org.apache.iceberg.PartitionSpec; @@ -99,7 +92,7 @@ import org.apache.iceberg.Schema; import org.apache.iceberg.SchemaParser; import org.apache.iceberg.avro.AvroSchemaUtil; -import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.io.InputFile; import org.apache.iceberg.io.LocationProvider; import org.apache.iceberg.mapping.MappedField; import org.apache.iceberg.mapping.MappedFields; @@ -121,8 +114,6 @@ import java.io.IOException; import java.io.UncheckedIOException; -import java.net.URI; -import java.net.URLEncoder; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.HashMap; @@ -155,8 +146,6 @@ import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA; import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_CANNOT_OPEN_SPLIT; import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_CURSOR_ERROR; -import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_FILESYSTEM_ERROR; -import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_INVALID_METADATA; import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_MISSING_DATA; import static io.trino.plugin.iceberg.IcebergMetadataColumn.FILE_MODIFIED_TIME; import static io.trino.plugin.iceberg.IcebergMetadataColumn.FILE_PATH; @@ -191,7 +180,6 @@ import static io.trino.spi.type.VarbinaryType.VARBINARY; import static io.trino.spi.type.VarcharType.VARCHAR; import static java.lang.String.format; -import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Locale.ENGLISH; import static java.util.Objects.requireNonNull; import static java.util.function.Predicate.not; @@ -211,12 +199,11 @@ 
public class IcebergPageSourceProvider { private static final String AVRO_FIELD_ID = "field-id"; - private final HdfsEnvironment hdfsEnvironment; + private final TrinoFileSystemFactory fileSystemFactory; private final FileFormatDataSourceStats fileFormatDataSourceStats; private final OrcReaderOptions orcReaderOptions; private final ParquetReaderOptions parquetReaderOptions; private final TypeManager typeManager; - private final FileIoProvider fileIoProvider; private final JsonCodec jsonCodec; private final IcebergFileWriterFactory fileWriterFactory; private final PageIndexerFactory pageIndexerFactory; @@ -224,23 +211,21 @@ public class IcebergPageSourceProvider @Inject public IcebergPageSourceProvider( - HdfsEnvironment hdfsEnvironment, + TrinoFileSystemFactory fileSystemFactory, FileFormatDataSourceStats fileFormatDataSourceStats, OrcReaderConfig orcReaderConfig, ParquetReaderConfig parquetReaderConfig, TypeManager typeManager, - FileIoProvider fileIoProvider, JsonCodec jsonCodec, IcebergFileWriterFactory fileWriterFactory, PageIndexerFactory pageIndexerFactory, IcebergConfig icebergConfig) { - this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null"); + this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null"); this.fileFormatDataSourceStats = requireNonNull(fileFormatDataSourceStats, "fileFormatDataSourceStats is null"); this.orcReaderOptions = requireNonNull(orcReaderConfig, "orcReaderConfig is null").toOrcReaderOptions(); this.parquetReaderOptions = requireNonNull(parquetReaderConfig, "parquetReaderConfig is null").toParquetReaderOptions(); this.typeManager = requireNonNull(typeManager, "typeManager is null"); - this.fileIoProvider = requireNonNull(fileIoProvider, "fileIoProvider is null"); this.jsonCodec = requireNonNull(jsonCodec, "jsonCodec is null"); this.fileWriterFactory = requireNonNull(fileWriterFactory, "fileWriterFactory is null"); this.pageIndexerFactory = requireNonNull(pageIndexerFactory, "pageIndexerFactory is null"); @@ -316,33 +301,17 @@ else if (identity.getId() == TRINO_MERGE_PARTITION_DATA) { return new EmptyPageSource(); } - HdfsContext hdfsContext = new HdfsContext(session); - long fileSize = split.getFileSize(); - if (!isUseFileSizeFromMetadata(session)) { - fileSize = fileIoProvider.createFileIo(hdfsContext, session.getQueryId()) - .newInputFile(split.getPath()).getLength(); - } - OptionalLong fileModifiedTime = OptionalLong.empty(); - if (requiredColumns.stream().anyMatch(IcebergColumnHandle::isFileModifiedTimeColumn)) { - try { - FileStatus fileStatus = hdfsEnvironment.doAs( - session.getIdentity(), - () -> hdfsEnvironment.getFileSystem(hdfsContext, new Path(split.getPath())).getFileStatus(new Path(split.getPath()))); - fileModifiedTime = OptionalLong.of(fileStatus.getModificationTime()); - } - catch (IOException e) { - throw new TrinoException(ICEBERG_FILESYSTEM_ERROR, e); - } - } + TrinoFileSystem fileSystem = fileSystemFactory.create(session); + TrinoInputFile inputfile = isUseFileSizeFromMetadata(session) + ? 
fileSystem.newInputFile(split.getPath(), split.getFileSize()) + : fileSystem.newInputFile(split.getPath()); ReaderPageSourceWithRowPositions readerPageSourceWithRowPositions = createDataPageSource( session, - hdfsContext, - split.getPath(), + fileSystem, + inputfile, split.getStart(), split.getLength(), - fileSize, - fileModifiedTime, partitionSpec.specId(), split.getPartitionDataJson(), split.getFileFormat(), @@ -385,9 +354,7 @@ else if (identity.getId() == TRINO_MERGE_PARTITION_DATA) { partition, locationProvider, fileWriterFactory, - hdfsEnvironment, - hdfsContext, - fileIoProvider, + fileSystem, jsonCodec, session, split.getFileFormat(), @@ -402,9 +369,7 @@ else if (identity.getId() == TRINO_MERGE_PARTITION_DATA) { locationProvider, fileWriterFactory, pageIndexerFactory, - hdfsEnvironment, - hdfsContext, - fileIoProvider, + fileSystem, tableSchema.columns().stream().map(column -> getColumnHandle(column, typeManager)).collect(toImmutableList()), jsonCodec, session, @@ -523,14 +488,13 @@ private ConnectorPageSource openDeletes( List columns, TupleDomain tupleDomain) { + TrinoFileSystem fileSystem = fileSystemFactory.create(session); return createDataPageSource( session, - new HdfsContext(session), - delete.path().toString(), + fileSystem, + fileSystem.newInputFile(delete.path(), delete.fileSizeInBytes()), 0, delete.fileSizeInBytes(), - delete.fileSizeInBytes(), - OptionalLong.empty(), 0, "", IcebergFileFormat.fromIceberg(delete.format()), @@ -545,12 +509,10 @@ private ConnectorPageSource openDeletes( public ReaderPageSourceWithRowPositions createDataPageSource( ConnectorSession session, - HdfsContext hdfsContext, - String path, + TrinoFileSystem fileSystem, + TrinoInputFile inputFile, long start, long length, - long fileSize, - OptionalLong fileModifiedTime, int partitionSpecId, String partitionData, IcebergFileFormat fileFormat, @@ -560,19 +522,12 @@ public ReaderPageSourceWithRowPositions createDataPageSource( Optional nameMapping, Map> partitionKeys) { - Path hadoopPath = new Path(hadoopPath(path)); switch (fileFormat) { case ORC: return createOrcPageSource( - hdfsEnvironment, - session.getIdentity(), - hdfsEnvironment.getConfiguration(hdfsContext, hadoopPath), - path, - hadoopPath, + inputFile, start, length, - fileSize, - fileModifiedTime, partitionSpecId, partitionData, dataColumns, @@ -592,15 +547,9 @@ public ReaderPageSourceWithRowPositions createDataPageSource( partitionKeys); case PARQUET: return createParquetPageSource( - hdfsEnvironment, - session.getIdentity(), - hdfsEnvironment.getConfiguration(hdfsContext, hadoopPath), - path, - hadoopPath, + inputFile, start, length, - fileSize, - fileModifiedTime, partitionSpecId, partitionData, dataColumns, @@ -612,12 +561,10 @@ public ReaderPageSourceWithRowPositions createDataPageSource( partitionKeys); case AVRO: return createAvroPageSource( - fileIoProvider.createFileIo(hdfsContext, session.getQueryId()), - path, - hadoopPath, + fileSystem, + inputFile, start, length, - fileModifiedTime, partitionSpecId, partitionData, fileSchema.orElseThrow(), @@ -629,15 +576,9 @@ public ReaderPageSourceWithRowPositions createDataPageSource( } private static ReaderPageSourceWithRowPositions createOrcPageSource( - HdfsEnvironment hdfsEnvironment, - ConnectorIdentity identity, - Configuration configuration, - String path, - Path hadoopPath, + TrinoInputFile inputFile, long start, long length, - long fileSize, - OptionalLong fileModifiedTime, int partitionSpecId, String partitionData, List columns, @@ -650,14 +591,7 @@ private static 
ReaderPageSourceWithRowPositions createOrcPageSource( { OrcDataSource orcDataSource = null; try { - FileSystem fileSystem = hdfsEnvironment.getFileSystem(identity, hadoopPath, configuration); - FSDataInputStream inputStream = hdfsEnvironment.doAs(identity, () -> fileSystem.open(hadoopPath)); - orcDataSource = new HdfsOrcDataSource( - new OrcDataSourceId(hadoopPath.toString()), - fileSize, - options, - inputStream, - stats); + orcDataSource = new TrinoOrcDataSource(inputFile, options, stats); OrcReader reader = OrcReader.createOrcReader(orcDataSource, options) .orElseThrow(() -> new TrinoException(ICEBERG_BAD_DATA, "ORC file is zero length")); @@ -704,10 +638,10 @@ else if (partitionKeys.containsKey(column.getId())) { deserializePartitionValue(trinoType, partitionKeys.get(column.getId()).orElse(null), column.getName())))); } else if (column.isPathColumn()) { - columnAdaptations.add(ColumnAdaptation.constantColumn(nativeValueToBlock(FILE_PATH.getType(), utf8Slice(path)))); + columnAdaptations.add(ColumnAdaptation.constantColumn(nativeValueToBlock(FILE_PATH.getType(), utf8Slice(inputFile.location())))); } else if (column.isFileModifiedTimeColumn()) { - columnAdaptations.add(ColumnAdaptation.constantColumn(nativeValueToBlock(FILE_MODIFIED_TIME.getType(), packDateTimeWithZone(fileModifiedTime.orElseThrow(), UTC_KEY)))); + columnAdaptations.add(ColumnAdaptation.constantColumn(nativeValueToBlock(FILE_MODIFIED_TIME.getType(), packDateTimeWithZone(inputFile.modificationTime(), UTC_KEY)))); } else if (column.isUpdateRowIdColumn() || column.isMergeRowIdColumn()) { // $row_id is a composite of multiple physical columns. It is assembled by the IcebergPageSource @@ -795,7 +729,7 @@ else if (orcColumn != null) { if (e instanceof TrinoException) { throw (TrinoException) e; } - String message = format("Error opening Iceberg split %s (offset=%s, length=%s): %s", hadoopPath, start, length, e.getMessage()); + String message = format("Error opening Iceberg split %s (offset=%s, length=%s): %s", inputFile.location(), start, length, e.getMessage()); if (e instanceof BlockMissingException) { throw new TrinoException(ICEBERG_MISSING_DATA, message, e); } @@ -988,15 +922,9 @@ public OrcColumn get(String fieldName) } private static ReaderPageSourceWithRowPositions createParquetPageSource( - HdfsEnvironment hdfsEnvironment, - ConnectorIdentity identity, - Configuration configuration, - String path, - Path hadoopPath, + TrinoInputFile inputFile, long start, long length, - long fileSize, - OptionalLong fileModifiedTime, int partitionSpecId, String partitionData, List regularColumns, @@ -1010,11 +938,8 @@ private static ReaderPageSourceWithRowPositions createParquetPageSource( ParquetDataSource dataSource = null; try { - FileSystem fileSystem = hdfsEnvironment.getFileSystem(identity, hadoopPath, configuration); - FSDataInputStream inputStream = hdfsEnvironment.doAs(identity, () -> fileSystem.open(hadoopPath)); - dataSource = new HdfsParquetDataSource(new ParquetDataSourceId(hadoopPath.toString()), fileSize, inputStream, fileFormatDataSourceStats, options); - ParquetDataSource theDataSource = dataSource; // extra variable required for lambda below - ParquetMetadata parquetMetadata = hdfsEnvironment.doAs(identity, () -> MetadataReader.readFooter(theDataSource)); + dataSource = new TrinoParquetDataSource(inputFile, options, fileFormatDataSourceStats); + ParquetMetadata parquetMetadata = MetadataReader.readFooter(dataSource); FileMetaData fileMetaData = parquetMetadata.getFileMetaData(); MessageType fileSchema = 
fileMetaData.getSchema(); if (nameMapping.isPresent() && !ParquetSchemaUtil.hasIds(fileSchema)) { @@ -1089,10 +1014,10 @@ else if (partitionKeys.containsKey(column.getId())) { deserializePartitionValue(trinoType, partitionKeys.get(column.getId()).orElse(null), column.getName()))); } else if (column.isPathColumn()) { - constantPopulatingPageSourceBuilder.addConstantColumn(nativeValueToBlock(FILE_PATH.getType(), utf8Slice(path))); + constantPopulatingPageSourceBuilder.addConstantColumn(nativeValueToBlock(FILE_PATH.getType(), utf8Slice(inputFile.location()))); } else if (column.isFileModifiedTimeColumn()) { - constantPopulatingPageSourceBuilder.addConstantColumn(nativeValueToBlock(FILE_MODIFIED_TIME.getType(), packDateTimeWithZone(fileModifiedTime.orElseThrow(), UTC_KEY))); + constantPopulatingPageSourceBuilder.addConstantColumn(nativeValueToBlock(FILE_MODIFIED_TIME.getType(), packDateTimeWithZone(inputFile.modificationTime(), UTC_KEY))); } else if (column.isUpdateRowIdColumn() || column.isMergeRowIdColumn()) { // $row_id is a composite of multiple physical columns, it is assembled by the IcebergPageSource @@ -1156,7 +1081,7 @@ else if (column.getId() == TRINO_MERGE_PARTITION_DATA) { if (e instanceof TrinoException) { throw (TrinoException) e; } - String message = format("Error opening Iceberg split %s (offset=%s, length=%s): %s", hadoopPath, start, length, e.getMessage()); + String message = format("Error opening Iceberg split %s (offset=%s, length=%s): %s", inputFile.location(), start, length, e.getMessage()); if (e instanceof ParquetCorruptionException) { throw new TrinoException(ICEBERG_BAD_DATA, message, e); @@ -1169,13 +1094,11 @@ else if (column.getId() == TRINO_MERGE_PARTITION_DATA) { } } - private ReaderPageSourceWithRowPositions createAvroPageSource( - FileIO fileIo, - String path, - Path hadoopPath, + private static ReaderPageSourceWithRowPositions createAvroPageSource( + TrinoFileSystem fileSystem, + TrinoInputFile inputFile, long start, long length, - OptionalLong fileModifiedTime, int partitionSpecId, String partitionData, Schema fileSchema, @@ -1191,8 +1114,20 @@ private ReaderPageSourceWithRowPositions createAvroPageSource( .map(readerColumns -> (List) readerColumns.get().stream().map(IcebergColumnHandle.class::cast).collect(toImmutableList())) .orElse(columns); + InputFile file; + OptionalLong fileModifiedTime = OptionalLong.empty(); + try { + file = fileSystem.toFileIo().newInputFile(inputFile.location(), inputFile.length()); + if (readColumns.stream().anyMatch(IcebergColumnHandle::isFileModifiedTimeColumn)) { + fileModifiedTime = OptionalLong.of(inputFile.modificationTime()); + } + } + catch (IOException e) { + throw new TrinoException(ICEBERG_CANNOT_OPEN_SPLIT, e); + } + // The column orders in the generated schema might be different from the original order - try (DataFileStream avroFileReader = new DataFileStream<>(fileIo.newInputFile(hadoopPath.toString()).newStream(), new GenericDatumReader<>())) { + try (DataFileStream avroFileReader = new DataFileStream<>(file.newStream(), new GenericDatumReader<>())) { org.apache.avro.Schema avroSchema = avroFileReader.getSchema(); List fileFields = avroSchema.getFields(); if (nameMapping.isPresent() && fileFields.stream().noneMatch(IcebergPageSourceProvider::hasId)) { @@ -1212,7 +1147,7 @@ private ReaderPageSourceWithRowPositions createAvroPageSource( org.apache.avro.Schema.Field field = fileColumnsByIcebergId.get(column.getId()); if (column.isPathColumn()) { - 
constantPopulatingPageSourceBuilder.addConstantColumn(nativeValueToBlock(FILE_PATH.getType(), utf8Slice(path))); + constantPopulatingPageSourceBuilder.addConstantColumn(nativeValueToBlock(FILE_PATH.getType(), utf8Slice(file.location()))); } else if (column.isFileModifiedTimeColumn()) { constantPopulatingPageSourceBuilder.addConstantColumn(nativeValueToBlock(FILE_MODIFIED_TIME.getType(), packDateTimeWithZone(fileModifiedTime.orElseThrow(), UTC_KEY))); @@ -1246,8 +1181,7 @@ else if (field == null) { return new ReaderPageSourceWithRowPositions( new ReaderPageSource( constantPopulatingPageSourceBuilder.build(new IcebergAvroPageSource( - fileIo, - hadoopPath.toString(), + file, start, length, fileSchema, @@ -1427,20 +1361,6 @@ private static TrinoException handleException(OrcDataSourceId dataSourceId, Exce return new TrinoException(ICEBERG_CURSOR_ERROR, format("Failed to read ORC file: %s", dataSourceId), exception); } - private static String hadoopPath(String path) - { - // hack to preserve the original path for S3 if necessary - Path hadoopPath = new Path(path); - if ("s3".equals(hadoopPath.toUri().getScheme()) && !path.equals(hadoopPath.toString())) { - if (hadoopPath.toUri().getFragment() != null) { - throw new TrinoException(ICEBERG_INVALID_METADATA, "Unexpected URI fragment in path: " + path); - } - URI uri = URI.create(path); - return uri + "#" + URLEncoder.encode(uri.getPath(), UTF_8); - } - return path; - } - private static final class ReaderPageSourceWithRowPositions { private final ReaderPageSource readerPageSource;
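The net effect of the page-source changes above: the Hadoop-specific plumbing (Configuration, FSDataInputStream, doAs) is gone, and the format readers consume a TrinoInputFile directly. A minimal sketch of the resulting Parquet footer read, assuming stand-in inputs (fileSystemFactory, session, path, fileSize taken from a split) and assuming the no-arg ParquetReaderOptions and FileFormatDataSourceStats constructors; TrinoFileSystem, TrinoInputFile, and TrinoParquetDataSource are introduced later in this change:

    import io.trino.parquet.ParquetReaderOptions;
    import io.trino.parquet.reader.MetadataReader;
    import io.trino.plugin.hive.FileFormatDataSourceStats;
    import io.trino.plugin.iceberg.io.TrinoFileSystem;
    import io.trino.plugin.iceberg.io.TrinoFileSystemFactory;
    import io.trino.plugin.iceberg.io.TrinoInputFile;
    import io.trino.plugin.iceberg.io.TrinoParquetDataSource;
    import io.trino.spi.connector.ConnectorSession;
    import org.apache.parquet.hadoop.metadata.ParquetMetadata;

    import java.io.IOException;

    // Sketch only; mirrors the flow of createDataPageSource/createParquetPageSource above
    static ParquetMetadata readParquetFooter(TrinoFileSystemFactory fileSystemFactory, ConnectorSession session, String path, long fileSize)
            throws IOException
    {
        TrinoFileSystem fileSystem = fileSystemFactory.create(session);
        // passing the size from the split avoids a file-status call just to learn the length
        TrinoInputFile inputFile = fileSystem.newInputFile(path, fileSize);
        TrinoParquetDataSource dataSource = new TrinoParquetDataSource(inputFile, new ParquetReaderOptions(), new FileFormatDataSourceStats());
        try {
            // no Hadoop Configuration and no doAs: the identity is captured in the TrinoFileSystem
            return MetadataReader.readFooter(dataSource);
        }
        finally {
            dataSource.close();
        }
    }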
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergParquetFileWriter.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergParquetFileWriter.java index dfeb78ce704d..01a03c702114 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergParquetFileWriter.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergParquetFileWriter.java @@ -14,14 +14,12 @@ package io.trino.plugin.iceberg; import io.trino.parquet.writer.ParquetWriterOptions; -import io.trino.plugin.hive.HdfsEnvironment; -import io.trino.plugin.hive.HdfsEnvironment.HdfsContext; import io.trino.plugin.hive.parquet.ParquetFileWriter; +import io.trino.plugin.iceberg.io.TrinoFileSystem; import io.trino.spi.type.Type; -import org.apache.hadoop.fs.Path; import org.apache.iceberg.Metrics; import org.apache.iceberg.MetricsConfig; -import org.apache.iceberg.parquet.ParquetUtil; +import org.apache.iceberg.io.InputFile; import org.apache.parquet.hadoop.metadata.CompressionCodecName; import org.apache.parquet.schema.MessageType; @@ -32,15 +30,15 @@ import java.util.concurrent.Callable; import static java.util.Objects.requireNonNull; +import static org.apache.iceberg.parquet.ParquetUtil.fileMetrics; public class IcebergParquetFileWriter extends ParquetFileWriter implements IcebergFileWriter { private final MetricsConfig metricsConfig; - private final Path outputPath; - private final HdfsEnvironment hdfsEnvironment; - private final HdfsContext hdfsContext; + private final String outputPath; + private final TrinoFileSystem fileSystem; public IcebergParquetFileWriter( MetricsConfig metricsConfig, @@ -53,9 +51,8 @@ public IcebergParquetFileWriter( int[] fileInputColumnIndexes, CompressionCodecName compressionCodecName, String trinoVersion, - Path outputPath, - HdfsEnvironment hdfsEnvironment, - HdfsContext hdfsContext) + String outputPath, + TrinoFileSystem fileSystem) { super(outputStream, rollbackAction, @@ -69,13 +66,13 @@ public IcebergParquetFileWriter( Optional.empty()); this.metricsConfig = requireNonNull(metricsConfig, "metricsConfig is null"); this.outputPath = requireNonNull(outputPath, "outputPath is null"); - this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null"); - this.hdfsContext = requireNonNull(hdfsContext, "hdfsContext is null"); + this.fileSystem = requireNonNull(fileSystem, "fileSystem is null"); } @Override public Metrics getMetrics() { - return hdfsEnvironment.doAs(hdfsContext.getIdentity(), () -> ParquetUtil.fileMetrics(new HdfsInputFile(outputPath, hdfsEnvironment, hdfsContext), metricsConfig)); + InputFile inputFile = fileSystem.toFileIo().newInputFile(outputPath); + return fileMetrics(inputFile, metricsConfig); } }
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/InternalIcebergConnectorFactory.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/InternalIcebergConnectorFactory.java index ab25e3216c4c..16481713f2f0 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/InternalIcebergConnectorFactory.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/InternalIcebergConnectorFactory.java @@ -37,6 +37,8 @@ import io.trino.plugin.hive.metastore.HiveMetastore; import io.trino.plugin.hive.s3.HiveS3Module; import io.trino.plugin.iceberg.catalog.IcebergCatalogModule; +import io.trino.plugin.iceberg.io.TrinoFileSystemFactory; +import io.trino.plugin.iceberg.io.hdfs.HdfsFileSystemFactory; import io.trino.spi.NodeManager; import io.trino.spi.PageIndexerFactory; import io.trino.spi.classloader.ThreadContextClassLoader; @@ -73,7 +75,7 @@ public static Connector createConnector( ConnectorContext context, Module module, Optional metastore, - Optional fileIoProvider) + Optional fileSystemFactory) { ClassLoader classLoader = InternalIcebergConnectorFactory.class.getClassLoader(); try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) { @@ -91,15 +93,15 @@ public static Connector createConnector( new HiveAzureModule(), new HdfsAuthenticationModule(), new MBeanServerModule(), - fileIoProvider - .map(provider -> binder -> binder.bind(FileIoProvider.class).toInstance(provider)) - .orElse(binder -> binder.bind(FileIoProvider.class).to(HdfsFileIoProvider.class).in(SINGLETON)), binder -> { binder.bind(NodeVersion.class).toInstance(new NodeVersion(context.getNodeManager().getCurrentNode().getVersion())); binder.bind(NodeManager.class).toInstance(context.getNodeManager()); binder.bind(TypeManager.class).toInstance(context.getTypeManager()); binder.bind(PageIndexerFactory.class).toInstance(context.getPageIndexerFactory()); binder.bind(CatalogName.class).toInstance(new CatalogName(catalogName)); + fileSystemFactory.ifPresentOrElse( + factory -> binder.bind(TrinoFileSystemFactory.class).toInstance(factory), + () -> binder.bind(TrinoFileSystemFactory.class).to(HdfsFileSystemFactory.class).in(SINGLETON)); }, module);
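The ifPresentOrElse binding above means an embedder or test can hand InternalIcebergConnectorFactory its own TrinoFileSystemFactory instead of the default HdfsFileSystemFactory. A hypothetical delegating factory (not part of this change) that counts file-system creations, e.g. to assert identity propagation in tests:

    import io.trino.plugin.iceberg.io.TrinoFileSystem;
    import io.trino.plugin.iceberg.io.TrinoFileSystemFactory;
    import io.trino.spi.security.ConnectorIdentity;

    import java.util.concurrent.atomic.AtomicLong;

    import static java.util.Objects.requireNonNull;

    public class RecordingFileSystemFactory
            implements TrinoFileSystemFactory
    {
        private final TrinoFileSystemFactory delegate;
        private final AtomicLong creations = new AtomicLong();

        public RecordingFileSystemFactory(TrinoFileSystemFactory delegate)
        {
            this.delegate = requireNonNull(delegate, "delegate is null");
        }

        @Override
        public TrinoFileSystem create(ConnectorIdentity identity)
        {
            // count every session- or identity-scoped file system handed out
            creations.incrementAndGet();
            return delegate.create(identity);
        }

        public long creations()
        {
            return creations.get();
        }
    }

Because create(ConnectorSession) is a default method that delegates to create(ConnectorIdentity), wrapping the identity variant alone is sufficient.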
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractTrinoCatalog.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractTrinoCatalog.java index 594fd10d50fe..3755c2f55622 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractTrinoCatalog.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractTrinoCatalog.java @@ -15,13 +15,13 @@ import com.google.common.collect.ImmutableMap; import io.trino.plugin.base.CatalogName; -import io.trino.plugin.hive.HdfsEnvironment; import io.trino.plugin.hive.HiveMetadata; import io.trino.plugin.hive.HiveViewNotSupportedException; import io.trino.plugin.hive.ViewReaderUtil; import io.trino.plugin.iceberg.ColumnIdentity; import io.trino.plugin.iceberg.IcebergMaterializedViewDefinition; import io.trino.plugin.iceberg.IcebergUtil; +import io.trino.plugin.iceberg.io.TrinoFileSystem; import io.trino.spi.TrinoException; import io.trino.spi.connector.CatalogSchemaTableName; import io.trino.spi.connector.ColumnMetadata; @@ -33,8 +33,6 @@ import io.trino.spi.type.TypeManager; import net.jodah.failsafe.Failsafe; import net.jodah.failsafe.RetryPolicy; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; import org.apache.iceberg.Table; @@ -191,15 +189,10 @@ protected String createNewTableName(String baseTableName) return tableName; } - protected void deleteTableDirectory( - ConnectorSession session, - SchemaTableName schemaTableName, - HdfsEnvironment hdfsEnvironment, - Path tableLocation) + protected void deleteTableDirectory(TrinoFileSystem fileSystem, SchemaTableName schemaTableName, String tableLocation) { try { - FileSystem fileSystem = hdfsEnvironment.getFileSystem(new HdfsEnvironment.HdfsContext(session), tableLocation); - fileSystem.delete(tableLocation, true); + fileSystem.deleteDirectory(tableLocation); } catch (IOException e) { throw new TrinoException(ICEBERG_FILESYSTEM_ERROR, format("Failed to delete directory %s of the table %s", tableLocation, schemaTableName), e);
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/file/FileMetastoreTableOperationsProvider.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/file/FileMetastoreTableOperationsProvider.java index 09f569a7962a..91c1ad0cd4df 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/file/FileMetastoreTableOperationsProvider.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/file/FileMetastoreTableOperationsProvider.java @@ -13,12 +13,11 @@ */ package io.trino.plugin.iceberg.catalog.file; -import io.trino.plugin.hive.HdfsEnvironment.HdfsContext; -import io.trino.plugin.iceberg.FileIoProvider; import io.trino.plugin.iceberg.catalog.IcebergTableOperations; import io.trino.plugin.iceberg.catalog.IcebergTableOperationsProvider; import io.trino.plugin.iceberg.catalog.TrinoCatalog; import io.trino.plugin.iceberg.catalog.hms.TrinoHiveCatalog; +import io.trino.plugin.iceberg.io.TrinoFileSystemFactory; import io.trino.spi.connector.ConnectorSession; import javax.inject.Inject; @@ -30,12 +29,12 @@ public class FileMetastoreTableOperationsProvider implements IcebergTableOperationsProvider { - private final FileIoProvider fileIoProvider; + private final TrinoFileSystemFactory fileSystemFactory; @Inject - public FileMetastoreTableOperationsProvider(FileIoProvider fileIoProvider) + public FileMetastoreTableOperationsProvider(TrinoFileSystemFactory fileSystemFactory) { - this.fileIoProvider = requireNonNull(fileIoProvider, "fileIoProvider is null"); + this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null"); } @Override @@ -48,7 +47,7 @@ public IcebergTableOperations createTableOperations( Optional location) { return new FileMetastoreTableOperations( - fileIoProvider.createFileIo(new HdfsContext(session), session.getQueryId()), + fileSystemFactory.create(session).toFileIo(), ((TrinoHiveCatalog)
catalog).getMetastore(), session, database,
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergTableOperationsProvider.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergTableOperationsProvider.java index d603db336050..1f28b67d5c5d 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergTableOperationsProvider.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergTableOperationsProvider.java @@ -15,13 +15,12 @@ import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.services.glue.AWSGlueAsync; -import io.trino.plugin.hive.HdfsEnvironment.HdfsContext; import io.trino.plugin.hive.metastore.glue.GlueHiveMetastoreConfig; import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats; -import io.trino.plugin.iceberg.FileIoProvider; import io.trino.plugin.iceberg.catalog.IcebergTableOperations; import io.trino.plugin.iceberg.catalog.IcebergTableOperationsProvider; import io.trino.plugin.iceberg.catalog.TrinoCatalog; +import io.trino.plugin.iceberg.io.TrinoFileSystemFactory; import io.trino.spi.connector.ConnectorSession; import javax.inject.Inject; @@ -34,14 +33,18 @@ public class GlueIcebergTableOperationsProvider implements IcebergTableOperationsProvider { - private final FileIoProvider fileIoProvider; + private final TrinoFileSystemFactory fileSystemFactory; private final AWSGlueAsync glueClient; private final GlueMetastoreStats stats; @Inject - public GlueIcebergTableOperationsProvider(FileIoProvider fileIoProvider, GlueMetastoreStats stats, GlueHiveMetastoreConfig glueConfig, AWSCredentialsProvider credentialsProvider) + public GlueIcebergTableOperationsProvider( + TrinoFileSystemFactory fileSystemFactory, + GlueMetastoreStats stats, + GlueHiveMetastoreConfig glueConfig, + AWSCredentialsProvider credentialsProvider) { - this.fileIoProvider = requireNonNull(fileIoProvider, "fileIoProvider is null"); + this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null"); this.stats = requireNonNull(stats, "stats is null"); requireNonNull(glueConfig, "glueConfig is null"); requireNonNull(credentialsProvider, "credentialsProvider is null"); @@ -60,7 +63,7 @@ public IcebergTableOperations createTableOperations( return new GlueIcebergTableOperations( glueClient, stats, - fileIoProvider.createFileIo(new HdfsContext(session), session.getQueryId()), + fileSystemFactory.create(session).toFileIo(), session, database, table,
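FileMetastoreTableOperations and GlueIcebergTableOperations above (and HiveMetastoreTableOperations below) now follow the same two-step pattern: build a session-scoped TrinoFileSystem, then adapt it to Iceberg's FileIO. A short fragment illustrating that pattern; metadataLocation is a stand-in, and toFileIo() presumably hands back the ForwardingFileIo adapter added later in this change:

    import io.trino.plugin.iceberg.io.TrinoFileSystem;
    import org.apache.iceberg.io.FileIO;
    import org.apache.iceberg.io.InputFile;

    // fileSystemFactory is the injected TrinoFileSystemFactory; session is the ConnectorSession
    TrinoFileSystem fileSystem = fileSystemFactory.create(session);
    FileIO fileIo = fileSystem.toFileIo();
    InputFile metadataFile = fileIo.newInputFile(metadataLocation);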
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java index c5568153f8ee..058013f81fd3 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java @@ -35,12 +35,12 @@ import com.google.common.collect.ImmutableMap; import io.airlift.log.Logger; import io.trino.plugin.base.CatalogName; -import io.trino.plugin.hive.HdfsEnvironment; import io.trino.plugin.hive.SchemaAlreadyExistsException; import io.trino.plugin.hive.ViewAlreadyExistsException; import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats; import io.trino.plugin.iceberg.catalog.AbstractTrinoCatalog; import io.trino.plugin.iceberg.catalog.IcebergTableOperationsProvider; +import io.trino.plugin.iceberg.io.TrinoFileSystemFactory; import io.trino.spi.TrinoException; import io.trino.spi.connector.CatalogSchemaTableName; import io.trino.spi.connector.ConnectorMaterializedViewDefinition; @@ -111,7 +111,7 @@ public class TrinoGlueCatalog { private static final Logger LOG = Logger.get(TrinoGlueCatalog.class); - private final HdfsEnvironment hdfsEnvironment; + private final TrinoFileSystemFactory fileSystemFactory; private final Optional defaultSchemaLocation; private final AWSGlueAsync glueClient; private final GlueMetastoreStats stats; @@ -120,7 +120,7 @@ public class TrinoGlueCatalog public TrinoGlueCatalog( CatalogName catalogName, - HdfsEnvironment hdfsEnvironment, + TrinoFileSystemFactory fileSystemFactory, TypeManager typeManager, IcebergTableOperationsProvider tableOperationsProvider, String trinoVersion, @@ -130,7 +130,7 @@ public TrinoGlueCatalog( boolean useUniqueTableLocation) { super(catalogName, typeManager, tableOperationsProvider, trinoVersion, useUniqueTableLocation); - this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null"); + this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null"); this.glueClient = requireNonNull(glueClient, "glueClient is null"); this.stats = requireNonNull(stats, "stats is null"); this.defaultSchemaLocation = requireNonNull(defaultSchemaLocation, "defaultSchemaLocation is null"); @@ -311,7 +311,7 @@ public void dropTable(ConnectorSession session, SchemaTableName schemaTableName) throw new TrinoException(HIVE_METASTORE_ERROR, e); } dropTableData(table.io(), table.operations().current()); - deleteTableDirectory(session, schemaTableName, hdfsEnvironment, new Path(table.location())); + deleteTableDirectory(fileSystemFactory.create(session), schemaTableName, table.location()); } @Override
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalogFactory.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalogFactory.java index 2c9c6758b26a..ea188bdb1404 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalogFactory.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalogFactory.java @@ -16,7 +16,6 @@ import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.services.glue.AWSGlueAsync; import io.trino.plugin.base.CatalogName; -import io.trino.plugin.hive.HdfsEnvironment; import io.trino.plugin.hive.NodeVersion; import io.trino.plugin.hive.metastore.glue.GlueHiveMetastoreConfig; import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats; @@ -24,6 +23,7 @@ import io.trino.plugin.iceberg.catalog.IcebergTableOperationsProvider; import io.trino.plugin.iceberg.catalog.TrinoCatalog; import io.trino.plugin.iceberg.catalog.TrinoCatalogFactory; +import io.trino.plugin.iceberg.io.TrinoFileSystemFactory; import io.trino.spi.security.ConnectorIdentity; import io.trino.spi.type.TypeManager; import org.weakref.jmx.Flatten; @@ -40,7 +40,7 @@ public class TrinoGlueCatalogFactory implements TrinoCatalogFactory { private final CatalogName catalogName; - private final HdfsEnvironment hdfsEnvironment; + private final TrinoFileSystemFactory fileSystemFactory; private final TypeManager typeManager; private final IcebergTableOperationsProvider tableOperationsProvider; private final String trinoVersion; @@ -52,7 +52,7 @@ public class TrinoGlueCatalogFactory @Inject public TrinoGlueCatalogFactory( CatalogName catalogName, - HdfsEnvironment hdfsEnvironment, + TrinoFileSystemFactory
fileSystemFactory, TypeManager typeManager, IcebergTableOperationsProvider tableOperationsProvider, NodeVersion nodeVersion, @@ -62,7 +62,7 @@ public TrinoGlueCatalogFactory( GlueMetastoreStats stats) { this.catalogName = requireNonNull(catalogName, "catalogName is null"); - this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null"); + this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null"); this.typeManager = requireNonNull(typeManager, "typeManager is null"); this.tableOperationsProvider = requireNonNull(tableOperationsProvider, "tableOperationsProvider is null"); this.trinoVersion = requireNonNull(nodeVersion, "nodeVersion is null").toString(); @@ -87,7 +87,7 @@ public TrinoCatalog create(ConnectorIdentity identity) { return new TrinoGlueCatalog( catalogName, - hdfsEnvironment, + fileSystemFactory, typeManager, tableOperationsProvider, trinoVersion, diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/HiveMetastoreTableOperationsProvider.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/HiveMetastoreTableOperationsProvider.java index 8ee2b606914b..5815c746cacd 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/HiveMetastoreTableOperationsProvider.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/HiveMetastoreTableOperationsProvider.java @@ -13,12 +13,11 @@ */ package io.trino.plugin.iceberg.catalog.hms; -import io.trino.plugin.hive.HdfsEnvironment.HdfsContext; import io.trino.plugin.hive.metastore.thrift.ThriftMetastoreFactory; -import io.trino.plugin.iceberg.FileIoProvider; import io.trino.plugin.iceberg.catalog.IcebergTableOperations; import io.trino.plugin.iceberg.catalog.IcebergTableOperationsProvider; import io.trino.plugin.iceberg.catalog.TrinoCatalog; +import io.trino.plugin.iceberg.io.TrinoFileSystemFactory; import io.trino.spi.connector.ConnectorSession; import javax.inject.Inject; @@ -30,13 +29,13 @@ public class HiveMetastoreTableOperationsProvider implements IcebergTableOperationsProvider { - private final FileIoProvider fileIoProvider; + private final TrinoFileSystemFactory fileSystemFactory; private final ThriftMetastoreFactory thriftMetastoreFactory; @Inject - public HiveMetastoreTableOperationsProvider(FileIoProvider fileIoProvider, ThriftMetastoreFactory thriftMetastoreFactory) + public HiveMetastoreTableOperationsProvider(TrinoFileSystemFactory fileSystemFactory, ThriftMetastoreFactory thriftMetastoreFactory) { - this.fileIoProvider = requireNonNull(fileIoProvider, "fileIoProvider is null"); + this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null"); this.thriftMetastoreFactory = requireNonNull(thriftMetastoreFactory, "thriftMetastoreFactory is null"); } @@ -50,7 +49,7 @@ public IcebergTableOperations createTableOperations( Optional location) { return new HiveMetastoreTableOperations( - fileIoProvider.createFileIo(new HdfsContext(session), session.getQueryId()), + fileSystemFactory.create(session).toFileIo(), ((TrinoHiveCatalog) catalog).getMetastore(), thriftMetastoreFactory.createMetastore(Optional.of(session.getIdentity())), session, diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/TrinoHiveCatalog.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/TrinoHiveCatalog.java index 963bb5059e44..b31d5d82a3c4 100644 --- 
a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/TrinoHiveCatalog.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/TrinoHiveCatalog.java @@ -17,8 +17,6 @@ import com.google.common.collect.ImmutableSet; import io.airlift.log.Logger; import io.trino.plugin.base.CatalogName; -import io.trino.plugin.hive.HdfsEnvironment; -import io.trino.plugin.hive.HdfsEnvironment.HdfsContext; import io.trino.plugin.hive.HiveSchemaProperties; import io.trino.plugin.hive.TableAlreadyExistsException; import io.trino.plugin.hive.ViewAlreadyExistsException; @@ -31,6 +29,7 @@ import io.trino.plugin.iceberg.ColumnIdentity; import io.trino.plugin.iceberg.catalog.AbstractTrinoCatalog; import io.trino.plugin.iceberg.catalog.IcebergTableOperationsProvider; +import io.trino.plugin.iceberg.io.TrinoFileSystemFactory; import io.trino.spi.TrinoException; import io.trino.spi.connector.CatalogSchemaTableName; import io.trino.spi.connector.ConnectorMaterializedViewDefinition; @@ -43,8 +42,6 @@ import io.trino.spi.connector.ViewNotFoundException; import io.trino.spi.security.TrinoPrincipal; import io.trino.spi.type.TypeManager; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.iceberg.BaseTable; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; @@ -62,6 +59,7 @@ import static com.google.common.base.Preconditions.checkState; import static com.google.common.collect.ImmutableList.toImmutableList; +import static io.trino.plugin.hive.HiveErrorCode.HIVE_DATABASE_LOCATION_ERROR; import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA; import static io.trino.plugin.hive.HiveMetadata.STORAGE_TABLE; import static io.trino.plugin.hive.HiveMetadata.TABLE_COMMENT; @@ -75,7 +73,6 @@ import static io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES; import static io.trino.plugin.hive.metastore.StorageFormat.VIEW_STORAGE_FORMAT; import static io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema; -import static io.trino.plugin.hive.util.HiveWriteUtils.getTableDefaultLocation; import static io.trino.plugin.iceberg.IcebergMaterializedViewAdditionalProperties.STORAGE_SCHEMA; import static io.trino.plugin.iceberg.IcebergMaterializedViewDefinition.encodeMaterializedViewData; import static io.trino.plugin.iceberg.IcebergMaterializedViewDefinition.fromConnectorMaterializedViewDefinition; @@ -91,6 +88,8 @@ import static io.trino.spi.StandardErrorCode.SCHEMA_NOT_EMPTY; import static io.trino.spi.StandardErrorCode.UNSUPPORTED_TABLE_TYPE; import static io.trino.spi.connector.SchemaTableName.schemaTableName; +import static java.lang.String.format; +import static java.lang.String.join; import static java.util.Objects.requireNonNull; import static org.apache.hadoop.hive.metastore.TableType.VIRTUAL_VIEW; import static org.apache.iceberg.CatalogUtil.dropTableData; @@ -102,7 +101,7 @@ public class TrinoHiveCatalog public static final String DEPENDS_ON_TABLES = "dependsOnTables"; private final CachingHiveMetastore metastore; - private final HdfsEnvironment hdfsEnvironment; + private final TrinoFileSystemFactory fileSystemFactory; private final boolean isUsingSystemSecurity; private final boolean deleteSchemaLocationsFallback; @@ -111,7 +110,7 @@ public class TrinoHiveCatalog public TrinoHiveCatalog( CatalogName catalogName, CachingHiveMetastore metastore, - HdfsEnvironment hdfsEnvironment, + TrinoFileSystemFactory fileSystemFactory, TypeManager typeManager, IcebergTableOperationsProvider 
tableOperationsProvider, String trinoVersion, @@ -121,7 +120,7 @@ public TrinoHiveCatalog( { super(catalogName, typeManager, tableOperationsProvider, trinoVersion, useUniqueTableLocation); this.metastore = requireNonNull(metastore, "metastore is null"); - this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null"); + this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null"); this.isUsingSystemSecurity = isUsingSystemSecurity; this.deleteSchemaLocationsFallback = deleteSchemaLocationsFallback; } @@ -166,9 +165,9 @@ public void createNamespace(ConnectorSession session, String namespace, Map location = getSchemaLocation(properties).map(uri -> { try { - hdfsEnvironment.getFileSystem(new HdfsContext(session), new Path(uri)); + fileSystemFactory.create(session).newInputFile(uri).exists(); } - catch (IOException | IllegalArgumentException e) { + catch (IOException e) { throw new TrinoException(INVALID_SCHEMA_PROPERTY, "Invalid location URI: " + uri, e); } return uri; @@ -192,18 +191,16 @@ public void dropNamespace(ConnectorSession session, String namespace) throw new TrinoException(SCHEMA_NOT_EMPTY, "Schema not empty: " + namespace); } - Optional location = metastore.getDatabase(namespace) + Optional location = metastore.getDatabase(namespace) .orElseThrow(() -> new SchemaNotFoundException(namespace)) - .getLocation() - .map(Path::new); + .getLocation(); // If we see files in the schema location, don't delete it. // If we see no files, request deletion. // If we fail to check the schema location, behave according to fallback. boolean deleteData = location.map(path -> { - HdfsContext context = new HdfsContext(session); - try (FileSystem fs = hdfsEnvironment.getFileSystem(context, path)) { - return !fs.listLocatedStatus(path).hasNext(); + try { + return !fileSystemFactory.create(session).listFiles(path).hasNext(); } catch (IOException e) { log.warn(e, "Could not check schema directory '%s'", path); @@ -271,7 +268,7 @@ public void dropTable(ConnectorSession session, SchemaTableName schemaTableName) // Use the Iceberg routine for dropping the table data because the data files // of the Iceberg table may be located in different locations dropTableData(table.io(), metadata); - deleteTableDirectory(session, schemaTableName, hdfsEnvironment, new Path(metastoreTable.getStorage().getLocation())); + deleteTableDirectory(fileSystemFactory.create(session), schemaTableName, metastoreTable.getStorage().getLocation()); } @Override @@ -335,8 +332,9 @@ public String defaultTableLocation(ConnectorSession session, SchemaTableName sch Database database = metastore.getDatabase(schemaTableName.getSchemaName()) .orElseThrow(() -> new SchemaNotFoundException(schemaTableName.getSchemaName())); String tableNameForLocation = createNewTableName(schemaTableName.getTableName()); - return getTableDefaultLocation(database, new HdfsEnvironment.HdfsContext(session), hdfsEnvironment, - schemaTableName.getSchemaName(), tableNameForLocation).toString(); + String location = database.getLocation().orElseThrow(() -> + new TrinoException(HIVE_DATABASE_LOCATION_ERROR, format("Database '%s' location is not set", schemaTableName.getSchemaName()))); + return join("/", location, tableNameForLocation); } @Override diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/TrinoHiveCatalogFactory.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/TrinoHiveCatalogFactory.java index 018618a618db..0c92aa21c1c0 100644 --- 
a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/TrinoHiveCatalogFactory.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/TrinoHiveCatalogFactory.java @@ -14,7 +14,6 @@ package io.trino.plugin.iceberg.catalog.hms; import io.trino.plugin.base.CatalogName; -import io.trino.plugin.hive.HdfsEnvironment; import io.trino.plugin.hive.NodeVersion; import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.plugin.iceberg.IcebergConfig; @@ -22,6 +21,7 @@ import io.trino.plugin.iceberg.catalog.IcebergTableOperationsProvider; import io.trino.plugin.iceberg.catalog.TrinoCatalog; import io.trino.plugin.iceberg.catalog.TrinoCatalogFactory; +import io.trino.plugin.iceberg.io.TrinoFileSystemFactory; import io.trino.spi.security.ConnectorIdentity; import io.trino.spi.type.TypeManager; @@ -38,7 +38,7 @@ public class TrinoHiveCatalogFactory { private final CatalogName catalogName; private final HiveMetastoreFactory metastoreFactory; - private final HdfsEnvironment hdfsEnvironment; + private final TrinoFileSystemFactory fileSystemFactory; private final TypeManager typeManager; private final IcebergTableOperationsProvider tableOperationsProvider; private final String trinoVersion; @@ -51,7 +51,7 @@ public TrinoHiveCatalogFactory( IcebergConfig config, CatalogName catalogName, HiveMetastoreFactory metastoreFactory, - HdfsEnvironment hdfsEnvironment, + TrinoFileSystemFactory fileSystemFactory, TypeManager typeManager, IcebergTableOperationsProvider tableOperationsProvider, NodeVersion nodeVersion, @@ -59,7 +59,7 @@ public TrinoHiveCatalogFactory( { this.catalogName = requireNonNull(catalogName, "catalogName is null"); this.metastoreFactory = requireNonNull(metastoreFactory, "metastoreFactory is null"); - this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null"); + this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null"); this.typeManager = requireNonNull(typeManager, "typeManager is null"); this.tableOperationsProvider = requireNonNull(tableOperationsProvider, "tableOperationProvider is null"); this.trinoVersion = requireNonNull(nodeVersion, "trinoVersion is null").toString(); @@ -76,7 +76,7 @@ public TrinoCatalog create(ConnectorIdentity identity) return new TrinoHiveCatalog( catalogName, memoizeMetastore(metastoreFactory.createMetastore(Optional.of(identity)), 1000), - hdfsEnvironment, + fileSystemFactory, typeManager, tableOperationsProvider, trinoVersion, diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/delete/DeleteFile.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/delete/DeleteFile.java index 4dc9c0e3c050..feb28ac3afff 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/delete/DeleteFile.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/delete/DeleteFile.java @@ -89,7 +89,7 @@ public FileContent content() } @JsonProperty - public CharSequence path() + public String path() { return path; } diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/delete/IcebergPositionDeletePageSink.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/delete/IcebergPositionDeletePageSink.java index 5c2b43050918..704fddb9e37f 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/delete/IcebergPositionDeletePageSink.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/delete/IcebergPositionDeletePageSink.java @@ -15,22 +15,18 @@ import 
io.airlift.json.JsonCodec; import io.airlift.slice.Slice; -import io.trino.plugin.hive.HdfsEnvironment; -import io.trino.plugin.hive.HdfsEnvironment.HdfsContext; import io.trino.plugin.iceberg.CommitTaskData; -import io.trino.plugin.iceberg.FileIoProvider; import io.trino.plugin.iceberg.IcebergFileFormat; import io.trino.plugin.iceberg.IcebergFileWriter; import io.trino.plugin.iceberg.IcebergFileWriterFactory; import io.trino.plugin.iceberg.MetricsWrapper; import io.trino.plugin.iceberg.PartitionData; +import io.trino.plugin.iceberg.io.TrinoFileSystem; import io.trino.spi.Page; import io.trino.spi.block.Block; import io.trino.spi.block.RunLengthEncodedBlock; import io.trino.spi.connector.ConnectorPageSink; import io.trino.spi.connector.ConnectorSession; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapred.JobConf; import org.apache.iceberg.FileContent; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.PartitionSpecParser; @@ -45,7 +41,6 @@ import static com.google.common.base.Preconditions.checkArgument; import static io.airlift.slice.Slices.utf8Slice; import static io.airlift.slice.Slices.wrappedBuffer; -import static io.trino.plugin.hive.util.ConfigurationUtils.toJobConf; import static io.trino.spi.predicate.Utils.nativeValueToBlock; import static io.trino.spi.type.VarcharType.VARCHAR; import static java.util.Objects.requireNonNull; @@ -72,9 +67,7 @@ public IcebergPositionDeletePageSink( Optional partition, LocationProvider locationProvider, IcebergFileWriterFactory fileWriterFactory, - HdfsEnvironment hdfsEnvironment, - HdfsContext hdfsContext, - FileIoProvider fileIoProvider, + TrinoFileSystem fileSystem, JsonCodec jsonCodec, ConnectorSession session, IcebergFileFormat fileFormat, @@ -91,8 +84,7 @@ public IcebergPositionDeletePageSink( this.outputPath = partition .map(partitionData -> locationProvider.newDataLocation(partitionSpec, partitionData, fileName)) .orElseGet(() -> locationProvider.newDataLocation(fileName)); - JobConf jobConf = toJobConf(hdfsEnvironment.getConfiguration(hdfsContext, new Path(outputPath))); - this.writer = fileWriterFactory.createPositionDeleteWriter(new Path(outputPath), jobConf, session, hdfsContext, fileIoProvider, fileFormat, storageProperties); + this.writer = fileWriterFactory.createPositionDeleteWriter(fileSystem, outputPath, session, fileFormat, storageProperties); } @Override diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/FileEntry.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/FileEntry.java new file mode 100644 index 000000000000..4dba8d5aa783 --- /dev/null +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/FileEntry.java @@ -0,0 +1,26 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.iceberg.io; + +import static com.google.common.base.Preconditions.checkArgument; +import static java.util.Objects.requireNonNull; + +public record FileEntry(String path, long length, long lastModified) +{ + public FileEntry + { + checkArgument(length >= 0, "length is negative"); + requireNonNull(path, "path is null"); + } +}
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/FileIterator.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/FileIterator.java new file mode 100644 index 000000000000..b2525fe6243e --- /dev/null +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/FileIterator.java @@ -0,0 +1,44 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.iceberg.io; + +import java.io.IOException; +import java.util.NoSuchElementException; + +public interface FileIterator +{ + boolean hasNext() + throws IOException; + + FileEntry next() + throws IOException; + + static FileIterator empty() + { + return new FileIterator() + { + @Override + public boolean hasNext() + { + return false; + } + + @Override + public FileEntry next() + { + throw new NoSuchElementException(); + } + }; + } +}
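FileEntry and FileIterator above replace Hadoop's RemoteIterator of file statuses in listings. Note that hasNext() and next() declare IOException, which is why this is a dedicated interface rather than java.util.Iterator. A hypothetical directory walk using listFiles from the TrinoFileSystem interface that follows:

    import io.trino.plugin.iceberg.io.FileEntry;
    import io.trino.plugin.iceberg.io.FileIterator;
    import io.trino.plugin.iceberg.io.TrinoFileSystem;

    import java.io.IOException;

    // fileSystem and location are stand-ins; sums the length of every file under location
    static long totalDataSize(TrinoFileSystem fileSystem, String location)
            throws IOException
    {
        long totalSize = 0;
        FileIterator iterator = fileSystem.listFiles(location);
        while (iterator.hasNext()) {
            FileEntry entry = iterator.next();
            totalSize += entry.length();
        }
        return totalSize;
    }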
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/TrinoFileSystem.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/TrinoFileSystem.java new file mode 100644 index 000000000000..a1141b068ccb --- /dev/null +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/TrinoFileSystem.java @@ -0,0 +1,38 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.iceberg.io; + +import org.apache.iceberg.io.FileIO; + +import java.io.IOException; + +public interface TrinoFileSystem +{ + TrinoInputFile newInputFile(String path); + + TrinoInputFile newInputFile(String path, long length); + + TrinoOutputFile newOutputFile(String path); + + void deleteFile(String path) + throws IOException; + + void deleteDirectory(String path) + throws IOException; + + FileIterator listFiles(String path) + throws IOException; + + FileIO toFileIo(); +}
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/TrinoFileSystemFactory.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/TrinoFileSystemFactory.java new file mode 100644 index 000000000000..43b742112da0 --- /dev/null +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/TrinoFileSystemFactory.java @@ -0,0 +1,27 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.iceberg.io; + +import io.trino.spi.connector.ConnectorSession; +import io.trino.spi.security.ConnectorIdentity; + +public interface TrinoFileSystemFactory +{ + TrinoFileSystem create(ConnectorIdentity identity); + + default TrinoFileSystem create(ConnectorSession session) + { + return create(session.getIdentity()); + } +}
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/TrinoInput.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/TrinoInput.java new file mode 100644 index 000000000000..c74ea749dcfc --- /dev/null +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/TrinoInput.java @@ -0,0 +1,49 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.iceberg.io; + +import io.airlift.slice.Slice; +import io.airlift.slice.Slices; +import org.apache.iceberg.io.SeekableInputStream; + +import java.io.Closeable; +import java.io.IOException; + +public interface TrinoInput + extends Closeable +{ + SeekableInputStream inputStream(); + + void readFully(long position, byte[] buffer, int bufferOffset, int bufferLength) + throws IOException; + + int readTail(byte[] buffer, int bufferOffset, int bufferLength) + throws IOException; + + default Slice readFully(long position, int length) + throws IOException + { + byte[] buffer = new byte[length]; + readFully(position, buffer, 0, length); + return Slices.wrappedBuffer(buffer); + } + + default Slice readTail(int length) + throws IOException + { + byte[] buffer = new byte[length]; + int read = readTail(buffer, 0, length); + return Slices.wrappedBuffer(buffer, 0, read); + } +}
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/TrinoInputFile.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/TrinoInputFile.java new file mode 100644 index 000000000000..e8af7d0f3853 --- /dev/null +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/TrinoInputFile.java @@ -0,0 +1,33 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.iceberg.io; + +import java.io.IOException; + +public interface TrinoInputFile +{ + TrinoInput newInput() + throws IOException; + + long length() + throws IOException; + + long modificationTime() + throws IOException; + + boolean exists() + throws IOException; + + String location(); +}
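TrinoInput pairs the positional read primitives with Slice-returning defaults; readTail(int) may read fewer bytes than requested on short files, which the default accounts for by slicing to the actual count. A small usage sketch, with inputFile as a stand-in (the ORC and Parquet data sources that follow use exactly this readFully/readTail pair):

    import io.airlift.slice.Slice;
    import io.trino.plugin.iceberg.io.TrinoInput;
    import io.trino.plugin.iceberg.io.TrinoInputFile;

    import java.io.IOException;

    static void peek(TrinoInputFile inputFile)
            throws IOException
    {
        try (TrinoInput input = inputFile.newInput()) {
            Slice header = input.readFully(0, 4); // positioned read from the file start
            Slice tail = input.readTail(8);       // last bytes of the file, e.g. a footer length and magic
        }
    }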
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/TrinoOrcDataSource.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/TrinoOrcDataSource.java new file mode 100644 index 000000000000..789f6239083a --- /dev/null +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/TrinoOrcDataSource.java @@ -0,0 +1,65 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.iceberg.io; + +import io.airlift.slice.Slice; +import io.trino.orc.AbstractOrcDataSource; +import io.trino.orc.OrcDataSourceId; +import io.trino.orc.OrcReaderOptions; +import io.trino.plugin.hive.FileFormatDataSourceStats; + +import java.io.IOException; + +import static java.util.Objects.requireNonNull; + +public class TrinoOrcDataSource + extends AbstractOrcDataSource +{ + private final FileFormatDataSourceStats stats; + private final TrinoInput input; + + public TrinoOrcDataSource(TrinoInputFile file, OrcReaderOptions options, FileFormatDataSourceStats stats) + throws IOException + { + super(new OrcDataSourceId(file.location()), file.length(), options); + this.stats = requireNonNull(stats, "stats is null"); + this.input = file.newInput(); + } + + @Override + public void close() + throws IOException + { + input.close(); + } + + @Override + protected Slice readTailInternal(int length) + throws IOException + { + long readStart = System.nanoTime(); + Slice tail = input.readTail(length); + stats.readDataBytesPerSecond(tail.length(), System.nanoTime() - readStart); + return tail; + } + + @Override + protected void readInternal(long position, byte[] buffer, int bufferOffset, int bufferLength) + throws IOException + { + long readStart = System.nanoTime(); + input.readFully(position, buffer, bufferOffset, bufferLength); + stats.readDataBytesPerSecond(bufferLength, System.nanoTime() - readStart); + } +}
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/FileIoProvider.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/TrinoOutputFile.java similarity index 71% rename from plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/FileIoProvider.java rename to plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/TrinoOutputFile.java index 13c8fbe0547c..b9dc3900c64e 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/FileIoProvider.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/TrinoOutputFile.java @@ -11,12 +11,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.trino.plugin.iceberg; -import io.trino.plugin.hive.HdfsEnvironment.HdfsContext; -import org.apache.iceberg.io.FileIO; +package io.trino.plugin.iceberg.io; -public interface FileIoProvider +import java.io.IOException; +import java.io.OutputStream; + +public interface TrinoOutputFile { - FileIO createFileIo(HdfsContext hdfsContext, String queryId); + OutputStream create() + throws IOException; + + String location(); }
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/TrinoParquetDataSource.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/TrinoParquetDataSource.java new file mode 100644 index 000000000000..9aee5944df78 --- /dev/null +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/TrinoParquetDataSource.java @@ -0,0 +1,65 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package io.trino.plugin.iceberg.io; + +import io.airlift.slice.Slice; +import io.trino.parquet.AbstractParquetDataSource; +import io.trino.parquet.ParquetDataSourceId; +import io.trino.parquet.ParquetReaderOptions; +import io.trino.plugin.hive.FileFormatDataSourceStats; + +import java.io.IOException; + +import static java.util.Objects.requireNonNull; + +public class TrinoParquetDataSource + extends AbstractParquetDataSource +{ + private final FileFormatDataSourceStats stats; + private final TrinoInput input; + + public TrinoParquetDataSource(TrinoInputFile file, ParquetReaderOptions options, FileFormatDataSourceStats stats) + throws IOException + { + super(new ParquetDataSourceId(file.location()), file.length(), options); + this.stats = requireNonNull(stats, "stats is null"); + this.input = file.newInput(); + } + + @Override + public void close() + throws IOException + { + input.close(); + } + + @Override + protected Slice readTailInternal(int length) + throws IOException + { + long readStart = System.nanoTime(); + Slice tail = input.readTail(length); + stats.readDataBytesPerSecond(tail.length(), System.nanoTime() - readStart); + return tail; + } + + @Override + protected void readInternal(long position, byte[] buffer, int bufferOffset, int bufferLength) + throws IOException + { + long readStart = System.nanoTime(); + input.readFully(position, buffer, bufferOffset, bufferLength); + stats.readDataBytesPerSecond(bufferLength, System.nanoTime() - readStart); + } +} diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/fileio/ForwardingFileIo.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/fileio/ForwardingFileIo.java new file mode 100644 index 000000000000..320b6875a481 --- /dev/null +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/fileio/ForwardingFileIo.java @@ -0,0 +1,64 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.iceberg.io.fileio; + +import io.trino.plugin.iceberg.io.TrinoFileSystem; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.io.OutputFile; + +import java.io.IOException; +import java.io.UncheckedIOException; + +import static java.util.Objects.requireNonNull; + +public class ForwardingFileIo + implements FileIO +{ + private final TrinoFileSystem fileSystem; + + public ForwardingFileIo(TrinoFileSystem fileSystem) + { + this.fileSystem = requireNonNull(fileSystem, "fileSystem is null"); + } + + @Override + public InputFile newInputFile(String path) + { + return new ForwardingInputFile(fileSystem.newInputFile(path)); + } + + @Override + public InputFile newInputFile(String path, long length) + { + return new ForwardingInputFile(fileSystem.newInputFile(path, length)); + } + + @Override + public OutputFile newOutputFile(String path) + { + return new ForwardingOutputFile(fileSystem, path); + } + + @Override + public void deleteFile(String path) + { + try { + fileSystem.deleteFile(path); + } + catch (IOException e) { + throw new UncheckedIOException("Failed to delete file: " + path, e); + } + } +}
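ForwardingFileIo is the bridge that lets Iceberg library code (catalogs, manifest readers, CatalogUtil.dropTableData as seen earlier in this change) run against a TrinoFileSystem. A hypothetical write/read/delete round trip through the adapter; location and the payload bytes are stand-ins:

    import io.trino.plugin.iceberg.io.TrinoFileSystem;
    import io.trino.plugin.iceberg.io.fileio.ForwardingFileIo;
    import org.apache.iceberg.io.FileIO;
    import org.apache.iceberg.io.PositionOutputStream;
    import org.apache.iceberg.io.SeekableInputStream;

    import java.io.IOException;

    static void roundTrip(TrinoFileSystem fileSystem, String location)
            throws IOException
    {
        FileIO fileIo = new ForwardingFileIo(fileSystem); // presumably what TrinoFileSystem.toFileIo() hands out
        try (PositionOutputStream out = fileIo.newOutputFile(location).create()) {
            out.write(new byte[] {1, 2, 3});
        }
        try (SeekableInputStream in = fileIo.newInputFile(location).newStream()) {
            int firstByte = in.read();
        }
        fileIo.deleteFile(location); // a failure surfaces as UncheckedIOException, matching FileIO's unchecked contract
    }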
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/fileio/ForwardingInputFile.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/fileio/ForwardingInputFile.java new file mode 100644 index 000000000000..6ffdb10f3d5f --- /dev/null +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/fileio/ForwardingInputFile.java @@ -0,0 +1,78 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.iceberg.io.fileio; + +import io.trino.plugin.iceberg.io.TrinoInputFile; +import org.apache.iceberg.exceptions.NotFoundException; +import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.io.SeekableInputStream; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.UncheckedIOException; + +import static java.util.Objects.requireNonNull; + +public class ForwardingInputFile + implements InputFile +{ + private final TrinoInputFile inputFile; + + public ForwardingInputFile(TrinoInputFile inputFile) + { + this.inputFile = requireNonNull(inputFile, "inputFile is null"); + } + + @Override + public long getLength() + { + try { + return inputFile.length(); + } + catch (IOException e) { + throw new UncheckedIOException("Failed to get status for file: " + location(), e); + } + } + + @Override + public SeekableInputStream newStream() + { + try { + return inputFile.newInput().inputStream(); + } + catch (FileNotFoundException e) { + throw new NotFoundException(e, "Failed to open input stream for file: %s", location()); + } + catch (IOException e) { + throw new UncheckedIOException("Failed to open input stream for file: " + location(), e); + } + } + + @Override + public String location() + { + return inputFile.location(); + } + + @Override + public boolean exists() + { + try { + return inputFile.exists(); + } + catch (IOException e) { + throw new UncheckedIOException("Failed to check existence for file: " + location(), e); + } + } +}
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/fileio/ForwardingOutputFile.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/fileio/ForwardingOutputFile.java new file mode 100644 index 000000000000..47bc824cd251 --- /dev/null +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/fileio/ForwardingOutputFile.java @@ -0,0 +1,115 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package io.trino.plugin.iceberg.io.fileio; + +import com.google.common.io.CountingOutputStream; +import io.trino.plugin.iceberg.io.TrinoFileSystem; +import io.trino.plugin.iceberg.io.TrinoOutputFile; +import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.io.PositionOutputStream; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.UncheckedIOException; + +import static java.util.Objects.requireNonNull; + +public class ForwardingOutputFile + implements OutputFile +{ + private final TrinoFileSystem fileSystem; + private final TrinoOutputFile outputFile; + + public ForwardingOutputFile(TrinoFileSystem fileSystem, String path) + { + this.fileSystem = requireNonNull(fileSystem, "fileSystem is null"); + this.outputFile = fileSystem.newOutputFile(path); + } + + @Override + public PositionOutputStream create() + { + try { + return new CountingPositionOutputStream(outputFile.create()); + } + catch (IOException e) { + throw new UncheckedIOException("Failed to create file: " + location(), e); + } + } + + @Override + public PositionOutputStream createOrOverwrite() + { + // we shouldn't need to overwrite existing files + return create(); + } + + @Override + public String location() + { + return outputFile.location(); + } + + @Override + public InputFile toInputFile() + { + return new ForwardingInputFile(fileSystem.newInputFile(outputFile.location())); + } + + private static class CountingPositionOutputStream + extends PositionOutputStream + { + private final CountingOutputStream stream; + + private CountingPositionOutputStream(OutputStream stream) + { + this.stream = new CountingOutputStream(stream); + } + + @Override + public long getPos() + { + return stream.getCount(); + } + + @Override + public void write(int b) + throws IOException + { + stream.write(b); + } + + @Override + public void write(byte[] b, int off, int len) + throws IOException + { + stream.write(b, off, len); + } + + @Override + public void flush() + throws IOException + { + stream.flush(); + } + + @Override + public void close() + throws IOException + { + stream.close(); + } + } +} diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/hdfs/HadoopPaths.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/hdfs/HadoopPaths.java new file mode 100644 index 000000000000..16ded7e9565b --- /dev/null +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/hdfs/HadoopPaths.java @@ -0,0 +1,42 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
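Note: Iceberg's PositionOutputStream requires getPos(), but TrinoOutputFile.create() hands back a plain OutputStream, so ForwardingOutputFile recovers the position by counting bytes with Guava's CountingOutputStream. That count is exact here because the stream is append-only and never seeks. A tiny demonstration of the invariant (IOException handling elided):

// Bytes written == write position for an append-only stream.
CountingOutputStream out = new CountingOutputStream(new ByteArrayOutputStream());
out.write(new byte[] {1, 2, 3});
out.write(7);
// out.getCount() is now 4, exactly the value Iceberg asks for via getPos()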
+ */ +package io.trino.plugin.iceberg.io.hdfs; + +import io.trino.spi.TrinoException; +import org.apache.hadoop.fs.Path; + +import java.net.URI; +import java.net.URLEncoder; + +import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_INVALID_METADATA; +import static java.nio.charset.StandardCharsets.UTF_8; + +public final class HadoopPaths +{ + private HadoopPaths() {} + + public static Path hadoopPath(String path) + { + // hack to preserve the original path for S3 if necessary + Path hadoopPath = new Path(path); + if ("s3".equals(hadoopPath.toUri().getScheme()) && !path.equals(hadoopPath.toString())) { + if (hadoopPath.toUri().getFragment() != null) { + throw new TrinoException(ICEBERG_INVALID_METADATA, "Unexpected URI fragment in path: " + path); + } + URI uri = URI.create(path); + return new Path(uri + "#" + URLEncoder.encode(uri.getPath(), UTF_8)); + } + return hadoopPath; + } +} diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/hdfs/HdfsFileIterator.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/hdfs/HdfsFileIterator.java new file mode 100644 index 000000000000..c3d6b5079da2 --- /dev/null +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/hdfs/HdfsFileIterator.java @@ -0,0 +1,72 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
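Note: Hadoop's Path normalizes the string it is given (collapsing "//", trimming trailing "/"), but an S3 object key is a literal string, so "dir//file" and "dir/file" name different objects. hadoopPath() detects when normalization changed an s3 path and smuggles the original through the URI fragment, URL-encoded, so a cooperating S3 file system can recover the exact key. A worked example of roughly what it produces:

// Input path (literal S3 key):   s3://bucket/dir//file.orc
// new Path(path).toString():     s3://bucket/dir/file.orc   (normalized, key lost)
// hadoopPath(path):              s3://bucket/dir//file.orc#%2Fdir%2F%2Ffile.orc
//                                (fragment = URLEncoder.encode("/dir//file.orc", UTF_8))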
+ */ +package io.trino.plugin.iceberg.io.hdfs; + +import io.trino.plugin.iceberg.io.FileEntry; +import io.trino.plugin.iceberg.io.FileIterator; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; + +import java.io.IOException; +import java.net.URI; + +import static com.google.common.base.Verify.verify; +import static java.util.Objects.requireNonNull; + +class HdfsFileIterator + implements FileIterator +{ + private final String listingPath; + private final URI listingUri; + private final RemoteIterator<LocatedFileStatus> iterator; + + public HdfsFileIterator(String listingPath, FileSystem fs, RemoteIterator<LocatedFileStatus> iterator) + { + this.listingPath = requireNonNull(listingPath, "listingPath is null"); + this.listingUri = new Path(listingPath).makeQualified(fs.getUri(), fs.getWorkingDirectory()).toUri(); + this.iterator = requireNonNull(iterator, "iterator is null"); + } + + @Override + public boolean hasNext() + throws IOException + { + return iterator.hasNext(); + } + + @Override + public FileEntry next() + throws IOException + { + LocatedFileStatus status = iterator.next(); + + verify(status.isFile(), "iterator returned a non-file: %s", status); + + URI pathUri = URI.create(status.getPath().toString()); + URI relativeUri = listingUri.relativize(pathUri); + verify(!relativeUri.equals(pathUri), "cannot relativize [%s] against [%s]", pathUri, listingUri); + + String path = listingPath; + if (!relativeUri.getPath().isEmpty()) { + if (!path.endsWith("/")) { + path += "/"; + } + path += relativeUri.getPath(); + } + + return new FileEntry(path, status.getLen(), status.getModificationTime()); + } +} diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/hdfs/HdfsFileSystem.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/hdfs/HdfsFileSystem.java new file mode 100644 index 000000000000..f9b38a54a653 --- /dev/null +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/hdfs/HdfsFileSystem.java @@ -0,0 +1,112 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
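Note: HdfsFileIterator.next() deliberately rebuilds each returned path from the caller's original spelling of the listing root rather than from the file system's canonical form, so quirks such as doubled slashes survive a round trip (the TestHdfsFileSystem listing test later in this patch locks this in). A worked trace under an assumed local listing:

// listingPath as given by caller:  /tmp/data//mydir
// qualified listingUri:            file:/tmp/data/mydir
// status.getPath() from listing:   file:/tmp/data/mydir/qqq
// relativized remainder:           qqq
// returned FileEntry path:         /tmp/data//mydir/qqq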
+ */ +package io.trino.plugin.iceberg.io.hdfs; + +import io.trino.plugin.hive.HdfsEnvironment; +import io.trino.plugin.hive.HdfsEnvironment.HdfsContext; +import io.trino.plugin.iceberg.io.FileIterator; +import io.trino.plugin.iceberg.io.TrinoFileSystem; +import io.trino.plugin.iceberg.io.TrinoInputFile; +import io.trino.plugin.iceberg.io.TrinoOutputFile; +import io.trino.plugin.iceberg.io.fileio.ForwardingFileIo; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.iceberg.io.FileIO; + +import java.io.FileNotFoundException; +import java.io.IOException; + +import static io.trino.plugin.iceberg.io.hdfs.HadoopPaths.hadoopPath; +import static java.util.Objects.requireNonNull; + +class HdfsFileSystem + implements TrinoFileSystem +{ + private final HdfsEnvironment environment; + private final HdfsContext context; + + public HdfsFileSystem(HdfsEnvironment environment, HdfsContext context) + { + this.environment = requireNonNull(environment, "environment is null"); + this.context = requireNonNull(context, "context is null"); + } + + @Override + public TrinoInputFile newInputFile(String path) + { + return new HdfsInputFile(path, null, environment, context); + } + + @Override + public TrinoInputFile newInputFile(String path, long length) + { + return new HdfsInputFile(path, length, environment, context); + } + + @Override + public TrinoOutputFile newOutputFile(String path) + { + return new HdfsOutputFile(path, environment, context); + } + + @Override + public void deleteFile(String path) + throws IOException + { + Path file = hadoopPath(path); + FileSystem fileSystem = environment.getFileSystem(context, file); + environment.doAs(context.getIdentity(), () -> { + if (!fileSystem.delete(file, false)) { + throw new IOException("Failed to delete file: " + file); + } + return null; + }); + } + + @Override + public void deleteDirectory(String path) + throws IOException + { + Path directory = hadoopPath(path); + FileSystem fileSystem = environment.getFileSystem(context, directory); + environment.doAs(context.getIdentity(), () -> { + if (!fileSystem.delete(directory, true) && fileSystem.exists(directory)) { + throw new IOException("Failed to delete directory: " + directory); + } + return null; + }); + } + + @Override + public FileIterator listFiles(String path) + throws IOException + { + Path directory = hadoopPath(path); + FileSystem fileSystem = environment.getFileSystem(context, directory); + return environment.doAs(context.getIdentity(), () -> { + try { + return new HdfsFileIterator(path, fileSystem, fileSystem.listFiles(directory, true)); + } + catch (FileNotFoundException e) { + return FileIterator.empty(); + } + }); + } + + @Override + public FileIO toFileIo() + { + return new ForwardingFileIo(this); + } +} diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/HdfsFileIoProvider.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/hdfs/HdfsFileSystemFactory.java similarity index 57% rename from plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/HdfsFileIoProvider.java rename to plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/hdfs/HdfsFileSystemFactory.java index 50467380a44f..2f0ed341327f 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/HdfsFileIoProvider.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/hdfs/HdfsFileSystemFactory.java @@ -11,30 +11,32 @@ * See the License for the specific language governing permissions and * limitations under the License. 
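Note: every Hadoop call in HdfsFileSystem runs inside environment.doAs(identity, ...) so impersonation is applied uniformly, and the two delete methods differ on missing paths: deleteFile fails when nothing was deleted, while deleteDirectory treats an absent directory as success. A usage sketch, assuming an HdfsEnvironment named environment and illustrative paths:

TrinoFileSystem fs = new HdfsFileSystemFactory(environment).create(ConnectorIdentity.ofUser("alice"));
fs.deleteFile("/warehouse/t/data/part-0.orc"); // throws IOException if the file is absent
fs.deleteDirectory("/warehouse/t/data");       // quietly succeeds if already gone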
*/ -package io.trino.plugin.iceberg; +package io.trino.plugin.iceberg.io.hdfs; import io.trino.plugin.hive.HdfsEnvironment; import io.trino.plugin.hive.HdfsEnvironment.HdfsContext; -import org.apache.iceberg.io.FileIO; +import io.trino.plugin.iceberg.io.TrinoFileSystem; +import io.trino.plugin.iceberg.io.TrinoFileSystemFactory; +import io.trino.spi.security.ConnectorIdentity; import javax.inject.Inject; import static java.util.Objects.requireNonNull; -public class HdfsFileIoProvider - implements FileIoProvider +public class HdfsFileSystemFactory + implements TrinoFileSystemFactory { - private final HdfsEnvironment hdfsEnvironment; + private final HdfsEnvironment environment; @Inject - public HdfsFileIoProvider(HdfsEnvironment hdfsEnvironment) + public HdfsFileSystemFactory(HdfsEnvironment environment) { - this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null"); + this.environment = requireNonNull(environment, "environment is null"); } @Override - public FileIO createFileIo(HdfsContext hdfsContext, String queryId) + public TrinoFileSystem create(ConnectorIdentity identity) { - return new HdfsFileIo(hdfsEnvironment, hdfsContext); + return new HdfsFileSystem(environment, new HdfsContext(identity)); } } diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/hdfs/HdfsInput.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/hdfs/HdfsInput.java new file mode 100644 index 000000000000..1f1945c95c36 --- /dev/null +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/hdfs/HdfsInput.java @@ -0,0 +1,120 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
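Note: the factory's contract shrinks from createFileIo(HdfsContext, queryId) to create(ConnectorIdentity); a file system is now scoped to an identity rather than a query, and code that still speaks Iceberg's FileIO converts at the edge. A sketch, with the environment and identity illustrative:

TrinoFileSystemFactory factory = new HdfsFileSystemFactory(environment);
TrinoFileSystem fileSystem = factory.create(ConnectorIdentity.ofUser("test"));
FileIO fileIo = fileSystem.toFileIo(); // only where Iceberg APIs require FileIO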
+ */ +package io.trino.plugin.iceberg.io.hdfs; + +import io.airlift.slice.Slice; +import io.trino.plugin.hive.util.FSDataInputStreamTail; +import io.trino.plugin.iceberg.io.TrinoInput; +import io.trino.plugin.iceberg.io.TrinoInputFile; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.iceberg.io.SeekableInputStream; + +import java.io.IOException; + +import static java.util.Objects.requireNonNull; + +class HdfsInput + implements TrinoInput +{ + private final FSDataInputStream stream; + private final TrinoInputFile inputFile; + + public HdfsInput(FSDataInputStream stream, TrinoInputFile inputFile) + { + this.stream = requireNonNull(stream, "stream is null"); + this.inputFile = requireNonNull(inputFile, "inputFile is null"); + } + + @Override + public SeekableInputStream inputStream() + { + return new HdfsSeekableInputStream(stream); + } + + @Override + public void readFully(long position, byte[] buffer, int bufferOffset, int bufferLength) + throws IOException + { + stream.readFully(position, buffer, bufferOffset, bufferLength); + } + + @Override + public int readTail(byte[] buffer, int bufferOffset, int bufferLength) + throws IOException + { + Slice tail = FSDataInputStreamTail.readTail(inputFile.location(), inputFile.length(), stream, bufferLength).getTailSlice(); + tail.getBytes(0, buffer, bufferOffset, tail.length()); + return tail.length(); + } + + @Override + public void close() + throws IOException + { + stream.close(); + } + + private static class HdfsSeekableInputStream + extends SeekableInputStream + { + private final FSDataInputStream stream; + + private HdfsSeekableInputStream(FSDataInputStream stream) + { + this.stream = requireNonNull(stream, "stream is null"); + } + + @Override + public long getPos() + throws IOException + { + return stream.getPos(); + } + + @Override + public void seek(long newPos) + throws IOException + { + stream.seek(newPos); + } + + @Override + public int read() + throws IOException + { + return stream.read(); + } + + @Override + public int read(byte[] b) + throws IOException + { + return stream.read(b); + } + + @Override + public int read(byte[] b, int off, int len) + throws IOException + { + return stream.read(b, off, len); + } + + @Override + public void close() + throws IOException + { + stream.close(); + } + } +} diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/hdfs/HdfsInputFile.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/hdfs/HdfsInputFile.java new file mode 100644 index 000000000000..1915b208d659 --- /dev/null +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/hdfs/HdfsInputFile.java @@ -0,0 +1,100 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
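Note: readTail exists so footer-based formats (ORC, Parquet) can fetch the end of a file in one positioned read, and FSDataInputStreamTail also validates the declared file length, which is what the reworked testIncorrectIcebergFileSizes assertion later in this patch exercises for both formats. A sketch of a footer read, with the buffer size illustrative and assuming TrinoInput is closeable, as HdfsInput's close() suggests:

try (TrinoInput input = inputFile.newInput()) {
    byte[] buffer = new byte[16 * 1024];
    int n = input.readTail(buffer, 0, buffer.length);
    // buffer[0..n) now holds the last n bytes; footer parsing starts from the end
}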
+ */ +package io.trino.plugin.iceberg.io.hdfs; + +import io.trino.plugin.hive.HdfsEnvironment; +import io.trino.plugin.hive.HdfsEnvironment.HdfsContext; +import io.trino.plugin.iceberg.io.TrinoInput; +import io.trino.plugin.iceberg.io.TrinoInputFile; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import java.io.IOException; + +import static com.google.common.base.Preconditions.checkArgument; +import static io.trino.plugin.iceberg.io.hdfs.HadoopPaths.hadoopPath; +import static java.util.Objects.requireNonNull; + +class HdfsInputFile + implements TrinoInputFile +{ + private final String path; + private final HdfsEnvironment environment; + private final HdfsContext context; + private final Path file; + private Long length; + private FileStatus status; + + public HdfsInputFile(String path, Long length, HdfsEnvironment environment, HdfsContext context) + { + this.path = requireNonNull(path, "path is null"); + this.environment = requireNonNull(environment, "environment is null"); + this.context = requireNonNull(context, "context is null"); + this.file = hadoopPath(path); + this.length = length; + checkArgument(length == null || length >= 0, "length is negative"); + } + + @Override + public TrinoInput newInput() + throws IOException + { + FileSystem fileSystem = environment.getFileSystem(context, file); + FSDataInputStream input = environment.doAs(context.getIdentity(), () -> fileSystem.open(file)); + return new HdfsInput(input, this); + } + + @Override + public long length() + throws IOException + { + if (length == null) { + length = lazyStatus().getLen(); + } + return length; + } + + @Override + public long modificationTime() + throws IOException + { + return lazyStatus().getModificationTime(); + } + + @Override + public boolean exists() + throws IOException + { + FileSystem fileSystem = environment.getFileSystem(context, file); + return environment.doAs(context.getIdentity(), () -> fileSystem.exists(file)); + } + + @Override + public String location() + { + return path; + } + + private FileStatus lazyStatus() + throws IOException + { + if (status == null) { + FileSystem fileSystem = environment.getFileSystem(context, file); + status = environment.doAs(context.getIdentity(), () -> fileSystem.getFileStatus(file)); + } + return status; + } +} diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/HdfsFileIo.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/hdfs/HdfsOutputFile.java similarity index 51% rename from plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/HdfsFileIo.java rename to plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/hdfs/HdfsOutputFile.java index 3c7da5533772..ded7f564c063 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/HdfsFileIo.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/io/hdfs/HdfsOutputFile.java @@ -11,54 +11,46 @@ * See the License for the specific language governing permissions and * limitations under the License. 
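Note: HdfsInputFile fetches its FileStatus lazily and caches it, so length() and modificationTime() together cost a single NameNode round trip, and a caller-supplied length avoids the RPC entirely:

TrinoInputFile byPath = fileSystem.newInputFile(path);      // length() -> getFileStatus RPC
TrinoInputFile sized = fileSystem.newInputFile(path, 1234); // length() -> 1234, no RPC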
*/ -package io.trino.plugin.iceberg; +package io.trino.plugin.iceberg.io.hdfs; import io.trino.plugin.hive.HdfsEnvironment; import io.trino.plugin.hive.HdfsEnvironment.HdfsContext; -import io.trino.spi.TrinoException; +import io.trino.plugin.iceberg.io.TrinoOutputFile; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.iceberg.io.FileIO; -import org.apache.iceberg.io.InputFile; -import org.apache.iceberg.io.OutputFile; import java.io.IOException; +import java.io.OutputStream; -import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_FILESYSTEM_ERROR; +import static io.trino.plugin.iceberg.io.hdfs.HadoopPaths.hadoopPath; import static java.util.Objects.requireNonNull; -public class HdfsFileIo - implements FileIO +class HdfsOutputFile + implements TrinoOutputFile { + private final String path; private final HdfsEnvironment environment; private final HdfsContext context; - public HdfsFileIo(HdfsEnvironment environment, HdfsContext context) + public HdfsOutputFile(String path, HdfsEnvironment environment, HdfsContext context) { + this.path = requireNonNull(path, "path is null"); this.environment = requireNonNull(environment, "environment is null"); this.context = requireNonNull(context, "context is null"); } @Override - public InputFile newInputFile(String path) + public OutputStream create() + throws IOException { - return new HdfsInputFile(new Path(path), environment, context); + Path file = hadoopPath(path); + FileSystem fileSystem = environment.getFileSystem(context, file); + return environment.doAs(context.getIdentity(), () -> fileSystem.create(file, false)); } @Override - public OutputFile newOutputFile(String path) + public String location() { - return new HdfsOutputFile(new Path(path), environment, context); - } - - @Override - public void deleteFile(String pathString) - { - Path path = new Path(pathString); - try { - environment.doAs(context.getIdentity(), () -> environment.getFileSystem(context, path).delete(path, false)); - } - catch (IOException e) { - throw new TrinoException(ICEBERG_FILESYSTEM_ERROR, "Failed to delete file: " + path, e); - } + return path; } } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java index 9c8e83ee60df..de5fc71253c8 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java @@ -3486,10 +3486,7 @@ public void testIncorrectIcebergFileSizes() assertQuery(session, "SELECT * FROM test_iceberg_file_size", "VALUES (123), (456), (758)"); // Using Iceberg provided file size fails the query - assertQueryFails("SELECT * FROM test_iceberg_file_size", - format == ORC - ? 
format(".*Error opening Iceberg split.*\\QIncorrect file size (%s) for file (end of stream not reached)\\E.*", alteredValue) - : format("Error reading tail from .* with length %d", alteredValue)); + assertQueryFails("SELECT * FROM test_iceberg_file_size", ".*Error opening Iceberg split.*\\QIncorrect file size (%d) for file (end of stream not reached)\\E.*".formatted(alteredValue)); dropTable("test_iceberg_file_size"); } @@ -4892,17 +4889,13 @@ public void testIfRemoveOrphanFilesCleansUnnecessaryMetadataFilesInPartitionedTa } @Test - public void testCleaningUpWithTableWithSpecifiedLocationWithSlashAtTheEnd() - throws IOException - { - testCleaningUpWithTableWithSpecifiedLocation("/"); - } - - @Test - public void testCleaningUpWithTableWithSpecifiedLocationWithoutSlashAtTheEnd() + public void testCleaningUpWithTableWithSpecifiedLocation() throws IOException { testCleaningUpWithTableWithSpecifiedLocation(""); + testCleaningUpWithTableWithSpecifiedLocation("/"); + testCleaningUpWithTableWithSpecifiedLocation("//"); + testCleaningUpWithTableWithSpecifiedLocation("///"); } private void testCleaningUpWithTableWithSpecifiedLocation(String suffix) diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergAvroConnectorTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergAvroConnectorTest.java index 6877009b8fdf..7c3eff683d2c 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergAvroConnectorTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergAvroConnectorTest.java @@ -47,6 +47,6 @@ protected Session withSmallRowGroups(Session session) @Override public void testIncorrectIcebergFileSizes() { - throw new SkipException("TODO: Enable after supporting 'use_file_size_from_metadata' session property"); + throw new SkipException("Avro does not do tail reads"); } } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMergeAppend.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMergeAppend.java index b97042bcf95a..086d2db9cde6 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMergeAppend.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMergeAppend.java @@ -13,15 +13,8 @@ */ package io.trino.plugin.iceberg; -import com.google.common.collect.ImmutableSet; import io.trino.plugin.base.CatalogName; -import io.trino.plugin.hive.HdfsConfig; -import io.trino.plugin.hive.HdfsConfiguration; -import io.trino.plugin.hive.HdfsConfigurationInitializer; -import io.trino.plugin.hive.HdfsEnvironment; -import io.trino.plugin.hive.HiveHdfsConfiguration; import io.trino.plugin.hive.NodeVersion; -import io.trino.plugin.hive.authentication.NoHdfsAuthentication; import io.trino.plugin.hive.metastore.HiveMetastore; import io.trino.plugin.hive.metastore.file.FileHiveMetastore; import io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig; @@ -29,6 +22,8 @@ import io.trino.plugin.iceberg.catalog.TrinoCatalog; import io.trino.plugin.iceberg.catalog.file.FileMetastoreTableOperationsProvider; import io.trino.plugin.iceberg.catalog.hms.TrinoHiveCatalog; +import io.trino.plugin.iceberg.io.TrinoFileSystemFactory; +import io.trino.plugin.iceberg.io.hdfs.HdfsFileSystemFactory; import io.trino.spi.connector.SchemaTableName; import io.trino.spi.type.TestingTypeManager; import io.trino.testing.AbstractTestQueryFramework; @@ -41,6 +36,7 @@ import java.io.File; +import static 
io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT; import static io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.memoizeMetastore; import static io.trino.plugin.iceberg.catalog.hms.IcebergHiveMetastoreCatalogModule.HIDE_DELTA_LAKE_TABLES_IN_ICEBERG; import static org.testng.Assert.assertEquals; @@ -55,23 +51,20 @@ public class TestIcebergMergeAppend protected QueryRunner createQueryRunner() throws Exception { DistributedQueryRunner queryRunner = IcebergQueryRunner.createIcebergQueryRunner(); - HdfsConfig hdfsConfig = new HdfsConfig(); - HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(hdfsConfig), ImmutableSet.of()); - HdfsEnvironment hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, hdfsConfig, new NoHdfsAuthentication()); - File baseDir = queryRunner.getCoordinator().getBaseDataDir().resolve("iceberg_data").toFile(); HiveMetastore metastore = new FileHiveMetastore( new NodeVersion("testversion"), - hdfsEnvironment, + HDFS_ENVIRONMENT, HIDE_DELTA_LAKE_TABLES_IN_ICEBERG, new FileHiveMetastoreConfig() .setCatalogDirectory(baseDir.toURI().toString()) .setMetastoreUser("test")); - tableOperationsProvider = new FileMetastoreTableOperationsProvider(new HdfsFileIoProvider(hdfsEnvironment)); + TrinoFileSystemFactory fileSystemFactory = new HdfsFileSystemFactory(HDFS_ENVIRONMENT); + tableOperationsProvider = new FileMetastoreTableOperationsProvider(fileSystemFactory); trinoCatalog = new TrinoHiveCatalog( new CatalogName("catalog"), memoizeMetastore(metastore, 1000), - hdfsEnvironment, + fileSystemFactory, new TestingTypeManager(), tableOperationsProvider, "trino-version", diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMetadataFileOperations.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMetadataFileOperations.java index c6fce1a13b06..eab5e7ad5e21 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMetadataFileOperations.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMetadataFileOperations.java @@ -18,8 +18,9 @@ import com.google.common.collect.Multiset; import io.trino.Session; import io.trino.plugin.hive.metastore.HiveMetastore; -import io.trino.plugin.iceberg.TrackingFileIoProvider.OperationContext; -import io.trino.plugin.iceberg.TrackingFileIoProvider.OperationType; +import io.trino.plugin.iceberg.TrackingFileSystemFactory.OperationContext; +import io.trino.plugin.iceberg.TrackingFileSystemFactory.OperationType; +import io.trino.plugin.iceberg.io.hdfs.HdfsFileSystemFactory; import io.trino.plugin.tpch.TpchPlugin; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.DistributedQueryRunner; @@ -39,11 +40,11 @@ import static io.trino.plugin.iceberg.TestIcebergMetadataFileOperations.FileType.METADATA_JSON; import static io.trino.plugin.iceberg.TestIcebergMetadataFileOperations.FileType.SNAPSHOT; import static io.trino.plugin.iceberg.TestIcebergMetadataFileOperations.FileType.fromFilePath; -import static io.trino.plugin.iceberg.TrackingFileIoProvider.OperationType.INPUT_FILE_GET_LENGTH; -import static io.trino.plugin.iceberg.TrackingFileIoProvider.OperationType.INPUT_FILE_NEW_STREAM; -import static io.trino.plugin.iceberg.TrackingFileIoProvider.OperationType.OUTPUT_FILE_CREATE; -import static io.trino.plugin.iceberg.TrackingFileIoProvider.OperationType.OUTPUT_FILE_CREATE_OR_OVERWRITE; -import static io.trino.plugin.iceberg.TrackingFileIoProvider.OperationType.OUTPUT_FILE_LOCATION; 
+import static io.trino.plugin.iceberg.TrackingFileSystemFactory.OperationType.INPUT_FILE_GET_LENGTH; +import static io.trino.plugin.iceberg.TrackingFileSystemFactory.OperationType.INPUT_FILE_NEW_STREAM; +import static io.trino.plugin.iceberg.TrackingFileSystemFactory.OperationType.OUTPUT_FILE_CREATE; +import static io.trino.plugin.iceberg.TrackingFileSystemFactory.OperationType.OUTPUT_FILE_CREATE_OR_OVERWRITE; +import static io.trino.plugin.iceberg.TrackingFileSystemFactory.OperationType.OUTPUT_FILE_LOCATION; import static io.trino.plugin.tpch.TpchMetadata.TINY_SCHEMA_NAME; import static io.trino.testing.QueryAssertions.copyTpchTables; import static io.trino.testing.TestingSession.testSessionBuilder; @@ -53,7 +54,7 @@ import static java.util.stream.Collectors.toCollection; import static org.assertj.core.api.Assertions.assertThat; -@Test(singleThreaded = true) // e.g. trackingFileIoProvider is shared mutable state +@Test(singleThreaded = true) // e.g. trackingFileSystemFactory is shared mutable state public class TestIcebergMetadataFileOperations extends AbstractTestQueryFramework { @@ -62,7 +63,7 @@ public class TestIcebergMetadataFileOperations .setSchema("test_schema") .build(); - private TrackingFileIoProvider trackingFileIoProvider; + private TrackingFileSystemFactory trackingFileSystemFactory; @Override protected DistributedQueryRunner createQueryRunner() @@ -82,8 +83,8 @@ protected DistributedQueryRunner createQueryRunner() File baseDir = queryRunner.getCoordinator().getBaseDataDir().resolve("iceberg_data").toFile(); HiveMetastore metastore = createTestingFileHiveMetastore(baseDir); - trackingFileIoProvider = new TrackingFileIoProvider(new HdfsFileIoProvider(HDFS_ENVIRONMENT)); - queryRunner.installPlugin(new TestingIcebergPlugin(Optional.of(metastore), Optional.of(trackingFileIoProvider), EMPTY_MODULE)); + trackingFileSystemFactory = new TrackingFileSystemFactory(new HdfsFileSystemFactory(HDFS_ENVIRONMENT)); + queryRunner.installPlugin(new TestingIcebergPlugin(Optional.of(metastore), Optional.of(trackingFileSystemFactory), EMPTY_MODULE)); queryRunner.createCatalog("iceberg", "iceberg"); queryRunner.installPlugin(new TpchPlugin()); queryRunner.createCatalog("tpch", "tpch"); @@ -342,12 +343,12 @@ private void assertFileSystemAccesses(@Language("SQL") String query, Multiset<FileOperation> getOperations() { - return trackingFileIoProvider.getOperationCounts() + return trackingFileSystemFactory.getOperationCounts() .entrySet().stream() .flatMap(entry -> nCopies(entry.getValue(), new FileOperation(entry.getKey())).stream()) .collect(toCollection(HashMultiset::create)); diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergNodeLocalDynamicSplitPruning.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergNodeLocalDynamicSplitPruning.java index 4c14018bc6d8..c95de5dd3440 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergNodeLocalDynamicSplitPruning.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergNodeLocalDynamicSplitPruning.java @@ -33,6 +33,7 @@ import io.trino.plugin.hive.orc.OrcWriterConfig; import io.trino.plugin.hive.parquet.ParquetReaderConfig; import io.trino.plugin.hive.parquet.ParquetWriterConfig; +import io.trino.plugin.iceberg.io.hdfs.HdfsFileSystemFactory; import io.trino.spi.Page; import io.trino.spi.SplitWeight; import io.trino.spi.block.BlockBuilder; @@ -188,14 +189,13 @@ private static ConnectorPageSource createTestingPageSource(HiveTransactionHandle
FileFormatDataSourceStats stats = new FileFormatDataSourceStats(); IcebergPageSourceProvider provider = new IcebergPageSourceProvider( - HDFS_ENVIRONMENT, + new HdfsFileSystemFactory(HDFS_ENVIRONMENT), stats, ORC_READER_CONFIG, PARQUET_READER_CONFIG, TESTING_TYPE_MANAGER, - new HdfsFileIoProvider(HDFS_ENVIRONMENT), new JsonCodecFactory().jsonCodec(CommitTaskData.class), - new IcebergFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("trino_test"), stats, ORC_WRITER_CONFIG), + new IcebergFileWriterFactory(TESTING_TYPE_MANAGER, new NodeVersion("trino_test"), stats, ORC_WRITER_CONFIG), new GroupByHashPageIndexerFactory(new JoinCompiler(TESTING_TYPE_MANAGER.getTypeOperators()), new BlockTypeOperators()), icebergConfig); diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergOrcMetricsCollection.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergOrcMetricsCollection.java index e82219cfefa2..214b89bb177b 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergOrcMetricsCollection.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergOrcMetricsCollection.java @@ -13,16 +13,9 @@ */ package io.trino.plugin.iceberg; -import com.google.common.collect.ImmutableSet; import io.trino.Session; import io.trino.plugin.base.CatalogName; -import io.trino.plugin.hive.HdfsConfig; -import io.trino.plugin.hive.HdfsConfiguration; -import io.trino.plugin.hive.HdfsConfigurationInitializer; -import io.trino.plugin.hive.HdfsEnvironment; -import io.trino.plugin.hive.HiveHdfsConfiguration; import io.trino.plugin.hive.NodeVersion; -import io.trino.plugin.hive.authentication.NoHdfsAuthentication; import io.trino.plugin.hive.metastore.HiveMetastore; import io.trino.plugin.hive.metastore.HiveMetastoreConfig; import io.trino.plugin.hive.metastore.file.FileHiveMetastore; @@ -31,6 +24,8 @@ import io.trino.plugin.iceberg.catalog.TrinoCatalog; import io.trino.plugin.iceberg.catalog.file.FileMetastoreTableOperationsProvider; import io.trino.plugin.iceberg.catalog.hms.TrinoHiveCatalog; +import io.trino.plugin.iceberg.io.TrinoFileSystemFactory; +import io.trino.plugin.iceberg.io.hdfs.HdfsFileSystemFactory; import io.trino.plugin.tpch.TpchPlugin; import io.trino.spi.connector.SchemaTableName; import io.trino.spi.type.TestingTypeManager; @@ -53,6 +48,7 @@ import static io.trino.SystemSessionProperties.MAX_DRIVERS_PER_TASK; import static io.trino.SystemSessionProperties.TASK_CONCURRENCY; import static io.trino.SystemSessionProperties.TASK_WRITER_COUNT; +import static io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT; import static io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.memoizeMetastore; import static io.trino.plugin.iceberg.DataFileRecord.toDataFileRecord; import static io.trino.testing.TestingSession.testSessionBuilder; @@ -83,22 +79,19 @@ protected QueryRunner createQueryRunner() File baseDir = queryRunner.getCoordinator().getBaseDataDir().resolve("iceberg_data").toFile(); - HdfsConfig hdfsConfig = new HdfsConfig(); - HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(hdfsConfig), ImmutableSet.of()); - HdfsEnvironment hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, hdfsConfig, new NoHdfsAuthentication()); - HiveMetastore metastore = new FileHiveMetastore( new NodeVersion("test_version"), - hdfsEnvironment, + HDFS_ENVIRONMENT, new HiveMetastoreConfig().isHideDeltaLakeTables(), new FileHiveMetastoreConfig() 
.setCatalogDirectory(baseDir.toURI().toString()) .setMetastoreUser("test")); - tableOperationsProvider = new FileMetastoreTableOperationsProvider(new HdfsFileIoProvider(hdfsEnvironment)); + TrinoFileSystemFactory fileSystemFactory = new HdfsFileSystemFactory(HDFS_ENVIRONMENT); + tableOperationsProvider = new FileMetastoreTableOperationsProvider(fileSystemFactory); trinoCatalog = new TrinoHiveCatalog( new CatalogName("catalog"), memoizeMetastore(metastore, 1000), - hdfsEnvironment, + fileSystemFactory, new TestingTypeManager(), tableOperationsProvider, "trino-version", diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergSplitSource.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergSplitSource.java index 3a8cc6cfaec5..85943dc5c3c2 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergSplitSource.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergSplitSource.java @@ -18,18 +18,13 @@ import com.google.common.collect.ImmutableSet; import io.airlift.units.Duration; import io.trino.plugin.base.CatalogName; -import io.trino.plugin.hive.HdfsConfig; -import io.trino.plugin.hive.HdfsConfiguration; -import io.trino.plugin.hive.HdfsConfigurationInitializer; -import io.trino.plugin.hive.HdfsEnvironment; import io.trino.plugin.hive.HdfsEnvironment.HdfsContext; -import io.trino.plugin.hive.HiveHdfsConfiguration; -import io.trino.plugin.hive.authentication.NoHdfsAuthentication; import io.trino.plugin.hive.metastore.HiveMetastore; -import io.trino.plugin.iceberg.catalog.IcebergTableOperationsProvider; import io.trino.plugin.iceberg.catalog.TrinoCatalog; import io.trino.plugin.iceberg.catalog.file.FileMetastoreTableOperationsProvider; import io.trino.plugin.iceberg.catalog.hms.TrinoHiveCatalog; +import io.trino.plugin.iceberg.io.TrinoFileSystemFactory; +import io.trino.plugin.iceberg.io.hdfs.HdfsFileSystemFactory; import io.trino.spi.connector.ColumnHandle; import io.trino.spi.connector.DynamicFilter; import io.trino.spi.connector.SchemaTableName; @@ -85,20 +80,16 @@ public class TestIcebergSplitSource protected QueryRunner createQueryRunner() throws Exception { - HdfsConfig config = new HdfsConfig(); - HdfsConfiguration configuration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(config), ImmutableSet.of()); - HdfsEnvironment hdfsEnvironment = new HdfsEnvironment(configuration, config, new NoHdfsAuthentication()); - File tempDir = Files.createTempDirectory("test_iceberg_split_source").toFile(); this.metastoreDir = new File(tempDir, "iceberg_data"); HiveMetastore metastore = createTestingFileHiveMetastore(metastoreDir); - IcebergTableOperationsProvider operationsProvider = new FileMetastoreTableOperationsProvider(new HdfsFileIoProvider(hdfsEnvironment)); + TrinoFileSystemFactory fileSystemFactory = new HdfsFileSystemFactory(HDFS_ENVIRONMENT); this.catalog = new TrinoHiveCatalog( new CatalogName("hive"), memoizeMetastore(metastore, 1000), - hdfsEnvironment, + fileSystemFactory, new TestingTypeManager(), - operationsProvider, + new FileMetastoreTableOperationsProvider(fileSystemFactory), "test", false, false, diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergV2.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergV2.java index 5fcb65d9a62e..590f03e6ab58 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergV2.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergV2.java 
@@ -28,6 +28,8 @@ import io.trino.plugin.iceberg.catalog.TrinoCatalog; import io.trino.plugin.iceberg.catalog.file.FileMetastoreTableOperationsProvider; import io.trino.plugin.iceberg.catalog.hms.TrinoHiveCatalog; +import io.trino.plugin.iceberg.io.TrinoFileSystemFactory; +import io.trino.plugin.iceberg.io.hdfs.HdfsFileSystemFactory; import io.trino.spi.connector.SchemaTableName; import io.trino.spi.type.TestingTypeManager; import io.trino.testing.AbstractTestQueryFramework; @@ -65,6 +67,7 @@ import static com.google.common.io.MoreFiles.deleteRecursively; import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; import static io.trino.plugin.hive.HdfsEnvironment.HdfsContext; +import static io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT; import static io.trino.plugin.hive.metastore.file.FileHiveMetastore.createTestingFileHiveMetastore; import static io.trino.plugin.iceberg.IcebergUtil.loadIcebergTable; import static io.trino.testing.TestingConnectorSession.SESSION; @@ -510,11 +513,12 @@ private Table updateTableToV2(String tableName) private BaseTable loadTable(String tableName) { - IcebergTableOperationsProvider tableOperationsProvider = new FileMetastoreTableOperationsProvider(new HdfsFileIoProvider(hdfsEnvironment)); + TrinoFileSystemFactory fileSystemFactory = new HdfsFileSystemFactory(HDFS_ENVIRONMENT); + IcebergTableOperationsProvider tableOperationsProvider = new FileMetastoreTableOperationsProvider(fileSystemFactory); TrinoCatalog catalog = new TrinoHiveCatalog( new CatalogName("hive"), CachingHiveMetastore.memoizeMetastore(metastore, 1000), - hdfsEnvironment, + fileSystemFactory, new TestingTypeManager(), tableOperationsProvider, "test", diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestTrinoHiveCatalogTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestTrinoHiveCatalogTest.java index 826a78e81a2d..033d6420b0d2 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestTrinoHiveCatalogTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestTrinoHiveCatalogTest.java @@ -13,18 +13,13 @@ */ package io.trino.plugin.iceberg; -import com.google.common.collect.ImmutableSet; import io.trino.plugin.base.CatalogName; -import io.trino.plugin.hive.HdfsConfig; -import io.trino.plugin.hive.HdfsConfiguration; -import io.trino.plugin.hive.HdfsConfigurationInitializer; -import io.trino.plugin.hive.HdfsEnvironment; -import io.trino.plugin.hive.HiveHdfsConfiguration; -import io.trino.plugin.hive.authentication.NoHdfsAuthentication; import io.trino.plugin.hive.metastore.HiveMetastore; import io.trino.plugin.iceberg.catalog.TrinoCatalog; import io.trino.plugin.iceberg.catalog.file.FileMetastoreTableOperationsProvider; import io.trino.plugin.iceberg.catalog.hms.TrinoHiveCatalog; +import io.trino.plugin.iceberg.io.TrinoFileSystemFactory; +import io.trino.plugin.iceberg.io.hdfs.HdfsFileSystemFactory; import io.trino.spi.type.TestingTypeManager; import org.testng.annotations.AfterClass; import org.testng.annotations.Test; @@ -35,6 +30,7 @@ import static com.google.common.io.MoreFiles.deleteRecursively; import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; +import static io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT; import static io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.memoizeMetastore; import static io.trino.plugin.hive.metastore.file.FileHiveMetastore.createTestingFileHiveMetastore; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ 
-43,17 +39,12 @@ public class TestTrinoHiveCatalogTest extends BaseTrinoCatalogTest { private final HiveMetastore metastore; - private final HdfsEnvironment hdfsEnvironment; private final java.nio.file.Path tempDir; private final File metastoreDir; public TestTrinoHiveCatalogTest() throws IOException { - HdfsConfig config = new HdfsConfig(); - HdfsConfiguration configuration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(config), ImmutableSet.of()); - hdfsEnvironment = new HdfsEnvironment(configuration, config, new NoHdfsAuthentication()); - tempDir = Files.createTempDirectory("test_trino_hive_catalog"); metastoreDir = tempDir.resolve("iceberg_data").toFile(); metastore = createTestingFileHiveMetastore(metastoreDir); @@ -69,12 +60,13 @@ public void tearDown() @Override protected TrinoCatalog createTrinoCatalog(boolean useUniqueTableLocations) { + TrinoFileSystemFactory fileSystemFactory = new HdfsFileSystemFactory(HDFS_ENVIRONMENT); return new TrinoHiveCatalog( new CatalogName("catalog"), memoizeMetastore(metastore, 1000), - hdfsEnvironment, + fileSystemFactory, new TestingTypeManager(), - new FileMetastoreTableOperationsProvider(new HdfsFileIoProvider(hdfsEnvironment)), + new FileMetastoreTableOperationsProvider(fileSystemFactory), "trino-version", useUniqueTableLocations, false, diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestingIcebergConnectorFactory.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestingIcebergConnectorFactory.java index c5e043a61d78..e46a2727c521 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestingIcebergConnectorFactory.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestingIcebergConnectorFactory.java @@ -15,6 +15,7 @@ import com.google.inject.Module; import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.iceberg.io.TrinoFileSystemFactory; import io.trino.spi.connector.Connector; import io.trino.spi.connector.ConnectorContext; import io.trino.spi.connector.ConnectorFactory; @@ -29,13 +30,13 @@ public class TestingIcebergConnectorFactory implements ConnectorFactory { private final Optional<HiveMetastore> metastore; - private final Optional<FileIoProvider> fileIoProvider; + private final Optional<TrinoFileSystemFactory> fileSystemFactory; private final Module module; - public TestingIcebergConnectorFactory(Optional<HiveMetastore> metastore, Optional<FileIoProvider> fileIoProvider, Module module) + public TestingIcebergConnectorFactory(Optional<HiveMetastore> metastore, Optional<TrinoFileSystemFactory> fileSystemFactory, Module module) { this.metastore = requireNonNull(metastore, "metastore is null"); - this.fileIoProvider = requireNonNull(fileIoProvider, "fileIoProvider is null"); + this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null"); this.module = requireNonNull(module, "module is null"); } @@ -48,6 +49,6 @@ public String getName() @Override public Connector create(String catalogName, Map<String, String> config, ConnectorContext context) { - return createConnector(catalogName, config, context, module, metastore, fileIoProvider); + return createConnector(catalogName, config, context, module, metastore, fileSystemFactory); } } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestingIcebergPlugin.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestingIcebergPlugin.java index d915236855c0..3bb82875d4a1 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestingIcebergPlugin.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestingIcebergPlugin.java @@ -16,6 +16,7
@@ import com.google.common.collect.ImmutableList; import com.google.inject.Module; import io.trino.plugin.hive.metastore.HiveMetastore; +import io.trino.plugin.iceberg.io.TrinoFileSystemFactory; import io.trino.spi.connector.ConnectorFactory; import java.util.List; @@ -28,13 +29,13 @@ public class TestingIcebergPlugin extends IcebergPlugin { private final Optional<HiveMetastore> metastore; - private final Optional<FileIoProvider> fileIoProvider; + private final Optional<TrinoFileSystemFactory> fileSystemFactory; private final Module module; - public TestingIcebergPlugin(Optional<HiveMetastore> metastore, Optional<FileIoProvider> fileIoProvider, Module module) + public TestingIcebergPlugin(Optional<HiveMetastore> metastore, Optional<TrinoFileSystemFactory> fileSystemFactory, Module module) { this.metastore = requireNonNull(metastore, "metastore is null"); - this.fileIoProvider = requireNonNull(fileIoProvider, "fileIoProvider is null"); + this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null"); this.module = requireNonNull(module, "module is null"); } @@ -44,6 +45,6 @@ public Iterable<ConnectorFactory> getConnectorFactories() List<ConnectorFactory> connectorFactories = ImmutableList.copyOf(super.getConnectorFactories()); verify(connectorFactories.size() == 1, "Unexpected connector factories: %s", connectorFactories); - return ImmutableList.of(new TestingIcebergConnectorFactory(metastore, fileIoProvider, module)); + return ImmutableList.of(new TestingIcebergConnectorFactory(metastore, fileSystemFactory, module)); } } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TrackingFileIoProvider.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TrackingFileSystemFactory.java similarity index 71% rename from plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TrackingFileIoProvider.java rename to plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TrackingFileSystemFactory.java index 59e0d846467e..eeba58e635bf 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TrackingFileIoProvider.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TrackingFileSystemFactory.java @@ -14,7 +14,12 @@ package io.trino.plugin.iceberg; import com.google.common.collect.ImmutableMap; -import io.trino.plugin.hive.HdfsEnvironment.HdfsContext; +import io.trino.plugin.iceberg.io.FileIterator; +import io.trino.plugin.iceberg.io.TrinoFileSystem; +import io.trino.plugin.iceberg.io.TrinoFileSystemFactory; +import io.trino.plugin.iceberg.io.TrinoInputFile; +import io.trino.plugin.iceberg.io.TrinoOutputFile; +import io.trino.spi.security.ConnectorIdentity; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.io.InputFile; import org.apache.iceberg.io.OutputFile; @@ -23,6 +28,7 @@ import javax.annotation.concurrent.Immutable; +import java.io.IOException; import java.util.Map; import java.util.Objects; import java.util.concurrent.ConcurrentHashMap; @@ -30,17 +36,17 @@ import java.util.function.Consumer; import static com.google.common.base.MoreObjects.toStringHelper; -import static io.trino.plugin.iceberg.TrackingFileIoProvider.OperationType.INPUT_FILE_EXISTS; -import static io.trino.plugin.iceberg.TrackingFileIoProvider.OperationType.INPUT_FILE_GET_LENGTH; -import static io.trino.plugin.iceberg.TrackingFileIoProvider.OperationType.INPUT_FILE_NEW_STREAM; -import static io.trino.plugin.iceberg.TrackingFileIoProvider.OperationType.OUTPUT_FILE_CREATE; -import static io.trino.plugin.iceberg.TrackingFileIoProvider.OperationType.OUTPUT_FILE_CREATE_OR_OVERWRITE; -import static io.trino.plugin.iceberg.TrackingFileIoProvider.OperationType.OUTPUT_FILE_LOCATION; -import
static io.trino.plugin.iceberg.TrackingFileIoProvider.OperationType.OUTPUT_FILE_TO_INPUT_FILE; +import static io.trino.plugin.iceberg.TrackingFileSystemFactory.OperationType.INPUT_FILE_EXISTS; +import static io.trino.plugin.iceberg.TrackingFileSystemFactory.OperationType.INPUT_FILE_GET_LENGTH; +import static io.trino.plugin.iceberg.TrackingFileSystemFactory.OperationType.INPUT_FILE_NEW_STREAM; +import static io.trino.plugin.iceberg.TrackingFileSystemFactory.OperationType.OUTPUT_FILE_CREATE; +import static io.trino.plugin.iceberg.TrackingFileSystemFactory.OperationType.OUTPUT_FILE_CREATE_OR_OVERWRITE; +import static io.trino.plugin.iceberg.TrackingFileSystemFactory.OperationType.OUTPUT_FILE_LOCATION; +import static io.trino.plugin.iceberg.TrackingFileSystemFactory.OperationType.OUTPUT_FILE_TO_INPUT_FILE; import static java.util.Objects.requireNonNull; -public class TrackingFileIoProvider - implements FileIoProvider +public class TrackingFileSystemFactory + implements TrinoFileSystemFactory { public enum OperationType { @@ -54,11 +60,11 @@ public enum OperationType } private final AtomicInteger fileId = new AtomicInteger(); - private final FileIoProvider delegate; + private final TrinoFileSystemFactory delegate; private final Map<OperationContext, Integer> operationCounts = new ConcurrentHashMap<>(); - public TrackingFileIoProvider(FileIoProvider delegate) + public TrackingFileSystemFactory(TrinoFileSystemFactory delegate) { this.delegate = requireNonNull(delegate, "delegate is null"); } @@ -80,11 +86,9 @@ private void increment(String path, int fileId, OperationType operationType) } @Override - public FileIO createFileIo(HdfsContext hdfsContext, String queryId) + public TrinoFileSystem create(ConnectorIdentity identity) { - return new TrackingFileIo( - delegate.createFileIo(hdfsContext, queryId), - this::increment); + return new TrackingFileSystem(delegate.create(identity), this::increment); } private interface Tracker @@ -92,6 +96,64 @@ private interface Tracker void track(String path, int fileId, OperationType operationType); } + private class TrackingFileSystem + implements TrinoFileSystem + { + private final TrinoFileSystem delegate; + private final Tracker tracker; + + private TrackingFileSystem(TrinoFileSystem delegate, Tracker tracker) + { + this.delegate = requireNonNull(delegate, "delegate is null"); + this.tracker = requireNonNull(tracker, "tracker is null"); + } + + @Override + public TrinoInputFile newInputFile(String path) + { + return delegate.newInputFile(path); + } + + @Override + public TrinoInputFile newInputFile(String path, long length) + { + return delegate.newInputFile(path, length); + } + + @Override + public TrinoOutputFile newOutputFile(String path) + { + return delegate.newOutputFile(path); + } + + @Override + public void deleteFile(String path) + throws IOException + { + delegate.deleteFile(path); + } + + @Override + public void deleteDirectory(String path) + throws IOException + { + delegate.deleteDirectory(path); + } + + @Override + public FileIterator listFiles(String path) + throws IOException + { + return delegate.listFiles(path); + } + + @Override + public FileIO toFileIo() + { + return new TrackingFileIo(delegate.toFileIo(), tracker); + } + } + private class TrackingFileIo implements FileIO { diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestTrinoGlueCatalogTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestTrinoGlueCatalogTest.java index 5d00c3ba492f..a1aefc324e6b 100644 ---
a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestTrinoGlueCatalogTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestTrinoGlueCatalogTest.java @@ -16,19 +16,14 @@ import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; import com.amazonaws.services.glue.AWSGlueAsyncClientBuilder; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; import io.airlift.log.Logger; import io.trino.plugin.base.CatalogName; -import io.trino.plugin.hive.HdfsConfig; -import io.trino.plugin.hive.HdfsConfigurationInitializer; -import io.trino.plugin.hive.HdfsEnvironment; -import io.trino.plugin.hive.HiveHdfsConfiguration; -import io.trino.plugin.hive.authentication.NoHdfsAuthentication; import io.trino.plugin.hive.metastore.glue.GlueHiveMetastoreConfig; import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats; import io.trino.plugin.iceberg.BaseTrinoCatalogTest; -import io.trino.plugin.iceberg.HdfsFileIoProvider; import io.trino.plugin.iceberg.catalog.TrinoCatalog; +import io.trino.plugin.iceberg.io.TrinoFileSystemFactory; +import io.trino.plugin.iceberg.io.hdfs.HdfsFileSystemFactory; import io.trino.spi.connector.SchemaTableName; import io.trino.spi.security.PrincipalType; import io.trino.spi.security.TrinoPrincipal; @@ -41,6 +36,7 @@ import java.nio.file.Path; import java.util.Optional; +import static io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT; import static io.trino.testing.TestingConnectorSession.SESSION; import static io.trino.testing.sql.TestTable.randomTableSuffix; import static org.testng.Assert.assertEquals; @@ -53,18 +49,16 @@ public class TestTrinoGlueCatalogTest @Override protected TrinoCatalog createTrinoCatalog(boolean useUniqueTableLocations) { - HdfsEnvironment hdfsEnvironment = new HdfsEnvironment(new HiveHdfsConfiguration( - new HdfsConfigurationInitializer( - new HdfsConfig(), - ImmutableSet.of()), - ImmutableSet.of()), - new HdfsConfig(), - new NoHdfsAuthentication()); + TrinoFileSystemFactory fileSystemFactory = new HdfsFileSystemFactory(HDFS_ENVIRONMENT); return new TrinoGlueCatalog( new CatalogName("catalog_name"), - hdfsEnvironment, + fileSystemFactory, new TestingTypeManager(), - new GlueIcebergTableOperationsProvider(new HdfsFileIoProvider(hdfsEnvironment), new GlueMetastoreStats(), new GlueHiveMetastoreConfig(), DefaultAWSCredentialsProviderChain.getInstance()), + new GlueIcebergTableOperationsProvider( + fileSystemFactory, + new GlueMetastoreStats(), + new GlueHiveMetastoreConfig(), + DefaultAWSCredentialsProviderChain.getInstance()), "test", AWSGlueAsyncClientBuilder.defaultClient(), new GlueMetastoreStats(), @@ -79,18 +73,16 @@ public void testDefaultLocation() Path tmpDirectory = Files.createTempDirectory("test_glue_catalog_default_location_"); tmpDirectory.toFile().deleteOnExit(); - HdfsEnvironment hdfsEnvironment = new HdfsEnvironment(new HiveHdfsConfiguration( - new HdfsConfigurationInitializer( - new HdfsConfig(), - ImmutableSet.of()), - ImmutableSet.of()), - new HdfsConfig(), - new NoHdfsAuthentication()); + TrinoFileSystemFactory fileSystemFactory = new HdfsFileSystemFactory(HDFS_ENVIRONMENT); TrinoCatalog catalogWithDefaultLocation = new TrinoGlueCatalog( new CatalogName("catalog_name"), - hdfsEnvironment, + fileSystemFactory, new TestingTypeManager(), - new GlueIcebergTableOperationsProvider(new HdfsFileIoProvider(hdfsEnvironment), new GlueMetastoreStats(), new GlueHiveMetastoreConfig(), DefaultAWSCredentialsProviderChain.getInstance()), + new 
+                        fileSystemFactory,
+                        new GlueMetastoreStats(),
+                        new GlueHiveMetastoreConfig(),
+                        DefaultAWSCredentialsProviderChain.getInstance()),
                 "test",
                 AWSGlueAsyncClientBuilder.defaultClient(),
                 new GlueMetastoreStats(),
diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/io/hdfs/TestHdfsFileSystem.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/io/hdfs/TestHdfsFileSystem.java
new file mode 100644
index 000000000000..043b26f9ecb1
--- /dev/null
+++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/io/hdfs/TestHdfsFileSystem.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.plugin.iceberg.io.hdfs;
+
+import com.google.common.collect.ImmutableList;
+import io.trino.plugin.iceberg.io.FileIterator;
+import io.trino.plugin.iceberg.io.TrinoFileSystem;
+import io.trino.plugin.iceberg.io.TrinoFileSystemFactory;
+import io.trino.spi.security.ConnectorIdentity;
+import org.testng.annotations.Test;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.List;
+
+import static com.google.common.io.MoreFiles.deleteRecursively;
+import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE;
+import static io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT;
+import static java.nio.file.Files.createDirectory;
+import static java.nio.file.Files.createFile;
+import static java.nio.file.Files.createTempDirectory;
+import static org.assertj.core.api.Assertions.assertThat;
+
+public class TestHdfsFileSystem
+{
+    @Test
+    public void testListing()
+            throws IOException
+    {
+        TrinoFileSystemFactory factory = new HdfsFileSystemFactory(HDFS_ENVIRONMENT);
+        TrinoFileSystem fileSystem = factory.create(ConnectorIdentity.ofUser("test"));
+
+        Path tempDir = createTempDirectory("testListing");
+        String root = tempDir.toString();
+
+        assertThat(listFiles(fileSystem, root)).isEmpty();
+
+        createFile(tempDir.resolve("abc"));
+        createFile(tempDir.resolve("xyz"));
+        createDirectory(tempDir.resolve("mydir"));
+
+        assertThat(listFiles(fileSystem, root)).containsExactlyInAnyOrder(
+                root + "/abc",
+                root + "/xyz");
+
+        assertThat(listFiles(fileSystem, root + "/abc")).containsExactly(root + "/abc");
+        assertThat(listFiles(fileSystem, root + "/abc/")).containsExactly(root + "/abc/");
+        assertThat(listFiles(fileSystem, root + "/abc//")).containsExactly(root + "/abc//");
+        assertThat(listFiles(fileSystem, root + "///abc")).containsExactly(root + "///abc");
+
+        createFile(tempDir.resolve("mydir").resolve("qqq"));
+
+        assertThat(listFiles(fileSystem, root)).containsExactlyInAnyOrder(
+                root + "/abc",
+                root + "/xyz",
+                root + "/mydir/qqq");
+
+        assertThat(listFiles(fileSystem, root + "/mydir")).containsExactly(root + "/mydir/qqq");
+        assertThat(listFiles(fileSystem, root + "/mydir/")).containsExactly(root + "/mydir/qqq");
+        assertThat(listFiles(fileSystem, root + "/mydir//")).containsExactly(root + "/mydir//qqq");
+        assertThat(listFiles(fileSystem, root + "///mydir")).containsExactly(root + "///mydir/qqq");
+
+        deleteRecursively(tempDir, ALLOW_INSECURE);
+    }
+
+    private static List<String> listFiles(TrinoFileSystem fileSystem, String path)
+            throws IOException
+    {
+        FileIterator iterator = fileSystem.listFiles(path);
+        ImmutableList.Builder<String> files = ImmutableList.builder();
+        while (iterator.hasNext()) {
+            files.add(iterator.next().path());
+        }
+        return files.build();
+    }
+}
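Note (not part of the diff): the new test pins down a subtle contract, namely that listed paths preserve the exact prefix the caller passed in, redundant slashes included (`root + "/mydir//"` yields `root + "/mydir//qqq"`). Since Hadoop's `Path` normalizes `//`, an implementation cannot simply return `Path.toString()` for each entry. The real `HdfsFileSystem` is not part of this diff; the sketch below is only one way the asserted behavior could be obtained, and the class and method names are assumptions:

```java
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

// Hypothetical listing helper: rebuilds each result from the caller's original,
// un-normalized prefix so that "/mydir//" listings come back as "/mydir//qqq".
final class HdfsListingSketch
{
    private HdfsListingSketch() {}

    static List<String> listFiles(FileSystem fileSystem, String location)
            throws IOException
    {
        Path directory = new Path(location); // Hadoop collapses redundant slashes here
        String normalizedPrefix = directory.toUri().getPath();
        List<String> result = new ArrayList<>();
        // recursive=true returns files only, matching the FileIterator semantics above
        RemoteIterator<LocatedFileStatus> iterator = fileSystem.listFiles(directory, true);
        while (iterator.hasNext()) {
            String path = iterator.next().getPath().toUri().getPath();
            String suffix = path.substring(normalizedPrefix.length());
            if (!suffix.isEmpty() && location.endsWith("/")) {
                suffix = suffix.substring(1); // the caller already supplied the separator
            }
            result.add(location + suffix); // caller's prefix is preserved verbatim
        }
        return result;
    }
}
```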
"///mydir")).containsExactly(root + "///mydir/qqq"); + + deleteRecursively(tempDir, ALLOW_INSECURE); + } + + private static List listFiles(TrinoFileSystem fileSystem, String path) + throws IOException + { + FileIterator iterator = fileSystem.listFiles(path); + ImmutableList.Builder files = ImmutableList.builder(); + while (iterator.hasNext()) { + files.add(iterator.next().path()); + } + return files.build(); + } +} diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/TestCreateDropSchema.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/TestCreateDropSchema.java index ec9228c0da7c..d373554e8c65 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/TestCreateDropSchema.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/TestCreateDropSchema.java @@ -90,23 +90,6 @@ public void testDropWithExternalFilesInSubdirectory() hdfsClient.delete(schemaDir); } - @Test(groups = ICEBERG) // make sure empty directories are noticed as well - public void testDropSchemaFilesWithEmptyExternalSubdir() - { - String schemaName = "schema_with_empty_subdirectory_" + randomTableSuffix(); - String schemaDir = format("%s/%s.db/", warehouseDirectory, schemaName); - String externalSubdir = schemaDir + "external-subdir/"; - - hdfsClient.createDirectory(externalSubdir); - - onTrino().executeQuery("CREATE SCHEMA " + schemaName); - assertFileExistence(externalSubdir, true, "external subdirectory exists after creating schema"); - onTrino().executeQuery("DROP SCHEMA " + schemaName); - assertFileExistence(externalSubdir, true, "external subdirectory exists after dropping schema"); - - hdfsClient.delete(schemaDir); - } - @Test(groups = ICEBERG) // default location, external file at top level public void testDropWithExternalFiles() {