Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions api/src/main/java/org/apache/iceberg/io/FileIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,25 +49,25 @@ default InputFile newInputFile(String path, long length) {
default InputFile newInputFile(DataFile file) {
Preconditions.checkArgument(
file.keyMetadata() == null,
"Cannot decrypt data file: {} (use EncryptingFileIO)",
"Cannot decrypt data file: %s (use EncryptingFileIO)",
file.path());
return newInputFile(file.path().toString());
return newInputFile(file.path().toString(), file.fileSizeInBytes());
}

default InputFile newInputFile(DeleteFile file) {
Preconditions.checkArgument(
file.keyMetadata() == null,
"Cannot decrypt delete file: {} (use EncryptingFileIO)",
"Cannot decrypt delete file: %s (use EncryptingFileIO)",
file.path());
return newInputFile(file.path().toString());
return newInputFile(file.path().toString(), file.fileSizeInBytes());
}

default InputFile newInputFile(ManifestFile manifest) {
Preconditions.checkArgument(
manifest.keyMetadata() == null,
"Cannot decrypt manifest: {} (use EncryptingFileIO)",
"Cannot decrypt manifest: %s (use EncryptingFileIO)",
manifest.path());
return newInputFile(manifest.path());
return newInputFile(manifest.path(), manifest.length());
}

/** Get a {@link OutputFile} instance to write bytes to the file at the given path. */
Expand Down
58 changes: 58 additions & 0 deletions aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.reset;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;

Expand All @@ -38,6 +40,13 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.BaseTable;
import org.apache.iceberg.CatalogProperties;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.ManifestFile;
import org.apache.iceberg.ManifestFiles;
import org.apache.iceberg.ManifestWriter;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.TestHelpers;
import org.apache.iceberg.aws.AwsProperties;
Expand Down Expand Up @@ -74,6 +83,7 @@
import software.amazon.awssdk.services.s3.model.CreateBucketRequest;
import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest;
import software.amazon.awssdk.services.s3.model.DeleteObjectsResponse;
import software.amazon.awssdk.services.s3.model.HeadObjectRequest;
import software.amazon.awssdk.services.s3.model.S3Error;

@ExtendWith(S3MockExtension.class)
Expand Down Expand Up @@ -377,6 +387,54 @@ public void testResolvingFileIOLoad() {
Assertions.assertThat(result).isInstanceOf(S3FileIO.class);
}

@Test
public void testInputFileWithDataFile() throws IOException {
String location = "s3://bucket/path/to/data-file.parquet";
DataFile dataFile =
DataFiles.builder(PartitionSpec.unpartitioned())
.withPath(location)
.withFileSizeInBytes(123L)
.withFormat(FileFormat.PARQUET)
.withRecordCount(123L)
.build();
OutputStream outputStream = s3FileIO.newOutputFile(location).create();
byte[] data = "testing".getBytes();
outputStream.write(data);
outputStream.close();

InputFile inputFile = s3FileIO.newInputFile(dataFile);
reset(s3mock);

Assertions.assertThat(inputFile.getLength())
.as("Data file length should be determined from the file size stats")
.isEqualTo(123L);
verify(s3mock, never()).headObject(any(HeadObjectRequest.class));
}

@Test
public void testInputFileWithManifest() throws IOException {
String dataFileLocation = "s3://bucket/path/to/data-file-2.parquet";
DataFile dataFile =
DataFiles.builder(PartitionSpec.unpartitioned())
.withPath(dataFileLocation)
.withFileSizeInBytes(123L)
.withFormat(FileFormat.PARQUET)
.withRecordCount(123L)
.build();
String manifestLocation = "s3://bucket/path/to/manifest.avro";
OutputFile outputFile = s3FileIO.newOutputFile(manifestLocation);
ManifestWriter<DataFile> writer =
ManifestFiles.write(PartitionSpec.unpartitioned(), outputFile);
writer.add(dataFile);
writer.close();
ManifestFile manifest = writer.toManifestFile();
InputFile inputFile = s3FileIO.newInputFile(manifest);
reset(s3mock);

Assertions.assertThat(inputFile.getLength()).isEqualTo(manifest.length());
verify(s3mock, never()).headObject(any(HeadObjectRequest.class));
}

private void createRandomObjects(String prefix, int count) {
S3URI s3URI = new S3URI(prefix);

Expand Down