diff --git a/aws/src/integration/java/org/apache/iceberg/aws/s3/S3FileIOTest.java b/aws/src/integration/java/org/apache/iceberg/aws/s3/S3FileIOTest.java index e97154cfb4e7..50b5d279b2b7 100644 --- a/aws/src/integration/java/org/apache/iceberg/aws/s3/S3FileIOTest.java +++ b/aws/src/integration/java/org/apache/iceberg/aws/s3/S3FileIOTest.java @@ -29,9 +29,11 @@ import java.util.UUID; import javax.crypto.KeyGenerator; import javax.crypto.SecretKey; +import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.aws.AwsClientUtil; import org.apache.iceberg.aws.AwsIntegTestUtil; import org.apache.iceberg.aws.AwsProperties; +import org.apache.iceberg.exceptions.NotFoundException; import org.apache.iceberg.io.InputFile; import org.apache.iceberg.io.OutputFile; import org.junit.AfterClass; @@ -90,6 +92,39 @@ public void before() { objectUri = String.format("s3://%s/%s", bucketName, objectKey); } + @Test + public void testExists_noFile() { + S3FileIO s3FileIO = new S3FileIO(AwsClientUtil::defaultS3Client); + InputFile file = s3FileIO.newInputFile(objectUri); + Assert.assertFalse("file should not exist", file.exists()); + AssertHelpers.assertThrows("get length should throw exception", + NotFoundException.class, + String.format("Cannot retrieve file length because file %s does not exist", objectUri), + file::getLength); + } + + @Test + public void testExists_wrongFileSamePrefix() { + s3.putObject(PutObjectRequest.builder().bucket(bucketName).key(objectKey + "suffix").build(), + RequestBody.fromBytes(contentBytes)); + S3FileIO s3FileIO = new S3FileIO(AwsClientUtil::defaultS3Client); + InputFile file = s3FileIO.newInputFile(objectUri); + Assert.assertFalse("file should not exist", file.exists()); + } + + @Test + public void testExists_multipleFilesSamePrefix() { + s3.putObject(PutObjectRequest.builder().bucket(bucketName).key(objectKey).build(), + RequestBody.fromBytes(contentBytes)); + s3.putObject(PutObjectRequest.builder().bucket(bucketName).key(objectKey + "suffix").build(), + RequestBody.fromBytes(new byte[1024 * 1024])); + S3FileIO s3FileIO = new S3FileIO(AwsClientUtil::defaultS3Client); + InputFile file = s3FileIO.newInputFile(objectUri); + Assert.assertTrue("file should exist", file.exists()); + Assert.assertEquals("List results are always returned in UTF-8 binary order", + contentBytes.length, file.getLength()); + } + @Test public void testNewInputStream() throws Exception { s3.putObject(PutObjectRequest.builder().bucket(bucketName).key(objectKey).build(), diff --git a/aws/src/main/java/org/apache/iceberg/aws/s3/BaseS3File.java b/aws/src/main/java/org/apache/iceberg/aws/s3/BaseS3File.java index 42b0f52fc0e0..f7d81d49ae8e 100644 --- a/aws/src/main/java/org/apache/iceberg/aws/s3/BaseS3File.java +++ b/aws/src/main/java/org/apache/iceberg/aws/s3/BaseS3File.java @@ -22,15 +22,16 @@ import org.apache.iceberg.aws.AwsProperties; import software.amazon.awssdk.http.HttpStatusCode; import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.model.HeadObjectRequest; -import software.amazon.awssdk.services.s3.model.HeadObjectResponse; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Response; import software.amazon.awssdk.services.s3.model.S3Exception; +import software.amazon.awssdk.services.s3.model.S3Object; abstract class BaseS3File { private final S3Client client; private final S3URI uri; private final AwsProperties awsProperties; - private HeadObjectResponse metadata; + private S3Object metadata; BaseS3File(S3Client client, S3URI uri) { this(client, uri, new AwsProperties()); @@ -75,13 +76,24 @@ public boolean exists() { } } - protected HeadObjectResponse getObjectMetadata() throws S3Exception { + protected S3Object getObjectMetadata() throws S3Exception { if (metadata == null) { - HeadObjectRequest.Builder requestBuilder = HeadObjectRequest.builder() + ListObjectsV2Response response = client().listObjectsV2(ListObjectsV2Request.builder() .bucket(uri().bucket()) - .key(uri().key()); - S3RequestUtil.configureEncryption(awsProperties, requestBuilder); - metadata = client().headObject(requestBuilder.build()); + .prefix(uri().key()) + .maxKeys(1) + .build()); + + if (!response.hasContents()) { + metadata = null; + } else { + S3Object s3Object = response.contents().get(0); + if (uri().key().equals(s3Object.key())) { + metadata = s3Object; + } else { + metadata = null; + } + } } return metadata; diff --git a/aws/src/main/java/org/apache/iceberg/aws/s3/S3InputFile.java b/aws/src/main/java/org/apache/iceberg/aws/s3/S3InputFile.java index 93d86c3feadf..eac47962c491 100644 --- a/aws/src/main/java/org/apache/iceberg/aws/s3/S3InputFile.java +++ b/aws/src/main/java/org/apache/iceberg/aws/s3/S3InputFile.java @@ -20,6 +20,7 @@ package org.apache.iceberg.aws.s3; import org.apache.iceberg.aws.AwsProperties; +import org.apache.iceberg.exceptions.NotFoundException; import org.apache.iceberg.io.InputFile; import org.apache.iceberg.io.SeekableInputStream; import software.amazon.awssdk.services.s3.S3Client; @@ -40,7 +41,11 @@ public S3InputFile(S3Client client, S3URI uri, AwsProperties awsProperties) { */ @Override public long getLength() { - return getObjectMetadata().contentLength(); + if (!exists()) { + throw new NotFoundException("Cannot retrieve file length because file %s does not exist", uri()); + } + + return getObjectMetadata().size(); } @Override diff --git a/aws/src/main/java/org/apache/iceberg/aws/s3/S3RequestUtil.java b/aws/src/main/java/org/apache/iceberg/aws/s3/S3RequestUtil.java index d603b43234aa..83c2dfd6009f 100644 --- a/aws/src/main/java/org/apache/iceberg/aws/s3/S3RequestUtil.java +++ b/aws/src/main/java/org/apache/iceberg/aws/s3/S3RequestUtil.java @@ -24,7 +24,6 @@ import org.apache.iceberg.aws.AwsProperties; import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; import software.amazon.awssdk.services.s3.model.GetObjectRequest; -import software.amazon.awssdk.services.s3.model.HeadObjectRequest; import software.amazon.awssdk.services.s3.model.ObjectCannedACL; import software.amazon.awssdk.services.s3.model.PutObjectRequest; import software.amazon.awssdk.services.s3.model.S3Request; @@ -59,11 +58,6 @@ static void configureEncryption(AwsProperties awsProperties, GetObjectRequest.Bu requestBuilder::sseCustomerAlgorithm, requestBuilder::sseCustomerKey, requestBuilder::sseCustomerKeyMD5); } - static void configureEncryption(AwsProperties awsProperties, HeadObjectRequest.Builder requestBuilder) { - configureEncryption(awsProperties, NULL_SSE_SETTER, NULL_STRING_SETTER, - requestBuilder::sseCustomerAlgorithm, requestBuilder::sseCustomerKey, requestBuilder::sseCustomerKeyMD5); - } - @SuppressWarnings("ReturnValueIgnored") static void configureEncryption( AwsProperties awsProperties, diff --git a/aws/src/test/java/org/apache/iceberg/aws/s3/S3FileIOTest.java b/aws/src/test/java/org/apache/iceberg/aws/s3/S3FileIOTest.java index 883fab1468ce..d8a20acd2765 100644 --- a/aws/src/test/java/org/apache/iceberg/aws/s3/S3FileIOTest.java +++ b/aws/src/test/java/org/apache/iceberg/aws/s3/S3FileIOTest.java @@ -26,16 +26,20 @@ import java.util.Random; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.SerializationUtils; +import org.apache.iceberg.AssertHelpers; +import org.apache.iceberg.exceptions.NotFoundException; import org.apache.iceberg.io.InputFile; import org.apache.iceberg.io.OutputFile; import org.apache.iceberg.util.SerializableSupplier; import org.junit.Before; import org.junit.ClassRule; import org.junit.Test; +import software.amazon.awssdk.core.sync.RequestBody; import software.amazon.awssdk.http.urlconnection.UrlConnectionHttpClient; import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.model.CreateBucketRequest; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; @@ -64,6 +68,10 @@ public void newInputFile() throws IOException { InputFile in = s3FileIO.newInputFile(location); assertFalse(in.exists()); + AssertHelpers.assertThrows("get length should throw exception", + NotFoundException.class, + "Cannot retrieve file length because file s3://bucket/path/to/file.txt does not exist", + in::getLength); OutputFile out = s3FileIO.newOutputFile(location); try (OutputStream os = out.createOrOverwrite()) { @@ -84,6 +92,31 @@ public void newInputFile() throws IOException { assertFalse(s3FileIO.newInputFile(location).exists()); } + @Test + public void testExists_wrongFileWithSamePrefix() { + String location = "s3://bucket/file.txt"; + byte [] data = new byte[1024 * 1024]; + random.nextBytes(data); + s3.get().putObject(PutObjectRequest.builder().bucket("bucket").key("file.txt.dup").build(), + RequestBody.fromBytes(data)); + InputFile in = s3FileIO.newInputFile(location); + assertFalse("file should not exist", in.exists()); + } + + @Test + public void testExists_multipleFilesSamePrefix() { + String location = "s3://bucket/file.txt"; + byte [] data = new byte[1024 * 1024]; + random.nextBytes(data); + s3.get().putObject(PutObjectRequest.builder().bucket("bucket").key("file.txt.dup").build(), + RequestBody.fromBytes(new byte[1024 * 1024 * 2])); + s3.get().putObject(PutObjectRequest.builder().bucket("bucket").key("file.txt").build(), + RequestBody.fromBytes(data)); + InputFile in = s3FileIO.newInputFile(location); + assertTrue("file should exist", in.exists()); + assertEquals("List results are always returned in UTF-8 binary order", data.length, in.getLength()); + } + @Test public void serializeClient() { SerializableSupplier pre =