diff --git a/aws/src/integration/java/org/apache/iceberg/aws/s3/S3FileIOTest.java b/aws/src/integration/java/org/apache/iceberg/aws/s3/S3FileIOTest.java index 50b5d279b2b7..e97154cfb4e7 100644 --- a/aws/src/integration/java/org/apache/iceberg/aws/s3/S3FileIOTest.java +++ b/aws/src/integration/java/org/apache/iceberg/aws/s3/S3FileIOTest.java @@ -29,11 +29,9 @@ import java.util.UUID; import javax.crypto.KeyGenerator; import javax.crypto.SecretKey; -import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.aws.AwsClientUtil; import org.apache.iceberg.aws.AwsIntegTestUtil; import org.apache.iceberg.aws.AwsProperties; -import org.apache.iceberg.exceptions.NotFoundException; import org.apache.iceberg.io.InputFile; import org.apache.iceberg.io.OutputFile; import org.junit.AfterClass; @@ -92,39 +90,6 @@ public void before() { objectUri = String.format("s3://%s/%s", bucketName, objectKey); } - @Test - public void testExists_noFile() { - S3FileIO s3FileIO = new S3FileIO(AwsClientUtil::defaultS3Client); - InputFile file = s3FileIO.newInputFile(objectUri); - Assert.assertFalse("file should not exist", file.exists()); - AssertHelpers.assertThrows("get length should throw exception", - NotFoundException.class, - String.format("Cannot retrieve file length because file %s does not exist", objectUri), - file::getLength); - } - - @Test - public void testExists_wrongFileSamePrefix() { - s3.putObject(PutObjectRequest.builder().bucket(bucketName).key(objectKey + "suffix").build(), - RequestBody.fromBytes(contentBytes)); - S3FileIO s3FileIO = new S3FileIO(AwsClientUtil::defaultS3Client); - InputFile file = s3FileIO.newInputFile(objectUri); - Assert.assertFalse("file should not exist", file.exists()); - } - - @Test - public void testExists_multipleFilesSamePrefix() { - s3.putObject(PutObjectRequest.builder().bucket(bucketName).key(objectKey).build(), - RequestBody.fromBytes(contentBytes)); - s3.putObject(PutObjectRequest.builder().bucket(bucketName).key(objectKey + "suffix").build(), - RequestBody.fromBytes(new byte[1024 * 1024])); - S3FileIO s3FileIO = new S3FileIO(AwsClientUtil::defaultS3Client); - InputFile file = s3FileIO.newInputFile(objectUri); - Assert.assertTrue("file should exist", file.exists()); - Assert.assertEquals("List results are always returned in UTF-8 binary order", - contentBytes.length, file.getLength()); - } - @Test public void testNewInputStream() throws Exception { s3.putObject(PutObjectRequest.builder().bucket(bucketName).key(objectKey).build(), diff --git a/aws/src/main/java/org/apache/iceberg/aws/s3/BaseS3File.java b/aws/src/main/java/org/apache/iceberg/aws/s3/BaseS3File.java index f7d81d49ae8e..42b0f52fc0e0 100644 --- a/aws/src/main/java/org/apache/iceberg/aws/s3/BaseS3File.java +++ b/aws/src/main/java/org/apache/iceberg/aws/s3/BaseS3File.java @@ -22,16 +22,15 @@ import org.apache.iceberg.aws.AwsProperties; import software.amazon.awssdk.http.HttpStatusCode; import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; -import software.amazon.awssdk.services.s3.model.ListObjectsV2Response; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import software.amazon.awssdk.services.s3.model.S3Exception; -import software.amazon.awssdk.services.s3.model.S3Object; abstract class BaseS3File { private final S3Client client; private final S3URI uri; private final AwsProperties awsProperties; - private S3Object metadata; + private HeadObjectResponse metadata; BaseS3File(S3Client client, S3URI uri) { this(client, uri, new AwsProperties()); @@ -76,24 +75,13 @@ public boolean exists() { } } - protected S3Object getObjectMetadata() throws S3Exception { + protected HeadObjectResponse getObjectMetadata() throws S3Exception { if (metadata == null) { - ListObjectsV2Response response = client().listObjectsV2(ListObjectsV2Request.builder() + HeadObjectRequest.Builder requestBuilder = HeadObjectRequest.builder() .bucket(uri().bucket()) - .prefix(uri().key()) - .maxKeys(1) - .build()); - - if (!response.hasContents()) { - metadata = null; - } else { - S3Object s3Object = response.contents().get(0); - if (uri().key().equals(s3Object.key())) { - metadata = s3Object; - } else { - metadata = null; - } - } + .key(uri().key()); + S3RequestUtil.configureEncryption(awsProperties, requestBuilder); + metadata = client().headObject(requestBuilder.build()); } return metadata; diff --git a/aws/src/main/java/org/apache/iceberg/aws/s3/S3InputFile.java b/aws/src/main/java/org/apache/iceberg/aws/s3/S3InputFile.java index eac47962c491..93d86c3feadf 100644 --- a/aws/src/main/java/org/apache/iceberg/aws/s3/S3InputFile.java +++ b/aws/src/main/java/org/apache/iceberg/aws/s3/S3InputFile.java @@ -20,7 +20,6 @@ package org.apache.iceberg.aws.s3; import org.apache.iceberg.aws.AwsProperties; -import org.apache.iceberg.exceptions.NotFoundException; import org.apache.iceberg.io.InputFile; import org.apache.iceberg.io.SeekableInputStream; import software.amazon.awssdk.services.s3.S3Client; @@ -41,11 +40,7 @@ public S3InputFile(S3Client client, S3URI uri, AwsProperties awsProperties) { */ @Override public long getLength() { - if (!exists()) { - throw new NotFoundException("Cannot retrieve file length because file %s does not exist", uri()); - } - - return getObjectMetadata().size(); + return getObjectMetadata().contentLength(); } @Override diff --git a/aws/src/main/java/org/apache/iceberg/aws/s3/S3RequestUtil.java b/aws/src/main/java/org/apache/iceberg/aws/s3/S3RequestUtil.java index c44ce2913c7d..1a32ac42c840 100644 --- a/aws/src/main/java/org/apache/iceberg/aws/s3/S3RequestUtil.java +++ b/aws/src/main/java/org/apache/iceberg/aws/s3/S3RequestUtil.java @@ -24,6 +24,7 @@ import org.apache.iceberg.aws.AwsProperties; import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; import software.amazon.awssdk.services.s3.model.ObjectCannedACL; import software.amazon.awssdk.services.s3.model.PutObjectRequest; import software.amazon.awssdk.services.s3.model.S3Request; @@ -59,6 +60,11 @@ static void configureEncryption(AwsProperties awsProperties, GetObjectRequest.Bu requestBuilder::sseCustomerAlgorithm, requestBuilder::sseCustomerKey, requestBuilder::sseCustomerKeyMD5); } + static void configureEncryption(AwsProperties awsProperties, HeadObjectRequest.Builder requestBuilder) { + configureEncryption(awsProperties, NULL_SSE_SETTER, NULL_STRING_SETTER, + requestBuilder::sseCustomerAlgorithm, requestBuilder::sseCustomerKey, requestBuilder::sseCustomerKeyMD5); + } + @SuppressWarnings("ReturnValueIgnored") static void configureEncryption( AwsProperties awsProperties, diff --git a/aws/src/test/java/org/apache/iceberg/aws/s3/S3FileIOTest.java b/aws/src/test/java/org/apache/iceberg/aws/s3/S3FileIOTest.java index d8a20acd2765..883fab1468ce 100644 --- a/aws/src/test/java/org/apache/iceberg/aws/s3/S3FileIOTest.java +++ b/aws/src/test/java/org/apache/iceberg/aws/s3/S3FileIOTest.java @@ -26,20 +26,16 @@ import java.util.Random; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.SerializationUtils; -import org.apache.iceberg.AssertHelpers; -import org.apache.iceberg.exceptions.NotFoundException; import org.apache.iceberg.io.InputFile; import org.apache.iceberg.io.OutputFile; import org.apache.iceberg.util.SerializableSupplier; import org.junit.Before; import org.junit.ClassRule; import org.junit.Test; -import software.amazon.awssdk.core.sync.RequestBody; import software.amazon.awssdk.http.urlconnection.UrlConnectionHttpClient; import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.model.CreateBucketRequest; -import software.amazon.awssdk.services.s3.model.PutObjectRequest; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; @@ -68,10 +64,6 @@ public void newInputFile() throws IOException { InputFile in = s3FileIO.newInputFile(location); assertFalse(in.exists()); - AssertHelpers.assertThrows("get length should throw exception", - NotFoundException.class, - "Cannot retrieve file length because file s3://bucket/path/to/file.txt does not exist", - in::getLength); OutputFile out = s3FileIO.newOutputFile(location); try (OutputStream os = out.createOrOverwrite()) { @@ -92,31 +84,6 @@ public void newInputFile() throws IOException { assertFalse(s3FileIO.newInputFile(location).exists()); } - @Test - public void testExists_wrongFileWithSamePrefix() { - String location = "s3://bucket/file.txt"; - byte [] data = new byte[1024 * 1024]; - random.nextBytes(data); - s3.get().putObject(PutObjectRequest.builder().bucket("bucket").key("file.txt.dup").build(), - RequestBody.fromBytes(data)); - InputFile in = s3FileIO.newInputFile(location); - assertFalse("file should not exist", in.exists()); - } - - @Test - public void testExists_multipleFilesSamePrefix() { - String location = "s3://bucket/file.txt"; - byte [] data = new byte[1024 * 1024]; - random.nextBytes(data); - s3.get().putObject(PutObjectRequest.builder().bucket("bucket").key("file.txt.dup").build(), - RequestBody.fromBytes(new byte[1024 * 1024 * 2])); - s3.get().putObject(PutObjectRequest.builder().bucket("bucket").key("file.txt").build(), - RequestBody.fromBytes(data)); - InputFile in = s3FileIO.newInputFile(location); - assertTrue("file should exist", in.exists()); - assertEquals("List results are always returned in UTF-8 binary order", data.length, in.getLength()); - } - @Test public void serializeClient() { SerializableSupplier pre =