Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,9 @@
import java.util.UUID;
import javax.crypto.KeyGenerator;
import javax.crypto.SecretKey;
import org.apache.iceberg.AssertHelpers;
import org.apache.iceberg.aws.AwsClientUtil;
import org.apache.iceberg.aws.AwsIntegTestUtil;
import org.apache.iceberg.aws.AwsProperties;
import org.apache.iceberg.exceptions.NotFoundException;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.io.OutputFile;
import org.junit.AfterClass;
Expand Down Expand Up @@ -92,39 +90,6 @@ public void before() {
objectUri = String.format("s3://%s/%s", bucketName, objectKey);
}

/**
 * Builds an input file for {@code objectUri} without this test uploading anything first, then
 * checks that {@code exists()} is false and that {@code getLength()} fails with
 * {@link NotFoundException} carrying the expected message.
 */
@Test
public void testExists_noFile() {
  S3FileIO fileIO = new S3FileIO(AwsClientUtil::defaultS3Client);
  InputFile missing = fileIO.newInputFile(objectUri);

  Assert.assertFalse("file should not exist", missing.exists());

  // The expected message embeds the full URI of the object that was looked up.
  String expectedMessage =
      String.format("Cannot retrieve file length because file %s does not exist", objectUri);
  AssertHelpers.assertThrows(
      "get length should throw exception",
      NotFoundException.class,
      expectedMessage,
      missing::getLength);
}

/**
 * Uploads an object only under {@code objectKey + "suffix"} and verifies that the input file for
 * {@code objectUri} is still reported as absent.
 */
@Test
public void testExists_wrongFileSamePrefix() {
  // Only the sibling key is written by this test; the exact key is left untouched.
  PutObjectRequest siblingPut = PutObjectRequest.builder()
      .bucket(bucketName)
      .key(objectKey + "suffix")
      .build();
  s3.putObject(siblingPut, RequestBody.fromBytes(contentBytes));

  S3FileIO fileIO = new S3FileIO(AwsClientUtil::defaultS3Client);
  InputFile target = fileIO.newInputFile(objectUri);
  Assert.assertFalse("file should not exist", target.exists());
}

/**
 * Uploads {@code contentBytes} under {@code objectKey} and a 1 MiB zero-filled object under
 * {@code objectKey + "suffix"}, then verifies that the input file for {@code objectUri} exists
 * and reports the length of {@code contentBytes}, not of the suffixed object.
 */
@Test
public void testExists_multipleFilesSamePrefix() {
  PutObjectRequest exactPut = PutObjectRequest.builder()
      .bucket(bucketName)
      .key(objectKey)
      .build();
  s3.putObject(exactPut, RequestBody.fromBytes(contentBytes));

  PutObjectRequest siblingPut = PutObjectRequest.builder()
      .bucket(bucketName)
      .key(objectKey + "suffix")
      .build();
  s3.putObject(siblingPut, RequestBody.fromBytes(new byte[1024 * 1024]));

  S3FileIO fileIO = new S3FileIO(AwsClientUtil::defaultS3Client);
  InputFile target = fileIO.newInputFile(objectUri);

  Assert.assertTrue("file should exist", target.exists());
  Assert.assertEquals(
      "List results are always returned in UTF-8 binary order",
      contentBytes.length,
      target.getLength());
}

@Test
public void testNewInputStream() throws Exception {
s3.putObject(PutObjectRequest.builder().bucket(bucketName).key(objectKey).build(),
Expand Down
28 changes: 8 additions & 20 deletions aws/src/main/java/org/apache/iceberg/aws/s3/BaseS3File.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,15 @@
import org.apache.iceberg.aws.AwsProperties;
import software.amazon.awssdk.http.HttpStatusCode;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
import software.amazon.awssdk.services.s3.model.ListObjectsV2Response;
import software.amazon.awssdk.services.s3.model.HeadObjectRequest;
import software.amazon.awssdk.services.s3.model.HeadObjectResponse;
import software.amazon.awssdk.services.s3.model.S3Exception;
import software.amazon.awssdk.services.s3.model.S3Object;

abstract class BaseS3File {
private final S3Client client;
private final S3URI uri;
private final AwsProperties awsProperties;
private S3Object metadata;
private HeadObjectResponse metadata;

BaseS3File(S3Client client, S3URI uri) {
this(client, uri, new AwsProperties());
Expand Down Expand Up @@ -76,24 +75,13 @@ public boolean exists() {
}
}

protected S3Object getObjectMetadata() throws S3Exception {
protected HeadObjectResponse getObjectMetadata() throws S3Exception {
if (metadata == null) {
ListObjectsV2Response response = client().listObjectsV2(ListObjectsV2Request.builder()
HeadObjectRequest.Builder requestBuilder = HeadObjectRequest.builder()
.bucket(uri().bucket())
.prefix(uri().key())
.maxKeys(1)
.build());

if (!response.hasContents()) {
metadata = null;
} else {
S3Object s3Object = response.contents().get(0);
if (uri().key().equals(s3Object.key())) {
metadata = s3Object;
} else {
metadata = null;
}
}
.key(uri().key());
S3RequestUtil.configureEncryption(awsProperties, requestBuilder);
metadata = client().headObject(requestBuilder.build());
}

return metadata;
Expand Down
7 changes: 1 addition & 6 deletions aws/src/main/java/org/apache/iceberg/aws/s3/S3InputFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
package org.apache.iceberg.aws.s3;

import org.apache.iceberg.aws.AwsProperties;
import org.apache.iceberg.exceptions.NotFoundException;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.io.SeekableInputStream;
import software.amazon.awssdk.services.s3.S3Client;
Expand All @@ -41,11 +40,7 @@ public S3InputFile(S3Client client, S3URI uri, AwsProperties awsProperties) {
*/
@Override
public long getLength() {
// NOTE(review): this is a flattened diff hunk, so two variants of the method body appear
// together without +/- markers and the span does not compile as written. Judging by the hunk
// summary (1 addition, 6 deletions), the exists() guard and the size() return look like the
// removed variant and the contentLength() return like the added one - confirm against the
// real file.
if (!exists()) {
throw new NotFoundException("Cannot retrieve file length because file %s does not exist", uri());
}

// Both variants read the length from the object returned by getObjectMetadata().
return getObjectMetadata().size();
return getObjectMetadata().contentLength();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.iceberg.aws.AwsProperties;
import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest;
import software.amazon.awssdk.services.s3.model.GetObjectRequest;
import software.amazon.awssdk.services.s3.model.HeadObjectRequest;
import software.amazon.awssdk.services.s3.model.ObjectCannedACL;
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
import software.amazon.awssdk.services.s3.model.S3Request;
Expand Down Expand Up @@ -59,6 +60,11 @@ static void configureEncryption(AwsProperties awsProperties, GetObjectRequest.Bu
requestBuilder::sseCustomerAlgorithm, requestBuilder::sseCustomerKey, requestBuilder::sseCustomerKeyMD5);
}

/**
 * Configures encryption on a {@link HeadObjectRequest.Builder} by delegating to the general
 * {@code configureEncryption} overload.
 *
 * <p>The first two setter slots receive the shared {@code NULL_SSE_SETTER} and
 * {@code NULL_STRING_SETTER} placeholders; the remaining three are bound to the builder's
 * customer-provided key setters (algorithm, key, key MD5).
 *
 * @param awsProperties properties handed through to the general overload
 * @param requestBuilder builder whose SSE customer setters are passed on
 */
static void configureEncryption(AwsProperties awsProperties, HeadObjectRequest.Builder requestBuilder) {
  configureEncryption(
      awsProperties,
      NULL_SSE_SETTER,
      NULL_STRING_SETTER,
      requestBuilder::sseCustomerAlgorithm,
      requestBuilder::sseCustomerKey,
      requestBuilder::sseCustomerKeyMD5);
}

@SuppressWarnings("ReturnValueIgnored")
static void configureEncryption(
AwsProperties awsProperties,
Expand Down
33 changes: 0 additions & 33 deletions aws/src/test/java/org/apache/iceberg/aws/s3/S3FileIOTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,16 @@
import java.util.Random;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.SerializationUtils;
import org.apache.iceberg.AssertHelpers;
import org.apache.iceberg.exceptions.NotFoundException;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.util.SerializableSupplier;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import software.amazon.awssdk.core.sync.RequestBody;
import software.amazon.awssdk.http.urlconnection.UrlConnectionHttpClient;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.CreateBucketRequest;
import software.amazon.awssdk.services.s3.model.PutObjectRequest;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
Expand Down Expand Up @@ -68,10 +64,6 @@ public void newInputFile() throws IOException {

InputFile in = s3FileIO.newInputFile(location);
assertFalse(in.exists());
AssertHelpers.assertThrows("get length should throw exception",
NotFoundException.class,
"Cannot retrieve file length because file s3://bucket/path/to/file.txt does not exist",
in::getLength);

OutputFile out = s3FileIO.newOutputFile(location);
try (OutputStream os = out.createOrOverwrite()) {
Expand All @@ -92,31 +84,6 @@ public void newInputFile() throws IOException {
assertFalse(s3FileIO.newInputFile(location).exists());
}

/**
 * Uploads 1 MiB of random bytes under {@code file.txt.dup} only, then verifies that an input file
 * for {@code s3://bucket/file.txt} reports that it does not exist.
 */
@Test
public void testExists_wrongFileWithSamePrefix() {
  byte[] payload = new byte[1024 * 1024];
  random.nextBytes(payload);

  // Only the ".dup" key is written; "file.txt" itself is never uploaded by this test.
  s3.get().putObject(
      PutObjectRequest.builder()
          .bucket("bucket")
          .key("file.txt.dup")
          .build(),
      RequestBody.fromBytes(payload));

  InputFile in = s3FileIO.newInputFile("s3://bucket/file.txt");
  assertFalse("file should not exist", in.exists());
}

/**
 * Uploads a 2 MiB zero-filled object under {@code file.txt.dup} and 1 MiB of random bytes under
 * {@code file.txt}, then verifies that the input file for {@code s3://bucket/file.txt} exists and
 * reports the 1 MiB length rather than the length of the {@code .dup} object.
 */
@Test
public void testExists_multipleFilesSamePrefix() {
  byte[] payload = new byte[1024 * 1024];
  random.nextBytes(payload);

  // The look-alike key is written first, with a different size, so a mix-up would show in the length.
  s3.get().putObject(
      PutObjectRequest.builder()
          .bucket("bucket")
          .key("file.txt.dup")
          .build(),
      RequestBody.fromBytes(new byte[1024 * 1024 * 2]));
  s3.get().putObject(
      PutObjectRequest.builder()
          .bucket("bucket")
          .key("file.txt")
          .build(),
      RequestBody.fromBytes(payload));

  InputFile in = s3FileIO.newInputFile("s3://bucket/file.txt");
  assertTrue("file should exist", in.exists());
  assertEquals(
      "List results are always returned in UTF-8 binary order",
      payload.length,
      in.getLength());
}

@Test
public void serializeClient() {
SerializableSupplier<S3Client> pre =
Expand Down