diff --git a/aws/src/main/java/org/apache/iceberg/aws/AwsClientUtil.java b/aws/src/main/java/org/apache/iceberg/aws/AwsClientUtil.java new file mode 100644 index 000000000000..aa2d1f31bfea --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/AwsClientUtil.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws; + +import software.amazon.awssdk.http.urlconnection.UrlConnectionHttpClient; +import software.amazon.awssdk.services.s3.S3Client; + +/** + * Provide factory methods to get default AWS clients. + * The clients use a default {@link UrlConnectionHttpClient} to avoid multiple versions of AWS HTTP client builders + * existing in the Java classpath, causing non-deterministic behavior of the AWS client. + * The credential and region information are both loaded from the default chain. + * For more details, see https://docs.aws.amazon.com/sdk-for-java/v2/developer-guide/credentials.html and + * https://docs.aws.amazon.com/sdk-for-java/v2/developer-guide/java-dg-region-selection.html + */ +public class AwsClientUtil { + + private AwsClientUtil() { + } + + public static S3Client defaultS3Client() { + return S3Client.builder() + .httpClient(UrlConnectionHttpClient.create()) + .build(); + } +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIO.java b/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIO.java index e889bc52ed05..1f69ec534006 100644 --- a/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIO.java +++ b/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIO.java @@ -19,6 +19,7 @@ package org.apache.iceberg.aws.s3; +import org.apache.iceberg.aws.AwsClientUtil; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.io.InputFile; import org.apache.iceberg.io.OutputFile; @@ -29,14 +30,16 @@ import software.amazon.awssdk.services.s3.model.ObjectIdentifier; /** - * FileIO implementation backed by S3. Locations used must follow the conventions for URIs (e.g. s3://bucket/path/..). + * FileIO implementation backed by S3. + * Locations used must follow the conventions for S3 URIs (e.g. s3://bucket/path...). + * See {@link S3URI#VALID_SCHEMES} for the list of supported S3 URI schemes. */ public class S3FileIO implements FileIO { private final SerializableSupplier s3; private transient S3Client client; public S3FileIO() { - this.s3 = S3Client::create; + this.s3 = AwsClientUtil::defaultS3Client; } public S3FileIO(SerializableSupplier s3) { diff --git a/aws/src/main/java/org/apache/iceberg/aws/s3/S3OutputStream.java b/aws/src/main/java/org/apache/iceberg/aws/s3/S3OutputStream.java index c9ed27490c4e..2ae94a2f698b 100644 --- a/aws/src/main/java/org/apache/iceberg/aws/s3/S3OutputStream.java +++ b/aws/src/main/java/org/apache/iceberg/aws/s3/S3OutputStream.java @@ -81,6 +81,10 @@ public void write(byte[] b, int off, int len) throws IOException { @Override public void close() throws IOException { + if (closed) { + return; + } + super.close(); closed = true; diff --git a/aws/src/test/java/org/apache/iceberg/aws/s3/S3OutputStreamTest.java b/aws/src/test/java/org/apache/iceberg/aws/s3/S3OutputStreamTest.java index 6e1dfd2b1e6d..3e1b355daabd 100644 --- a/aws/src/test/java/org/apache/iceberg/aws/s3/S3OutputStreamTest.java +++ b/aws/src/test/java/org/apache/iceberg/aws/s3/S3OutputStreamTest.java @@ -93,6 +93,14 @@ public void testWriteArray() throws IOException { assertArrayEquals(expected, actual); } + @Test + public void testMultipleClose() throws IOException { + S3URI uri = new S3URI("s3://bucket/path/to/array-out.dat"); + S3OutputStream stream = new S3OutputStream(s3, uri); + stream.close(); + stream.close(); + } + private byte[] readS3Data(S3URI uri) { ResponseBytes data = s3.getObject(GetObjectRequest.builder().bucket(uri.bucket()).key(uri.key()).build(), diff --git a/aws/src/test/java/org/apache/iceberg/aws/s3/S3URITest.java b/aws/src/test/java/org/apache/iceberg/aws/s3/S3URITest.java index 80ffa7f81336..a11b8286a9f2 100644 --- a/aws/src/test/java/org/apache/iceberg/aws/s3/S3URITest.java +++ b/aws/src/test/java/org/apache/iceberg/aws/s3/S3URITest.java @@ -20,6 +20,7 @@ package org.apache.iceberg.aws.s3; import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.junit.Test; import static org.junit.Assert.assertEquals; @@ -71,4 +72,13 @@ public void testQueryAndFragment() { assertEquals("path/to/file", uri1.key()); assertEquals(p1, uri1.toString()); } + + @Test + public void testValidSchemes() { + for (String scheme : Lists.newArrayList("https", "s3", "s3a", "s3n")) { + S3URI uri = new S3URI(scheme + "://bucket/path/to/file"); + assertEquals("bucket", uri.bucket()); + assertEquals("path/to/file", uri.key()); + } + } } diff --git a/build.gradle b/build.gradle index 39ab43d9488f..0a8bc406ec12 100644 --- a/build.gradle +++ b/build.gradle @@ -249,6 +249,7 @@ project(':iceberg-aws') { compile project(':iceberg-api') compile project(':iceberg-core') + compile 'software.amazon.awssdk:url-connection-client' compile 'software.amazon.awssdk:s3' testCompile("com.adobe.testing:s3mock-junit4") { diff --git a/versions.props b/versions.props index 88ee22db5150..6e748b705234 100644 --- a/versions.props +++ b/versions.props @@ -17,7 +17,7 @@ com.github.ben-manes.caffeine:caffeine = 2.7.0 org.apache.arrow:arrow-vector = 2.0.0 org.apache.arrow:arrow-memory-netty = 2.0.0 com.github.stephenc.findbugs:findbugs-annotations = 1.3.9-1 -software.amazon.awssdk:s3 = 2.15.7 +software.amazon.awssdk:* = 2.15.7 org.scala-lang:scala-library = 2.12.10 # test deps