diff --git a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSFileIO.java b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSFileIO.java index 555b395e0d0e..0bfce9d6055b 100644 --- a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSFileIO.java +++ b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSFileIO.java @@ -111,7 +111,7 @@ DataLakeFileSystemClient client(ADLSLocation location) { new DataLakeFileSystemClientBuilder().httpClient(HTTP); location.container().ifPresent(clientBuilder::fileSystemName); - azureProperties.applyClientConfiguration(location.storageEndpoint(), clientBuilder); + azureProperties.applyClientConfiguration(location.storageAccount(), clientBuilder); return clientBuilder.buildClient(); } diff --git a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java index e024a5149343..e73093512b82 100644 --- a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java +++ b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java @@ -18,35 +18,26 @@ */ package org.apache.iceberg.azure.adlsv2; -import java.net.URI; -import java.net.URISyntaxException; import java.util.Optional; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; /** - * This class represents a fully qualified location in Azure Data Lake Storage, expressed as a URI. + * This class represents a fully qualified location in Azure expressed as a URI. * *

Locations follow the conventions used by Hadoop's Azure support, i.e. * - *

<pre>{@code abfs[s]://[<container>@]<storageAccount>.dfs.core.windows.net/<path>}</pre>
+ *
<pre>{@code abfs[s]://[<container>@]<storage account host>/<file path>}</pre>
* - * or - * - *
<pre>{@code wasb[s]://<container>@<storageAccount>.blob.core.windows.net/<path>}</pre>
- * - * For compatibility, paths using the wasb scheme are also accepted but will be processed via the - * Azure Data Lake Storage Gen2 APIs and not the Blob Storage APIs. - * - *

See Hadoop - * Azure Support + *

See Hadoop Azure + * Support */ class ADLSLocation { - private static final Pattern URI_PATTERN = Pattern.compile("^(abfss?|wasbs?)://[^/?#]+.*$"); + private static final Pattern URI_PATTERN = Pattern.compile("^abfss?://([^/?#]+)(.*)?$"); - private final String storageEndpoint; + private final String storageAccount; private final String container; private final String path; @@ -59,23 +50,27 @@ class ADLSLocation { Preconditions.checkArgument(location != null, "Invalid location: null"); Matcher matcher = URI_PATTERN.matcher(location); - if (!matcher.matches()) { - throw new IllegalArgumentException(String.format("Invalid ADLS URI: %s", location)); - } - try { - URI uri = new URI(location); - this.container = uri.getUserInfo(); - this.storageEndpoint = uri.getHost(); - this.path = stripLeadingSlash(uri.getRawPath()); - } catch (URISyntaxException e) { - throw new IllegalArgumentException(String.format("Invalid ADLS URI: %s", location), e); + ValidationException.check(matcher.matches(), "Invalid ADLS URI: %s", location); + + String authority = matcher.group(1); + String[] parts = authority.split("@", -1); + if (parts.length > 1) { + this.container = parts[0]; + this.storageAccount = parts[1]; + } else { + this.container = null; + this.storageAccount = authority; } + + String uriPath = matcher.group(2); + uriPath = uriPath == null ? "" : uriPath.startsWith("/") ? uriPath.substring(1) : uriPath; + this.path = uriPath.split("\\?", -1)[0].split("#", -1)[0]; } - /** Returns Azure storage service endpoint. */ - public String storageEndpoint() { - return storageEndpoint; + /** Returns Azure storage account. */ + public String storageAccount() { + return storageAccount; } /** Returns Azure container name. 
*/ @@ -87,12 +82,4 @@ public Optional container() { public String path() { return path; } - - private static String stripLeadingSlash(String path) { - if (path.startsWith("/")) { - return path.substring(1); - } else { - return path; - } - } } diff --git a/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java b/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java index 6edede187153..867b54b4c7e3 100644 --- a/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java +++ b/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java @@ -21,8 +21,7 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; -import java.net.URI; -import java.net.URISyntaxException; +import org.apache.iceberg.exceptions.ValidationException; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; @@ -34,33 +33,17 @@ public void testLocationParsing(String scheme) { String p1 = scheme + "://container@account.dfs.core.windows.net/path/to/file"; ADLSLocation location = new ADLSLocation(p1); - assertThat(location.storageEndpoint()).isEqualTo("account.dfs.core.windows.net"); + assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net"); assertThat(location.container().get()).isEqualTo("container"); assertThat(location.path()).isEqualTo("path/to/file"); } - @ParameterizedTest - @ValueSource(strings = {"wasb", "wasbs"}) - public void testWasbLocationParsing(String scheme) { - String p1 = scheme + "://container@account.blob.core.windows.net/path/to/file"; + @Test + public void testEncodedString() { + String p1 = "abfs://container@account.dfs.core.windows.net/path%20to%20file"; ADLSLocation location = new ADLSLocation(p1); - assertThat(location.storageEndpoint()).isEqualTo("account.blob.core.windows.net"); - 
assertThat(location.container().get()).isEqualTo("container"); - assertThat(location.path()).isEqualTo("path/to/file"); - } - - @ParameterizedTest - @ValueSource( - strings = { - "abfs://container@account.dfs.core.windows.net/path%20to%20file", - "wasb://container@account.blob.core.windows.net/path%20to%20file" - }) - public void testEncodedString(String path) throws URISyntaxException { - ADLSLocation location = new ADLSLocation(path); - String expectedEndpoint = new URI(path).getHost(); - - assertThat(location.storageEndpoint()).isEqualTo(expectedEndpoint); + assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net"); assertThat(location.container().get()).isEqualTo("container"); assertThat(location.path()).isEqualTo("path%20to%20file"); } @@ -68,81 +51,53 @@ public void testEncodedString(String path) throws URISyntaxException { @Test public void testMissingScheme() { assertThatThrownBy(() -> new ADLSLocation("/path/to/file")) - .isInstanceOf(IllegalArgumentException.class) + .isInstanceOf(ValidationException.class) .hasMessage("Invalid ADLS URI: /path/to/file"); } @Test public void testInvalidScheme() { assertThatThrownBy(() -> new ADLSLocation("s3://bucket/path/to/file")) - .isInstanceOf(IllegalArgumentException.class) + .isInstanceOf(ValidationException.class) .hasMessage("Invalid ADLS URI: s3://bucket/path/to/file"); } @Test - public void testInvalidURI() { - String invalidUri = "abfs://container@account.dfs.core.windows.net/#invalidPath#"; - assertThatThrownBy(() -> new ADLSLocation(invalidUri)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage(String.format("Invalid ADLS URI: %s", invalidUri)); - } + public void testNoContainer() { + String p1 = "abfs://account.dfs.core.windows.net/path/to/file"; + ADLSLocation location = new ADLSLocation(p1); - @ParameterizedTest - @ValueSource( - strings = { - "abfs://account.dfs.core.windows.net/path/to/file", - "wasb://account.blob.core.windows.net/path/to/file" - }) - public void 
testNoContainer(String path) throws URISyntaxException { - ADLSLocation location = new ADLSLocation(path); - String expectedEndpoint = new URI(path).getHost(); - - assertThat(location.storageEndpoint()).isEqualTo(expectedEndpoint); + assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net"); assertThat(location.container().isPresent()).isFalse(); assertThat(location.path()).isEqualTo("path/to/file"); } - @ParameterizedTest - @ValueSource( - strings = { - "abfs://container@account.dfs.core.windows.net", - "wasb://container@account.blob.core.windows.net" - }) - public void testNoPath(String path) throws URISyntaxException { - ADLSLocation location = new ADLSLocation(path); - String expectedEndpoint = new URI(path).getHost(); - - assertThat(location.storageEndpoint()).isEqualTo(expectedEndpoint); + @Test + public void testNoPath() { + String p1 = "abfs://container@account.dfs.core.windows.net"; + ADLSLocation location = new ADLSLocation(p1); + + assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net"); assertThat(location.container().get()).isEqualTo("container"); assertThat(location.path()).isEqualTo(""); } - @ParameterizedTest - @ValueSource( - strings = { - "abfs://container@account.dfs.core.windows.net/path/to/file?query=foo#123", - "wasb://container@account.blob.core.windows.net/path/to/file?query=foo#123" - }) - public void testQueryAndFragment(String path) throws URISyntaxException { - ADLSLocation location = new ADLSLocation(path); - String expectedEndpoint = new URI(path).getHost(); - - assertThat(location.storageEndpoint()).isEqualTo(expectedEndpoint); + @Test + public void testQueryAndFragment() { + String p1 = "abfs://container@account.dfs.core.windows.net/path/to/file?query=foo#123"; + ADLSLocation location = new ADLSLocation(p1); + + assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net"); assertThat(location.container().get()).isEqualTo("container"); 
assertThat(location.path()).isEqualTo("path/to/file"); } - @ParameterizedTest - @ValueSource( - strings = { - "abfs://container@account.dfs.core.windows.net?query=foo#123", - "wasb://container@account.blob.core.windows.net?query=foo#123" - }) - public void testQueryAndFragmentNoPath(String path) throws URISyntaxException { - ADLSLocation location = new ADLSLocation(path); - String expectedEndpoint = new URI(path).getHost(); - - assertThat(location.storageEndpoint()).isEqualTo(expectedEndpoint); + @Test + public void testQueryAndFragmentNoPath() { + String p1 = "abfs://container@account.dfs.core.windows.net?query=foo#123"; + ADLSLocation location = new ADLSLocation(p1); + + assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net"); assertThat(location.container().get()).isEqualTo("container"); assertThat(location.path()).isEqualTo(""); } diff --git a/core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java b/core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java index a8adf979f85a..a858045aab8b 100644 --- a/core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java +++ b/core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java @@ -62,9 +62,7 @@ public class ResolvingFileIO implements HadoopConfigurable, DelegateFileIO { "s3n", S3_FILE_IO_IMPL, "gs", GCS_FILE_IO_IMPL, "abfs", ADLS_FILE_IO_IMPL, - "abfss", ADLS_FILE_IO_IMPL, - "wasb", ADLS_FILE_IO_IMPL, - "wasbs", ADLS_FILE_IO_IMPL); + "abfss", ADLS_FILE_IO_IMPL); private final Map ioInstances = Maps.newConcurrentMap(); private final AtomicBoolean isClosed = new AtomicBoolean(false);