diff --git a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSFileIO.java b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSFileIO.java
index 0bfce9d6055b..555b395e0d0e 100644
--- a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSFileIO.java
+++ b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSFileIO.java
@@ -111,7 +111,7 @@ DataLakeFileSystemClient client(ADLSLocation location) {
new DataLakeFileSystemClientBuilder().httpClient(HTTP);
location.container().ifPresent(clientBuilder::fileSystemName);
- azureProperties.applyClientConfiguration(location.storageAccount(), clientBuilder);
+ azureProperties.applyClientConfiguration(location.storageEndpoint(), clientBuilder);
return clientBuilder.buildClient();
}
diff --git a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java
index e73093512b82..e024a5149343 100644
--- a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java
+++ b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java
@@ -18,26 +18,35 @@
*/
package org.apache.iceberg.azure.adlsv2;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
/**
- * This class represents a fully qualified location in Azure expressed as a URI.
+ * This class represents a fully qualified location in Azure Data Lake Storage, expressed as a URI.
*
*
Locations follow the conventions used by Hadoop's Azure support, i.e.
*
- *
{@code abfs[s]://[@]/}
+ * {@code abfs[s]://[@].dfs.core.windows.net/}
*
- * See Hadoop Azure
- * Support
+ * or
+ *
+ *
{@code wasb[s]://@.blob.core.windows.net/}
+ *
+ * For compatibility, paths using the wasb scheme are also accepted but will be processed via the
+ * Azure Data Lake Storage Gen2 APIs and not the Blob Storage APIs.
+ *
+ * See Hadoop
+ * Azure Support
*/
class ADLSLocation {
- private static final Pattern URI_PATTERN = Pattern.compile("^abfss?://([^/?#]+)(.*)?$");
+ private static final Pattern URI_PATTERN = Pattern.compile("^(abfss?|wasbs?)://[^/?#]+.*$");
- private final String storageAccount;
+ private final String storageEndpoint;
private final String container;
private final String path;
@@ -50,27 +59,23 @@ class ADLSLocation {
Preconditions.checkArgument(location != null, "Invalid location: null");
Matcher matcher = URI_PATTERN.matcher(location);
-
- ValidationException.check(matcher.matches(), "Invalid ADLS URI: %s", location);
-
- String authority = matcher.group(1);
- String[] parts = authority.split("@", -1);
- if (parts.length > 1) {
- this.container = parts[0];
- this.storageAccount = parts[1];
- } else {
- this.container = null;
- this.storageAccount = authority;
+ if (!matcher.matches()) {
+ throw new IllegalArgumentException(String.format("Invalid ADLS URI: %s", location));
}
- String uriPath = matcher.group(2);
- uriPath = uriPath == null ? "" : uriPath.startsWith("/") ? uriPath.substring(1) : uriPath;
- this.path = uriPath.split("\\?", -1)[0].split("#", -1)[0];
+ try {
+ URI uri = new URI(location);
+ this.container = uri.getUserInfo();
+ this.storageEndpoint = uri.getHost();
+ this.path = stripLeadingSlash(uri.getRawPath());
+ } catch (URISyntaxException e) {
+ throw new IllegalArgumentException(String.format("Invalid ADLS URI: %s", location), e);
+ }
}
- /** Returns Azure storage account. */
- public String storageAccount() {
- return storageAccount;
+ /** Returns Azure storage service endpoint. */
+ public String storageEndpoint() {
+ return storageEndpoint;
}
/** Returns Azure container name. */
@@ -82,4 +87,12 @@ public Optional container() {
public String path() {
return path;
}
+
+ private static String stripLeadingSlash(String path) {
+ if (path.startsWith("/")) {
+ return path.substring(1);
+ } else {
+ return path;
+ }
+ }
}
diff --git a/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java b/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java
index 867b54b4c7e3..6edede187153 100644
--- a/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java
+++ b/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java
@@ -21,7 +21,8 @@
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
-import org.apache.iceberg.exceptions.ValidationException;
+import java.net.URI;
+import java.net.URISyntaxException;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
@@ -33,17 +34,33 @@ public void testLocationParsing(String scheme) {
String p1 = scheme + "://container@account.dfs.core.windows.net/path/to/file";
ADLSLocation location = new ADLSLocation(p1);
- assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net");
+ assertThat(location.storageEndpoint()).isEqualTo("account.dfs.core.windows.net");
assertThat(location.container().get()).isEqualTo("container");
assertThat(location.path()).isEqualTo("path/to/file");
}
- @Test
- public void testEncodedString() {
- String p1 = "abfs://container@account.dfs.core.windows.net/path%20to%20file";
+ @ParameterizedTest
+ @ValueSource(strings = {"wasb", "wasbs"})
+ public void testWasbLocationParsing(String scheme) {
+ String p1 = scheme + "://container@account.blob.core.windows.net/path/to/file";
ADLSLocation location = new ADLSLocation(p1);
- assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net");
+ assertThat(location.storageEndpoint()).isEqualTo("account.blob.core.windows.net");
+ assertThat(location.container().get()).isEqualTo("container");
+ assertThat(location.path()).isEqualTo("path/to/file");
+ }
+
+ @ParameterizedTest
+ @ValueSource(
+ strings = {
+ "abfs://container@account.dfs.core.windows.net/path%20to%20file",
+ "wasb://container@account.blob.core.windows.net/path%20to%20file"
+ })
+ public void testEncodedString(String path) throws URISyntaxException {
+ ADLSLocation location = new ADLSLocation(path);
+ String expectedEndpoint = new URI(path).getHost();
+
+ assertThat(location.storageEndpoint()).isEqualTo(expectedEndpoint);
assertThat(location.container().get()).isEqualTo("container");
assertThat(location.path()).isEqualTo("path%20to%20file");
}
@@ -51,53 +68,81 @@ public void testEncodedString() {
@Test
public void testMissingScheme() {
assertThatThrownBy(() -> new ADLSLocation("/path/to/file"))
- .isInstanceOf(ValidationException.class)
+ .isInstanceOf(IllegalArgumentException.class)
.hasMessage("Invalid ADLS URI: /path/to/file");
}
@Test
public void testInvalidScheme() {
assertThatThrownBy(() -> new ADLSLocation("s3://bucket/path/to/file"))
- .isInstanceOf(ValidationException.class)
+ .isInstanceOf(IllegalArgumentException.class)
.hasMessage("Invalid ADLS URI: s3://bucket/path/to/file");
}
@Test
- public void testNoContainer() {
- String p1 = "abfs://account.dfs.core.windows.net/path/to/file";
- ADLSLocation location = new ADLSLocation(p1);
+ public void testInvalidURI() {
+ String invalidUri = "abfs://container@account.dfs.core.windows.net/#invalidPath#";
+ assertThatThrownBy(() -> new ADLSLocation(invalidUri))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage(String.format("Invalid ADLS URI: %s", invalidUri));
+ }
- assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net");
+ @ParameterizedTest
+ @ValueSource(
+ strings = {
+ "abfs://account.dfs.core.windows.net/path/to/file",
+ "wasb://account.blob.core.windows.net/path/to/file"
+ })
+ public void testNoContainer(String path) throws URISyntaxException {
+ ADLSLocation location = new ADLSLocation(path);
+ String expectedEndpoint = new URI(path).getHost();
+
+ assertThat(location.storageEndpoint()).isEqualTo(expectedEndpoint);
assertThat(location.container().isPresent()).isFalse();
assertThat(location.path()).isEqualTo("path/to/file");
}
- @Test
- public void testNoPath() {
- String p1 = "abfs://container@account.dfs.core.windows.net";
- ADLSLocation location = new ADLSLocation(p1);
-
- assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net");
+ @ParameterizedTest
+ @ValueSource(
+ strings = {
+ "abfs://container@account.dfs.core.windows.net",
+ "wasb://container@account.blob.core.windows.net"
+ })
+ public void testNoPath(String path) throws URISyntaxException {
+ ADLSLocation location = new ADLSLocation(path);
+ String expectedEndpoint = new URI(path).getHost();
+
+ assertThat(location.storageEndpoint()).isEqualTo(expectedEndpoint);
assertThat(location.container().get()).isEqualTo("container");
assertThat(location.path()).isEqualTo("");
}
- @Test
- public void testQueryAndFragment() {
- String p1 = "abfs://container@account.dfs.core.windows.net/path/to/file?query=foo#123";
- ADLSLocation location = new ADLSLocation(p1);
-
- assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net");
+ @ParameterizedTest
+ @ValueSource(
+ strings = {
+ "abfs://container@account.dfs.core.windows.net/path/to/file?query=foo#123",
+ "wasb://container@account.blob.core.windows.net/path/to/file?query=foo#123"
+ })
+ public void testQueryAndFragment(String path) throws URISyntaxException {
+ ADLSLocation location = new ADLSLocation(path);
+ String expectedEndpoint = new URI(path).getHost();
+
+ assertThat(location.storageEndpoint()).isEqualTo(expectedEndpoint);
assertThat(location.container().get()).isEqualTo("container");
assertThat(location.path()).isEqualTo("path/to/file");
}
- @Test
- public void testQueryAndFragmentNoPath() {
- String p1 = "abfs://container@account.dfs.core.windows.net?query=foo#123";
- ADLSLocation location = new ADLSLocation(p1);
-
- assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net");
+ @ParameterizedTest
+ @ValueSource(
+ strings = {
+ "abfs://container@account.dfs.core.windows.net?query=foo#123",
+ "wasb://container@account.blob.core.windows.net?query=foo#123"
+ })
+ public void testQueryAndFragmentNoPath(String path) throws URISyntaxException {
+ ADLSLocation location = new ADLSLocation(path);
+ String expectedEndpoint = new URI(path).getHost();
+
+ assertThat(location.storageEndpoint()).isEqualTo(expectedEndpoint);
assertThat(location.container().get()).isEqualTo("container");
assertThat(location.path()).isEqualTo("");
}
diff --git a/core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java b/core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java
index a858045aab8b..a8adf979f85a 100644
--- a/core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java
+++ b/core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java
@@ -62,7 +62,9 @@ public class ResolvingFileIO implements HadoopConfigurable, DelegateFileIO {
"s3n", S3_FILE_IO_IMPL,
"gs", GCS_FILE_IO_IMPL,
"abfs", ADLS_FILE_IO_IMPL,
- "abfss", ADLS_FILE_IO_IMPL);
+ "abfss", ADLS_FILE_IO_IMPL,
+ "wasb", ADLS_FILE_IO_IMPL,
+ "wasbs", ADLS_FILE_IO_IMPL);
private final Map ioInstances = Maps.newConcurrentMap();
private final AtomicBoolean isClosed = new AtomicBoolean(false);