From 5fd38fd7a0f56b784e1dcf7830925ab07ad9e9ac Mon Sep 17 00:00:00 2001 From: Marc Cenac Date: Fri, 25 Oct 2024 07:17:35 -0500 Subject: [PATCH 1/3] Azure: Fix ADLSLocation file parsing --- .../org/apache/iceberg/azure/adlsv2/ADLSLocation.java | 3 +-- .../org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java | 8 ++++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java index e73093512b82..876384e44f5f 100644 --- a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java +++ b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java @@ -64,8 +64,7 @@ class ADLSLocation { } String uriPath = matcher.group(2); - uriPath = uriPath == null ? "" : uriPath.startsWith("/") ? uriPath.substring(1) : uriPath; - this.path = uriPath.split("\\?", -1)[0].split("#", -1)[0]; + this.path = uriPath == null ? "" : uriPath.startsWith("/") ? uriPath.substring(1) : uriPath; } /** Returns Azure storage account. 
*/ diff --git a/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java b/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java index 867b54b4c7e3..037799ed6b02 100644 --- a/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java +++ b/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java @@ -101,4 +101,12 @@ public void testQueryAndFragmentNoPath() { assertThat(location.container().get()).isEqualTo("container"); assertThat(location.path()).isEqualTo(""); } + + @ParameterizedTest + @ValueSource(strings = {"file?.txt", "file%3F.txt"}) + public void testQuestionMarkInFileName(String path) { + String fullPath = String.format("abfs://container@account.dfs.core.windows.net/%s", path); + ADLSLocation location = new ADLSLocation(fullPath); + assertThat(location.path()).contains(path); + } } From e9691c9ccfc8ffb538ceaed9f2455421cdf5dfb8 Mon Sep 17 00:00:00 2001 From: Marc Cenac Date: Fri, 25 Oct 2024 07:19:17 -0500 Subject: [PATCH 2/3] Azure: Remove invalid test cases from ADLSLocationTest --- .../azure/adlsv2/ADLSLocationTest.java | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java b/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java index 037799ed6b02..403886f4b28e 100644 --- a/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java +++ b/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java @@ -82,26 +82,6 @@ public void testNoPath() { assertThat(location.path()).isEqualTo(""); } - @Test - public void testQueryAndFragment() { - String p1 = "abfs://container@account.dfs.core.windows.net/path/to/file?query=foo#123"; - ADLSLocation location = new ADLSLocation(p1); - - assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net"); - assertThat(location.container().get()).isEqualTo("container"); - 
assertThat(location.path()).isEqualTo("path/to/file"); - } - - @Test - public void testQueryAndFragmentNoPath() { - String p1 = "abfs://container@account.dfs.core.windows.net?query=foo#123"; - ADLSLocation location = new ADLSLocation(p1); - - assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net"); - assertThat(location.container().get()).isEqualTo("container"); - assertThat(location.path()).isEqualTo(""); - } - @ParameterizedTest @ValueSource(strings = {"file?.txt", "file%3F.txt"}) public void testQuestionMarkInFileName(String path) { From b39772b3c1c024e93e68f082aa38573bd829c3b3 Mon Sep 17 00:00:00 2001 From: Marc Cenac Date: Mon, 28 Oct 2024 19:00:43 -0500 Subject: [PATCH 3/3] Update Javadocs with reference to ADLS URI --- .../org/apache/iceberg/azure/adlsv2/ADLSLocation.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java index 876384e44f5f..5af590628fe8 100644 --- a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java +++ b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java @@ -25,14 +25,16 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions; /** - * This class represents a fully qualified location in Azure expressed as a URI. + * This class represents a fully qualified location to a file or directory in Azure Data Lake + * Storage Gen2 storage. * - *

Locations follow the conventions used by Hadoop's Azure support, i.e. + *

Locations follow a URI-like structure to identify resources: * *

{@code abfs[s]://[<container>@]<storage account host>/<file path>}
* - *

See Hadoop Azure - * Support + *

See Azure + * Data Lake Storage URI */ class ADLSLocation { private static final Pattern URI_PATTERN = Pattern.compile("^abfss?://([^/?#]+)(.*)?$");