diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc
index a3aa2c8e837..9b3c0c0c1d7 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -3199,4 +3199,32 @@ Result<std::shared_ptr<io::OutputStream>> AzureFileSystem::OpenAppendStream(
   return impl_->OpenAppendStream(location, metadata, false, this);
 }
 
+Result<std::string> AzureFileSystem::PathFromUri(const std::string& uri_string) const {
+  // `internal::PathFromUriHelper` cannot be used here: Azure URIs come in several
+  // forms whose authority component has to be handled differently. Both of the
+  // following must yield the path `container/some/path`:
+  //   (1) abfss://storageacc.blob.core.windows.net/container/some/path
+  //   (2) abfss://acc:pw@container/some/path
+  // For (1) the authority must not be prepended to the path, while for (2) it
+  // must be. The path extraction itself is delegated to `AzureOptions::FromUri`.
+  Uri uri;
+  RETURN_NOT_OK(uri.Parse(uri_string));
+
+  // Check the scheme before parsing Azure options so that an unsupported URI is
+  // always reported as a scheme mismatch.
+  const std::vector<std::string> supported_schemes = {"abfs", "abfss"};
+  const auto scheme = uri.scheme();
+  if (std::find(supported_schemes.begin(), supported_schemes.end(), scheme) ==
+      supported_schemes.end()) {
+    return Status::Invalid("The filesystem expected a URI with one of the schemes (",
+                           ::arrow::internal::JoinStrings(supported_schemes, ", "),
+                           ") but received ", uri_string);
+  }
+
+  // Only the path out-parameter is needed; the returned options are discarded.
+  std::string path;
+  RETURN_NOT_OK(AzureOptions::FromUri(uri, &path));
+  return path;
+}
+
 }  // namespace arrow::fs
diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h
index 93d6ec2f945..072b061eeb2 100644
--- a/cpp/src/arrow/filesystem/azurefs.h
+++ b/cpp/src/arrow/filesystem/azurefs.h
@@ -367,6 +367,11 @@ class ARROW_EXPORT AzureFileSystem : public FileSystem {
   Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
       const std::string& path,
       const std::shared_ptr<const KeyValueMetadata>& metadata) override;
+
+  /// \brief Convert an `abfs://` or `abfss://` URI into a path on this filesystem.
+  ///
+  /// Any other URI scheme yields Status::Invalid.
+  Result<std::string> PathFromUri(const std::string& uri_string) const override;
 };
 
 }  // namespace arrow::fs
diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc
index 9a11a6f2499..36646f417cb 100644
--- a/cpp/src/arrow/filesystem/azurefs_test.cc
+++ b/cpp/src/arrow/filesystem/azurefs_test.cc
@@ -2958,5 +2958,17 @@ TEST_F(TestAzuriteFileSystem, OpenInputFileClosed) {
   ASSERT_RAISES(Invalid, stream->ReadAt(1, 1));
   ASSERT_RAISES(Invalid, stream->Seek(2));
 }
+
+TEST_F(TestAzuriteFileSystem, PathFromUri) {
+  // Authority is the storage endpoint: it must not appear in the path.
+  ASSERT_EQ(
+      "container/some/path",
+      fs()->PathFromUri("abfss://storageacc.blob.core.windows.net/container/some/path"));
+  // Authority is the container: it must be prepended to the path.
+  ASSERT_EQ("container/some/path",
+            fs()->PathFromUri("abfss://acc:pw@container/some/path"));
+  // Schemes other than abfs/abfss are rejected.
+  ASSERT_RAISES(Invalid, fs()->PathFromUri("http://acc:pw@container/some/path"));
+}
 }  // namespace fs
 }  // namespace arrow