Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ humantime = "2.1"
itertools = "0.14.0"
parking_lot = { version = "0.12" }
percent-encoding = "2.1"
regex = "1.11.1"
thiserror = "2.0.2"
tracing = { version = "0.1" }
url = "2.2"
Expand Down
141 changes: 117 additions & 24 deletions src/azure/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ use percent_encoding::percent_decode_str;
use serde::{Deserialize, Serialize};
use std::str::FromStr;
use std::sync::Arc;
use std::sync::OnceLock;
use regex::Regex;
use url::Url;

/// The well-known account used by Azurite and the legacy Azure Storage Emulator.
Expand Down Expand Up @@ -671,36 +673,87 @@ impl MicrosoftAzureBuilder {
self.container_name = Some(validate(parsed.username())?);
self.account_name = Some(validate(a)?);
self.use_fabric_endpoint = true.into();
} else if let Some(a) = host.strip_suffix(".blob.core.windows.net") {
self.container_name = Some(validate(parsed.username())?);
self.account_name = Some(validate(a)?);
} else if let Some(a) = host.strip_suffix(".blob.fabric.microsoft.com") {
self.container_name = Some(validate(parsed.username())?);
self.account_name = Some(validate(a)?);
self.use_fabric_endpoint = true.into();
} else if let Some(a) = host.strip_suffix("-api.onelake.fabric.microsoft.com") {
self.container_name = Some(validate(parsed.username())?);
self.account_name = Some(validate(a)?);
self.use_fabric_endpoint = true.into();
} else {
return Err(Error::UrlNotRecognised { url: url.into() }.into());
}
}
"https" => match host.split_once('.') {
Some((a, "dfs.core.windows.net")) | Some((a, "blob.core.windows.net")) => {
self.account_name = Some(validate(a)?);
let container = parsed.path_segments().unwrap().next().expect(
"iterator always contains at least one string (which may be empty)",
);
if !container.is_empty() {
self.container_name = Some(validate(container)?);
}
"https" => {
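// Regex matching a WS-PL FQDN of the form
// "{workspaceid}.z{xy}.(onelake|dfs|blob).fabric.microsoft.com", where
// {workspaceid} is 32 lowercase hex digits and {xy} is 2 lowercase hex digits.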
static WS_PL_REGEX: OnceLock<Regex> = OnceLock::new();
let ws_pl_regex = WS_PL_REGEX.get_or_init(|| {
Regex::new(
r"^(?P<workspaceid>[0-9a-f]{32})\.z(?P<xy>[0-9a-f]{2})\.(onelake|dfs|blob)\.fabric\.microsoft\.com$"
).unwrap()
});

// WS-PL Fabric endpoint
if let Some(captures) = ws_pl_regex.captures(host) {
let workspaceid = captures.name("workspaceid").unwrap().as_str();
let xy = captures.name("xy").unwrap().as_str();

self.account_name = Some(format!("{workspaceid}.z{xy}"));
self.container_name = Some(validate(workspaceid)?);
self.use_fabric_endpoint = true.into();
return Ok(());
}
Some((a, "dfs.fabric.microsoft.com")) | Some((a, "blob.fabric.microsoft.com")) => {
self.account_name = Some(validate(a)?);
// Attempt to infer the container name from the URL
// - https://onelake.dfs.fabric.microsoft.com/<workspaceGUID>/<itemGUID>/Files/test.csv
// - https://onelake.dfs.fabric.microsoft.com/<workspace>/<item>.<itemtype>/<path>/<fileName>
//
// See <https://learn.microsoft.com/en-us/fabric/onelake/onelake-access-api>
let workspace = parsed.path_segments().unwrap().next().expect(
"iterator always contains at least one string (which may be empty)",
);

// OneLake API endpoint: "{account}-api.onelake.fabric.microsoft.com"
if host.ends_with("-api.onelake.fabric.microsoft.com") {
let account = host.strip_suffix("-api.onelake.fabric.microsoft.com").unwrap();
self.account_name = Some(validate(account)?);
let workspace = parsed.path_segments().unwrap().next()
.expect("iterator always contains at least one string (which may be empty)");

if !workspace.is_empty() {
self.container_name = Some(workspace.to_string())
self.container_name = Some(workspace.to_string());
}

self.use_fabric_endpoint = true.into();
return Ok(());
}

match host.split_once('.') {
// Azure Storage public
Some((a, "dfs.core.windows.net")) | Some((a, "blob.core.windows.net")) => {
self.account_name = Some(validate(a)?);

let container = parsed.path_segments().unwrap().next()
.expect("iterator always contains at least one string (which may be empty)");

if !container.is_empty() {
self.container_name = Some(validate(container)?);
}
}

// Fabric endpoints
Some((a, "dfs.fabric.microsoft.com")) | Some((a, "blob.fabric.microsoft.com")) => {
self.account_name = Some(validate(a)?);

// Attempt to infer the container name from the URL
let workspace = parsed.path_segments().unwrap().next()
.expect("iterator always contains at least one string (which may be empty)");

if !workspace.is_empty() {
self.container_name = Some(workspace.to_string());
}

self.use_fabric_endpoint = true.into();
}

_ => return Err(Error::UrlNotRecognised { url: url.into() }.into()),
}
_ => return Err(Error::UrlNotRecognised { url: url.into() }.into()),
},
scheme => {
let scheme = scheme.into();
Expand Down Expand Up @@ -1119,6 +1172,14 @@ mod tests {
assert_eq!(builder.container_name, Some("file_system".to_string()));
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("abfss://file_system@account-api.onelake.fabric.microsoft.com/")
.unwrap();
assert_eq!(builder.account_name, Some("account".to_string()));
assert_eq!(builder.container_name, Some("file_system".to_string()));
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder.parse_url("abfs://container/path").unwrap();
assert_eq!(builder.container_name, Some("container".to_string()));
Expand Down Expand Up @@ -1166,6 +1227,14 @@ mod tests {
assert_eq!(builder.container_name, None);
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://account-api.onelake.fabric.microsoft.com/")
.unwrap();
assert_eq!(builder.account_name, Some("account".to_string()));
assert_eq!(builder.container_name, None);
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://account.dfs.fabric.microsoft.com/container")
Expand All @@ -1184,10 +1253,34 @@ mod tests {

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://account.blob.fabric.microsoft.com/container")
.parse_url("https://account.blob.fabric.microsoft.com/")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did this test case change?

.unwrap();
assert_eq!(builder.account_name, Some("account".to_string()));
assert_eq!(builder.container_name.as_deref(), Some("container"));
assert_eq!(builder.container_name, None);
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://ab000000000000000000000000000000.zab.dfs.fabric.microsoft.com/")
.unwrap();
assert_eq!(builder.account_name, Some("ab000000000000000000000000000000.zab".to_string()));
assert_eq!(builder.container_name.as_deref(), Some("ab000000000000000000000000000000"));
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://ab000000000000000000000000000000.zab.blob.fabric.microsoft.com/")
.unwrap();
assert_eq!(builder.account_name, Some("ab000000000000000000000000000000.zab".to_string()));
assert_eq!(builder.container_name.as_deref(), Some("ab000000000000000000000000000000"));
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://ab000000000000000000000000000000.zab.onelake.fabric.microsoft.com/")
.unwrap();
assert_eq!(builder.account_name, Some("ab000000000000000000000000000000.zab".to_string()));
assert_eq!(builder.container_name.as_deref(), Some("ab000000000000000000000000000000"));
assert!(builder.use_fabric_endpoint.get().unwrap());

let err_cases = [
Expand Down Expand Up @@ -1256,4 +1349,4 @@ mod tests {
panic!("{key} not propagated as ClientConfigKey");
}
}
}
}
Loading