Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 100 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ num-derive = "0.4.2"
num-format = "0.4.4"
num-traits = "0.2"
numpy = "0.27"
opendal = {version = "0.51", default-features = false}
Comment thread
universalmind303 marked this conversation as resolved.
Outdated
opentelemetry = {version = "0.31", features = ["trace", "metrics", "logs"]}
opentelemetry-otlp = {version = "0.31", features = ["grpc-tonic", "logs"]}
opentelemetry_sdk = {version = "0.31", features = ["logs"]}
Expand Down
3 changes: 3 additions & 0 deletions daft/daft/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -903,6 +903,7 @@ class IOConfig:
disable_suffix_range: bool
tos: TosConfig
gravitino: GravitinoConfig
backends: dict[str, dict[str, str]]
Comment thread
universalmind303 marked this conversation as resolved.
Outdated

def __init__(
self,
Expand All @@ -915,6 +916,7 @@ class IOConfig:
disable_suffix_range: bool | None = None,
tos: TosConfig | None = None,
gravitino: GravitinoConfig | None = None,
backends: dict[str, dict[str, str]] | None = None,
): ...
def replace(
self,
Expand All @@ -927,6 +929,7 @@ class IOConfig:
disable_suffix_range: bool | None = None,
tos: TosConfig | None = None,
gravitino: GravitinoConfig | None = None,
backends: dict[str, dict[str, str]] | None = None,
) -> IOConfig:
"""Replaces values if provided, returning a new IOConfig."""
...
Expand Down
11 changes: 10 additions & 1 deletion src/common/io-config/src/config.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use std::fmt::{Display, Formatter};
use std::{
collections::BTreeMap,
fmt::{Display, Formatter},
};

use serde::{Deserialize, Serialize};

Expand All @@ -18,6 +21,9 @@ pub struct IOConfig {
/// disable suffix range requests, please use range with offset
pub disable_suffix_range: bool,
pub tos: TosConfig,
/// Additional backends configured via OpenDAL.
/// Keys are scheme names (e.g. "oss", "cos"), values are key-value config maps.
pub backends: BTreeMap<String, BTreeMap<String, String>>,
}

impl IOConfig {
Expand Down Expand Up @@ -60,6 +66,9 @@ impl IOConfig {
"TOS config = {{ {} }}",
self.tos.multiline_display().join(", ")
));
if !self.backends.is_empty() {
res.push(format!("OpenDAL backends = {:?}", self.backends));
}
res
}
}
Expand Down
37 changes: 35 additions & 2 deletions src/common/io-config/src/python.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::{
any::Any,
collections::HashMap,
hash::{Hash, Hasher},
sync::Arc,
};
Expand Down Expand Up @@ -198,7 +199,8 @@ impl IOConfig {
hf=None,
disable_suffix_range=None,
tos=None,
gravitino=None
gravitino=None,
backends=None
))]
#[allow(clippy::too_many_arguments)]
pub fn new(
Expand All @@ -211,6 +213,7 @@ impl IOConfig {
disable_suffix_range: Option<bool>,
tos: Option<TosConfig>,
gravitino: Option<GravitinoConfig>,
backends: Option<HashMap<String, HashMap<String, String>>>,
) -> Self {
Self {
config: config::IOConfig {
Expand All @@ -223,6 +226,11 @@ impl IOConfig {
disable_suffix_range: disable_suffix_range.unwrap_or_default(),
tos: tos.unwrap_or_default().config,
gravitino: gravitino.unwrap_or_default().config,
backends: backends
.unwrap_or_default()
.into_iter()
.map(|(k, v)| (k, v.into_iter().collect()))
.collect(),
},
}
}
Expand All @@ -238,7 +246,8 @@ impl IOConfig {
hf=None,
disable_suffix_range=None,
tos=None,
gravitino=None
gravitino=None,
backends=None
))]
#[allow(clippy::too_many_arguments)]
pub fn replace(
Expand All @@ -252,6 +261,7 @@ impl IOConfig {
disable_suffix_range: Option<bool>,
tos: Option<TosConfig>,
gravitino: Option<GravitinoConfig>,
backends: Option<HashMap<String, HashMap<String, String>>>,
) -> Self {
Self {
config: config::IOConfig {
Expand Down Expand Up @@ -281,6 +291,13 @@ impl IOConfig {
gravitino: gravitino
.map(|gravitino| gravitino.config)
.unwrap_or_else(|| self.config.gravitino.clone()),
backends: backends
.map(|b| {
b.into_iter()
.map(|(k, v)| (k, v.into_iter().collect()))
.collect()
})
.unwrap_or_else(|| self.config.backends.clone()),
},
}
}
Expand Down Expand Up @@ -349,6 +366,22 @@ impl IOConfig {
})
}

/// Additional backends configured via OpenDAL
#[getter]
pub fn backends(&self) -> PyResult<HashMap<String, HashMap<String, String>>> {
Ok(self
.config
.backends
.iter()
.map(|(k, v)| {
(
k.clone(),
v.iter().map(|(k2, v2)| (k2.clone(), v2.clone())).collect(),
)
})
.collect())
}

pub fn __hash__(&self) -> PyResult<u64> {
use std::{collections::hash_map::DefaultHasher, hash::Hash};

Expand Down
8 changes: 8 additions & 0 deletions src/daft-io/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@ google-cloud-token = {version = "0.1.2"}
home = "0.5.12"
itertools = {workspace = true}
log = {workspace = true}
opendal = {workspace = true, features = [

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at the list of features, had some questions

  • Why not enable executors-tokio or internal-tokio-rt? They are enabled by default, and seems reasonable?
  • Can we add comments as to what each service enables what? Cause I can't really tell what oss and obs are for
  • Do we want services-fs? Shouldn't that be covered by our file://?
  • Are there any other interesting ones that we should add a todo for any other interesting ones that you saw?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had services-fs enabled to make it easier/possible to integration test this.

"services-oss",
"services-cos",
"services-obs",
"services-memory",
"services-fs",
"services-github"
]}
pyo3 = {workspace = true, optional = true}
rand = "0.8.5"
regex = {version = "1.12.2"}
Expand Down
Loading
Loading