diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml index a650f5c9586c..bd97dab13777 100644 --- a/bindings/python/pyproject.toml +++ b/bindings/python/pyproject.toml @@ -70,8 +70,22 @@ cache-keys = [ { file = "../../core/**/*.rs" }, ] +[tool.ruff] +line-length = 88 +fix = true + [tool.ruff.lint] ignore = ["E402", "F403", "F405"] select = ["E", "F", "I"] + +[tool.ruff.lint.pycodestyle] +max-doc-length = 88 + +[tool.ruff.lint.pydocstyle] +convention = "numpy" + +[tool.ruff.format] +docstring-code-format = true + [tool.ruff.lint.isort] known-first-party = ["opendal"] diff --git a/bindings/python/python/opendal/__init__.pyi b/bindings/python/python/opendal/__init__.pyi index c6bfaca2566f..994c91c65e5b 100644 --- a/bindings/python/python/opendal/__init__.pyi +++ b/bindings/python/python/opendal/__init__.pyi @@ -16,8 +16,9 @@ # under the License. import os +from collections.abc import AsyncIterable, Iterable from types import TracebackType -from typing import Any, AsyncIterable, Iterable, Optional, Type, Union, final +from typing import Any, Union, final from opendal import exceptions as exceptions from opendal import layers as layers @@ -38,12 +39,13 @@ class Operator(_Base): Example: ```python import opendal + op = opendal.Operator("s3", bucket="bucket", region="us-east-1") op.write("hello.txt", b"hello world") ``` """ def __init__(self, scheme: str, **options: Any) -> None: ... - def layer(self, layer: Layer) -> "Operator": + def layer(self, layer: Layer) -> Operator: """Add new layers upon the current operator. Args: @@ -52,58 +54,93 @@ class Operator(_Base): Returns: The new operator with the layer added. """ - def open(self, path: PathBuf, mode: str) -> File: + def open(self, path: PathBuf, mode: str, **options: Any) -> File: """Open a file at the given path for reading or writing. Args: - path (str|Path): The path to the file. - mode (str): The mode to open the file. Can be "rb" or "wb". + path (str | Path): The path to the file. 
+ mode (str): The mode to open the file. Must be either `"rb"` for reading or + `"wb"` for writing. + **options (Any): Additional options passed to the underlying OpenDAL reader + or writer. + - If `mode == "rb"`: options match the + [OpenDAL `ReaderOptions`](https://opendal.apache.org/docs/rust/opendal/options/struct.ReaderOptions.html). + - If `mode == "wb"`: options match the + [OpenDAL `WriteOptions`](https://opendal.apache.org/docs/rust/opendal/options/struct.WriteOptions.html). Returns: - A file-like object that can be used to read or write the file. + File: A file-like object that can be used to read or write the file. Example: ```python import opendal + op = opendal.Operator("s3", bucket="bucket", region="us-east-1") with op.open("hello.txt", "wb") as f: f.write(b"hello world") ``` """ - def read(self, path: PathBuf) -> bytes: + def read(self, path: PathBuf, **options: Any) -> bytes: """Read the content of the object at the given path. Args: - path (str|Path): The path to the object. + path (str | Path): The path to the object. + **options (Any): Optional read parameters matching the + [OpenDAL `ReadOptions`](https://opendal.apache.org/docs/rust/opendal/options/struct.ReadOptions.html): + + - offset (int): Byte offset to start reading from. Defaults to 0 + if not specified. + - size (int): Number of bytes to read. If not specified, reads until + the end of the object. + Together, `offset` and `size` define the byte range for reading. + - version (str): Specify the version of the object to read, if + supported by the backend. + - concurrent (int): Level of concurrency for reading. Defaults to + backend-specific value. + - chunk (int): Read chunk size in bytes. + - gap (int): Minimum gap (in bytes) between chunks to consider + them separate. + - if_match (str): Read only if the ETag matches the given value. + - if_none_match (str): Read only if the ETag does not match the + given value.
+ - if_modified_since (datetime): Only read if the object was modified + since this timestamp. This timestamp must be in UTC. + - if_unmodified_since (datetime): Only read if the object was not + modified since this timestamp. This timestamp must be in UTC. Returns: - The content of the object as bytes. + bytes: The content of the object as bytes. """ - def write( - self, - path: PathBuf, - bs: bytes, - *, - append: bool = ..., - chunk: int = ..., - content_type: str = ..., - content_disposition: str = ..., - cache_control: str = ..., - ) -> None: + def write(self, path: PathBuf, bs: bytes, **options: Any) -> None: """Write the content to the object at the given path. Args: - path (str|Path): The path to the object. + path (str | Path): The path to the object. bs (bytes): The content to write. - append (bool): Whether to append the content to the object. - Defaults to False. - chunk (int): The chunk size for writing. Defaults to write all. - content_type (str): The content type of the object. - Defaults to None. - content_disposition (str): The content disposition of the object. - Defaults to None. - cache_control (str): The cache control of the object. - Defaults to None. + **options (Any): Optional write parameters matching the + [OpenDAL `WriteOptions`](https://opendal.apache.org/docs/rust/opendal/options/struct.WriteOptions.html): + + - append (bool): If True, append to the object instead of overwriting. + - chunk (int): Specify the chunk size in bytes for multipart uploads. + - concurrent (int): Number of concurrent upload parts. Larger values can + improve performance. + - cache_control (str): Override the cache-control header for the object. + - content_type (str): Explicitly set the Content-Type header for + the object. + - content_disposition (str): Sets how the object should be presented + (e.g., as an attachment). + - content_encoding (str): Override the Content-Encoding header. 
+ - if_match (str): Perform the write only if the object's current + ETag matches the given one. + - if_none_match (str): Perform the write only if the object's + current ETag does NOT match the given one. + - if_not_exists (bool): Only write the object if it doesn't + already exist. + - user_metadata (dict[str, str]): Custom user metadata to associate + with the object. + + Returns: + None """ def stat(self, path: PathBuf) -> Metadata: """Get the metadata of the object at the given path. @@ -175,8 +212,7 @@ class Operator(_Base): target (str|Path): The target path. """ def remove_all(self, path: PathBuf) -> None: - """Convert into an async operator - """ + """Remove all objects under the given path.""" def to_async_operator(self) -> AsyncOperator: ... @final @@ -191,64 +227,101 @@ class AsyncOperator(_Base): Example: ```python import opendal + op = opendal.AsyncOperator("s3", bucket="bucket", region="us-east-1") await op.write("hello.txt", b"hello world") ``` """ def __init__(self, scheme: str, **options: Any) -> None: ... - def layer(self, layer: Layer) -> "AsyncOperator": ... - async def open(self, path: PathBuf, mode: str) -> AsyncFile: + def layer(self, layer: Layer) -> AsyncOperator: ... + async def open(self, path: PathBuf, mode: str, **options: Any) -> AsyncFile: """Open a file at the given path for reading or writing. Args: - path (str|Path): The path to the file. - mode (str): The mode to open the file. Can be "rb" or "wb". + path (str | Path): The path to the file. + mode (str): The mode to open the file. Must be either `"rb"` for reading or + `"wb"` for writing. + **options (Any): Additional options passed to the underlying OpenDAL reader + or writer. + - If `mode == "rb"`: options match the + [OpenDAL `ReaderOptions`](https://opendal.apache.org/docs/rust/opendal/options/struct.ReaderOptions.html). + - If `mode == "wb"`: options match the + [OpenDAL `WriteOptions`](https://opendal.apache.org/docs/rust/opendal/options/struct.WriteOptions.html).
Returns: - A file-like object that can be used to read or write the file. + AsyncFile: A file-like object that can be used to read or write the file. Example: ```python import opendal + op = opendal.AsyncOperator("s3", bucket="bucket", region="us-east-1") async with await op.open("hello.txt", "wb") as f: await f.write(b"hello world") ``` """ - async def read(self, path: PathBuf) -> bytes: + async def read(self, path: PathBuf, **options: Any) -> bytes: """Read the content of the object at the given path. Args: - path (str|Path): The path to the object. + path (str | Path): The path to the object. + **options (Any): Optional read parameters matching the + [OpenDAL `ReadOptions`](https://opendal.apache.org/docs/rust/opendal/options/struct.ReadOptions.html): + + - offset (int): Byte offset to start reading from. Defaults to 0 + if not specified. + - size (int): Number of bytes to read. If not specified, reads until + the end of the object. + Together, `offset` and `size` define the byte range for reading. + - version (str): Specify the version of the object to read, if + supported by the backend. + - concurrent (int): Level of concurrency for reading. Defaults to + backend-specific value. + - chunk (int): Read chunk size in bytes. + - gap (int): Minimum gap (in bytes) between chunks to consider + them separate. + - override_content_type (str): Override the returned content type. + - if_match (str): Read only if the ETag matches the given value. + - if_none_match (str): Read only if the ETag does not match the + given value. + - if_modified_since (datetime): Only read if the object was modified + since this timestamp. This timestamp must be in UTC. + - if_unmodified_since (datetime): Only read if the object was not + modified since this timestamp. This timestamp must be in UTC. Returns: The content of the object as bytes.
""" - async def write( - self, - path: PathBuf, - bs: bytes, - *, - append: bool = ..., - chunk: int = ..., - content_type: str = ..., - content_disposition: str = ..., - cache_control: str = ..., - ) -> None: + async def write(self, path: PathBuf, bs: bytes, **options: Any) -> None: """Write the content to the object at the given path. Args: - path (str|Path): The path to the object. + path (str | Path): The path to the object. bs (bytes): The content to write. - append (bool): Whether to append the content to the object. - Defaults to False. - chunk (int): The chunk size for writing. Defaults to write all. - content_type (str): The content type of the object. - Defaults to None. - content_disposition (str): The content disposition of the object. - Defaults to None. - cache_control (str): The cache control of the object. - Defaults to None. + **options (Any): Optional write parameters matching the + [OpenDAL `WriteOptions`](https://opendal.apache.org/docs/rust/opendal/options/struct.WriteOptions.html): + + - append (bool): If True, append to the object instead of overwriting. + - chunk (int): Specify the chunk size in bytes for multipart uploads. + - concurrent (int): Number of concurrent upload parts. Larger values can + improve performance. + - cache_control (str): Override the cache-control header for the object. + - content_type (str): Explicitly set the Content-Type header for + the object. + - content_disposition (str): Sets how the object should be presented + (e.g., as an attachment). + - content_encoding (str): Override the Content-Encoding header. + - if_match (str): Perform the write only if the object's current + ETag matches the given one. + - if_none_match (str): Perform the write only if the object's + current ETag does NOT match the given one. + - if_not_exists (bool): Only write the object if it doesn't + already exist. + - user_metadata (dict[str, str]): Custom user metadata to associate + with the object. 
+ + Returns: + None """ async def stat(self, path: PathBuf) -> Metadata: """Get the metadata of the object at the given path. @@ -375,7 +448,7 @@ class File: Created by the `open` method of the `Operator` class. """ - def read(self, size: Optional[int] = None) -> bytes: + def read(self, size: int | None = None) -> bytes: """Read the content of the file. Args: @@ -384,7 +457,7 @@ class File: Returns: The content of the file as bytes. """ - def readline(self, size: Optional[int] = None) -> bytes: + def readline(self, size: int | None = None) -> bytes: """Read a single line from the file. Args: @@ -421,9 +494,9 @@ class File: """Enter the runtime context related to this object.""" def __exit__( self, - exc_type: Optional[Type[BaseException]], - exc_value: Optional[BaseException], - traceback: Optional[TracebackType], + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, ) -> None: """Exit the runtime context related to this object.""" @property @@ -433,7 +506,7 @@ class File: """Flush the internal buffer.""" def readable(self) -> bool: """Check if the file is readable.""" - def readinto(self, buffer: Union[bytes, bytearray]) -> int: + def readinto(self, buffer: bytes | bytearray) -> int: """Read bytes into a buffer. Args: @@ -454,7 +527,7 @@ class AsyncFile: Created by the `open` method of the `AsyncOperator` class. """ - async def read(self, size: Optional[int] = None) -> bytes: + async def read(self, size: int | None = None) -> bytes: """Read the content of the file. 
Args: @@ -491,9 +564,9 @@ class AsyncFile: """Enter the runtime context related to this object.""" def __aexit__( self, - exc_type: Optional[Type[BaseException]], - exc_value: Optional[BaseException], - traceback: Optional[TracebackType], + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, ) -> None: """Exit the runtime context related to this object.""" @property @@ -516,19 +589,19 @@ class Entry: @final class Metadata: @property - def content_disposition(self) -> Optional[str]: + def content_disposition(self) -> str | None: """The content disposition of the object.""" @property def content_length(self) -> int: """The content length of the object.""" @property - def content_md5(self) -> Optional[str]: + def content_md5(self) -> str | None: """The MD5 checksum of the object.""" @property - def content_type(self) -> Optional[str]: + def content_type(self) -> str | None: """The mime type of the object.""" @property - def etag(self) -> Optional[str]: + def etag(self) -> str | None: """The ETag of the object.""" @property def mode(self) -> EntryMode: @@ -553,103 +626,124 @@ class Capability: """Storage capability information.""" stat: bool - """If operator supports stat""" + """If operator supports stat.""" stat_with_if_match: bool - """If operator supports stat with if match""" + """If operator supports stat with if match.""" stat_with_if_none_match: bool - """If operator supports stat with if none match""" + """If operator supports stat with if none match.""" read: bool - """If operator supports read""" + """Indicates if the operator supports read operations.""" read_with_if_match: bool - """If operator supports read with if match""" + """Indicates if conditional read operations using If-Match are supported.""" read_with_if_none_match: bool - """If operator supports read with if none match""" + """Indicates if conditional read operations using If-None-Match are supported.""" + + read_with_if_modified_since: 
bool + """If-Modified-Since condition supported for read.""" + + read_with_if_unmodified_since: bool + """If-Unmodified-Since condition supported for read.""" read_with_override_cache_control: bool - """If operator supports read with override cache control""" + """Cache-Control header override supported for read.""" read_with_override_content_disposition: bool - """If operator supports read with override content disposition""" + """Content-Disposition header override supported for read.""" read_with_override_content_type: bool - """If operator supports read with override content type""" + """Indicates if Content-Type header override is supported during read operations.""" + + read_with_version: bool + """Indicates if versions read operations are supported.""" write: bool - """If operator supports write""" + """Indicates if the operator supports write operations.""" write_can_multi: bool - """If operator supports write can be called in multi times""" + """Indicates if multiple write operations can be performed on the same object.""" write_can_empty: bool - """If operator supports write with empty content""" + """Indicates if writing empty content is supported.""" write_can_append: bool - """If operator supports write by append""" + """Indicates if append operations are supported.""" write_with_content_type: bool - """If operator supports write with content type""" + """Indicates if Content-Type can be specified during write operations.""" write_with_content_disposition: bool - """If operator supports write with content disposition""" + """Indicates if Content-Disposition can be specified during write operations.""" + + write_with_content_encoding: bool + """Indicates if Content-Encoding can be specified during write operations.""" write_with_cache_control: bool - """If operator supports write with cache control""" + """Indicates if Cache-Control can be specified during write operations.""" + + write_with_if_match: bool + """Indicates if conditional write operations 
using If-Match are supported.""" + + write_with_if_none_match: bool + """Indicates if conditional write operations using If-None-Match are supported.""" + + write_with_if_not_exists: bool + """Indicates if write operations can be conditional on object non-existence.""" + + write_with_user_metadata: bool + """Indicates if custom user metadata can be attached during write operations.""" - write_multi_max_size: Optional[int] - """Write_multi_max_size is the max size that services support in write_multi. - For example, AWS S3 supports 5GiB as max in write_multi.""" + write_multi_max_size: int | None + """Maximum part size for multipart uploads (e.g. 5GiB for AWS S3).""" - write_multi_min_size: Optional[int] - """Write_multi_min_size is the min size that services support in write_multi. - For example, AWS S3 requires at least 5MiB in write_multi expect the last one.""" + write_multi_min_size: int | None + """Minimum part size for multipart uploads (e.g. 5MiB for AWS S3).""" - write_total_max_size: Optional[int] - """Write_total_max_size is the max size that services support in write_total. - For example, Cloudflare D1 supports 1MB as max in write_total.""" + write_total_max_size: int | None + """Maximum total size for write operations (e.g. 
1MB for Cloudflare D1).""" create_dir: bool - """If operator supports create dir""" + """If operator supports create dir.""" delete: bool - """If operator supports delete""" + """If operator supports delete.""" copy: bool - """If operator supports copy""" + """If operator supports copy.""" rename: bool - """If operator supports rename""" + """If operator supports rename.""" list: bool - """If operator supports list""" + """If operator supports list.""" list_with_limit: bool - """If backend supports list with limit""" + """If backend supports list with limit.""" list_with_start_after: bool - """If backend supports list with start after""" + """If backend supports list with start after.""" list_with_recursive: bool - """If backend supports list with recursive""" + """If backend supports list with recursive.""" presign: bool - """If operator supports presign""" + """If operator supports presign.""" presign_read: bool - """If operator supports presign read""" + """If operator supports presign read.""" presign_stat: bool - """If operator supports presign stat""" + """If operator supports presign stat.""" presign_write: bool - """If operator supports presign write""" + """If operator supports presign write.""" presign_delete: bool - """If operator supports presign delete""" + """If operator supports presign delete.""" shared: bool - """If operator supports shared""" + """If operator supports shared.""" diff --git a/bindings/python/python/opendal/layers.pyi b/bindings/python/python/opendal/layers.pyi index 609928ff0122..adcb543f4f76 100644 --- a/bindings/python/python/opendal/layers.pyi +++ b/bindings/python/python/opendal/layers.pyi @@ -37,4 +37,4 @@ class ConcurrentLimitLayer(Layer): @final class MimeGuessLayer(Layer): - def __init__(self) -> None: ... \ No newline at end of file + def __init__(self) -> None: ... 
diff --git a/bindings/python/src/capability.rs b/bindings/python/src/capability.rs index c828a605dc49..4edf92fb4ecf 100644 --- a/bindings/python/src/capability.rs +++ b/bindings/python/src/capability.rs @@ -28,44 +28,57 @@ pub struct Capability { /// If operator supports stat with if none match. pub stat_with_if_none_match: bool, - /// If operator supports read. + /// Indicates if the operator supports read operations. pub read: bool, - /// If operator supports read with if match. + /// Indicates if conditional read operations using If-Match are supported. pub read_with_if_match: bool, - /// If operator supports read with if none match. + /// Indicates if conditional read operations using If-None-Match are supported. pub read_with_if_none_match: bool, - /// if operator supports read with override cache control. + /// Indicates if conditional read operations using If-Modified-Since are supported. + pub read_with_if_modified_since: bool, + /// Indicates if conditional read operations using If-Unmodified-Since are supported. + pub read_with_if_unmodified_since: bool, + /// Indicates if Cache-Control header override is supported during read operations. pub read_with_override_cache_control: bool, - /// if operator supports read with override content disposition. + /// Indicates if Content-Disposition header override is supported during read operations. pub read_with_override_content_disposition: bool, - /// if operator supports read with override content type. + /// Indicates if Content-Type header override is supported during read operations. pub read_with_override_content_type: bool, + /// Indicates if versions read operations are supported. + pub read_with_version: bool, - /// If operator supports write. + /// Indicates if the operator supports write operations. pub write: bool, - /// If operator supports write can be called in multi times. + /// Indicates if multiple write operations can be performed on the same object. 
pub write_can_multi: bool, - /// If operator supports write with empty content. + /// Indicates if writing empty content is supported. pub write_can_empty: bool, - /// If operator supports write by append. + /// Indicates if append operations are supported. pub write_can_append: bool, - /// If operator supports write with content type. + /// Indicates if Content-Type can be specified during write operations. pub write_with_content_type: bool, - /// If operator supports write with content disposition. + /// Indicates if Content-Disposition can be specified during write operations. pub write_with_content_disposition: bool, - /// If operator supports write with cache control. + /// Indicates if Content-Encoding can be specified during write operations. + pub write_with_content_encoding: bool, + /// Indicates if Cache-Control can be specified during write operations. pub write_with_cache_control: bool, - /// write_multi_max_size is the max size that services support in write_multi. - /// - /// For example, AWS S3 supports 5GiB as max in write_multi. + /// Indicates if conditional write operations using If-Match are supported. + pub write_with_if_match: bool, + /// Indicates if conditional write operations using If-None-Match are supported. + pub write_with_if_none_match: bool, + /// Indicates if write operations can be conditional on object non-existence. + pub write_with_if_not_exists: bool, + /// Indicates if custom user metadata can be attached during write operations. + pub write_with_user_metadata: bool, + /// Maximum size supported for multipart uploads. + /// For example, AWS S3 supports up to 5GiB per part in multipart uploads. pub write_multi_max_size: Option, - /// write_multi_min_size is the min size that services support in write_multi. - /// - /// For example, AWS S3 requires at least 5MiB in write_multi expect the last one. + /// Minimum size required for multipart uploads (except for the last part). 
+ /// For example, AWS S3 requires at least 5MiB per part. pub write_multi_min_size: Option, - /// write_total_max_size is the max size that services support in write_total. - /// - /// For example, Cloudflare D1 supports 1MB as max in write_total. + /// Maximum total size supported for write operations. + /// For example, Cloudflare D1 has a 1MB total size limit. pub write_total_max_size: Option, /// If operator supports create dir. @@ -117,6 +130,9 @@ impl Capability { read_with_override_content_disposition: capability .read_with_override_content_disposition, read_with_override_content_type: capability.read_with_override_content_type, + read_with_if_modified_since: capability.read_with_if_modified_since, + read_with_if_unmodified_since: capability.read_with_if_unmodified_since, + read_with_version: capability.read_with_version, write: capability.write, write_can_multi: capability.write_can_multi, write_can_empty: capability.write_can_empty, @@ -127,6 +143,11 @@ impl Capability { write_multi_max_size: capability.write_multi_max_size, write_multi_min_size: capability.write_multi_min_size, write_total_max_size: capability.write_total_max_size, + write_with_content_encoding: capability.write_with_content_encoding, + write_with_if_match: capability.write_with_if_match, + write_with_if_none_match: capability.write_with_if_none_match, + write_with_if_not_exists: capability.write_with_if_not_exists, + write_with_user_metadata: capability.write_with_user_metadata, create_dir: capability.create_dir, delete: capability.delete, copy: capability.copy, diff --git a/bindings/python/src/file.rs b/bindings/python/src/file.rs index 26ec4c6069ea..a84fa8ade62d 100644 --- a/bindings/python/src/file.rs +++ b/bindings/python/src/file.rs @@ -25,6 +25,7 @@ use std::sync::Arc; use futures::AsyncReadExt; use futures::AsyncSeekExt; +use futures::AsyncWriteExt; use pyo3::buffer::PyBuffer; use pyo3::exceptions::PyIOError; use pyo3::exceptions::PyValueError; @@ -335,7 +336,7 @@ pub struct 
AsyncFile(Arc>); enum AsyncFileState { Reader(ocore::FuturesAsyncReader), - Writer(ocore::Writer), + Writer(ocore::FuturesAsyncWriter), Closed, } @@ -344,7 +345,7 @@ impl AsyncFile { Self(Arc::new(Mutex::new(AsyncFileState::Reader(reader)))) } - pub fn new_writer(writer: ocore::Writer) -> Self { + pub fn new_writer(writer: ocore::FuturesAsyncWriter) -> Self { Self(Arc::new(Mutex::new(AsyncFileState::Writer(writer)))) } } @@ -422,7 +423,7 @@ impl AsyncFile { let len = bs.len(); writer - .write(bs) + .write_all(&bs) .await .map(|_| len) .map_err(|err| PyIOError::new_err(err.to_string())) diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs index 7e1a99e60d57..456c3114e3e3 100644 --- a/bindings/python/src/lib.rs +++ b/bindings/python/src/lib.rs @@ -54,6 +54,7 @@ fn _opendal(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; + m.add_class::()?; // Layer module let layers_module = PyModule::new(py, "layers")?; diff --git a/bindings/python/src/operator.rs b/bindings/python/src/operator.rs index 38d372d8ce00..166dbe5d9a7a 100644 --- a/bindings/python/src/operator.rs +++ b/bindings/python/src/operator.rs @@ -84,7 +84,7 @@ impl Operator { }) } - /// Add new layers upon existing operator + /// Add new layers upon the existing operator pub fn layer(&self, layer: &layers::Layer) -> PyResult { let op = layer.0.layer(self.core.clone().into()); @@ -99,19 +99,39 @@ impl Operator { } /// Open a file-like reader for the given path. - pub fn open(&self, path: PathBuf, mode: String) -> PyResult { - let path = path.to_string_lossy().to_string(); + #[pyo3(signature = (path, mode, *, **kwargs))] + pub fn open( + &self, + path: PathBuf, + mode: String, + kwargs: Option<&Bound>, + ) -> PyResult { let this = self.core.clone(); + let path = path.to_string_lossy().to_string(); + + let reader_opts = kwargs + .map(|v| v.extract::()) + .transpose()? 
+ .unwrap_or_default(); + + let writer_opts = kwargs + .map(|v| v.extract::()) + .transpose()? + .unwrap_or_default(); + if mode == "rb" { - let r = this - .reader(&path) - .map_err(format_pyerr)? - .into_std_read(..) + let range = reader_opts.make_range(); + let reader = this + .reader_options(&path, reader_opts.into()) .map_err(format_pyerr)?; + + let r = reader.into_std_read(range).map_err(format_pyerr)?; Ok(File::new_reader(r)) } else if mode == "wb" { - let w = this.writer(&path).map_err(format_pyerr)?; - Ok(File::new_writer(w)) + let writer = this + .writer_options(&path, writer_opts.into()) + .map_err(format_pyerr)?; + Ok(File::new_writer(writer)) } else { Err(Unsupported::new_err(format!( "OpenDAL doesn't support mode: {mode}" @@ -120,13 +140,25 @@ impl Operator { } /// Read the whole path into bytes. - pub fn read<'p>(&'p self, py: Python<'p>, path: PathBuf) -> PyResult> { + #[pyo3(signature = (path, **kwargs))] + pub fn read<'p>( + &'p self, + py: Python<'p>, + path: PathBuf, + kwargs: Option, + ) -> PyResult> { let path = path.to_string_lossy().to_string(); - let buffer = self.core.read(&path).map_err(format_pyerr)?.to_vec(); + let kwargs = kwargs.unwrap_or_default(); + let buffer = self + .core + .read_options(&path, kwargs.into()) + .map_err(format_pyerr)? + .to_vec(); + Buffer::new(buffer).into_bytes_ref(py) } - /// Write bytes into given path. + /// Write bytes into a given path. #[pyo3(signature = (path, bs, **kwargs))] pub fn write(&self, path: PathBuf, bs: Vec, kwargs: Option) -> PyResult<()> { let path = path.to_string_lossy().to_string(); @@ -137,7 +169,7 @@ impl Operator { .map_err(format_pyerr) } - /// Get current path's metadata **without cache** directly. + /// Get the current path's metadata **without cache** directly. pub fn stat(&self, path: PathBuf) -> PyResult { let path = path.to_string_lossy().to_string(); self.core @@ -146,7 +178,7 @@ impl Operator { .map(Metadata::new) } - /// Copy source to target. 
+ /// Copy the source to the target. pub fn copy(&self, source: PathBuf, target: PathBuf) -> PyResult<()> { let source = source.to_string_lossy().to_string(); let target = target.to_string_lossy().to_string(); @@ -160,19 +192,19 @@ self.core.rename(&source, &target).map_err(format_pyerr) } - /// Remove all file + /// Remove all files pub fn remove_all(&self, path: PathBuf) -> PyResult<()> { let path = path.to_string_lossy().to_string(); self.core.remove_all(&path).map_err(format_pyerr) } - /// Create a dir at given path. + /// Create a dir at the given path. /// /// # Notes /// /// To indicate that a path is a directory, it is compulsory to include /// a trailing / in the path. Failure to do so may result in - /// `NotADirectory` error being returned by OpenDAL. + /// a `NotADirectory` error being returned by OpenDAL. /// /// # Behavior /// @@ -193,7 +225,7 @@ self.core.delete(&path).map_err(format_pyerr) } - /// Check given path is exists. + /// Checks if the given path exists. /// /// # Notes /// @@ -220,7 +252,7 @@ Ok(BlockingLister::new(l)) } - /// List dir in flat way. + /// List dir in a flat way. pub fn scan(&self, path: PathBuf) -> PyResult { let path = path.to_string_lossy().to_string(); let l = self @@ -312,7 +344,7 @@ impl AsyncOperator { }) } - /// Add new layers upon existing operator + /// Add new layers upon the existing operator pub fn layer(&self, layer: &layers::Layer) -> PyResult { let op = layer.0.layer(self.core.clone()); Ok(Self { @@ -323,27 +355,46 @@ } /// Open a file-like reader for the given path. + #[pyo3(signature = (path, mode, *, **kwargs))] pub fn open<'p>( &'p self, py: Python<'p>, path: PathBuf, mode: String, + kwargs: Option<&Bound>, ) -> PyResult> { let this = self.core.clone(); let path = path.to_string_lossy().to_string(); + let reader_opts = kwargs + .map(|v| v.extract::()) + .transpose()?
+ .unwrap_or_default(); + + let writer_opts = kwargs + .map(|v| v.extract::()) + .transpose()? + .unwrap_or_default(); + future_into_py(py, async move { if mode == "rb" { - let r = this - .reader(&path) + let range = reader_opts.make_range(); + let reader = this + .reader_options(&path, reader_opts.into()) .await - .map_err(format_pyerr)? - .into_futures_async_read(..) + .map_err(format_pyerr)?; + + let r = reader + .into_futures_async_read(range) .await .map_err(format_pyerr)?; Ok(AsyncFile::new_reader(r)) } else if mode == "wb" { - let w = this.writer(&path).await.map_err(format_pyerr)?; + let writer = this + .writer_options(&path, writer_opts.into()) + .await + .map_err(format_pyerr)?; + let w = writer.into_futures_async_write(); Ok(AsyncFile::new_writer(w)) } else { Err(Unsupported::new_err(format!( @@ -354,11 +405,26 @@ impl AsyncOperator { } /// Read the whole path into bytes. - pub fn read<'p>(&'p self, py: Python<'p>, path: PathBuf) -> PyResult> { + #[pyo3(signature = (path, **kwargs))] + pub fn read<'p>( + &'p self, + py: Python<'p>, + path: PathBuf, + kwargs: Option, + ) -> PyResult> { let this = self.core.clone(); let path = path.to_string_lossy().to_string(); + let kwargs = kwargs.unwrap_or_default(); future_into_py(py, async move { - let res: Vec = this.read(&path).await.map_err(format_pyerr)?.to_vec(); + let range = kwargs.make_range(); + let res = this + .reader_options(&path, kwargs.into()) + .await + .map_err(format_pyerr)? + .read(range) + .await + .map_err(format_pyerr)? 
+ .to_vec(); Python::with_gil(|py| Buffer::new(res).into_bytes(py)) }) } diff --git a/bindings/python/src/options.rs b/bindings/python/src/options.rs index a1c17b676470..f47fdb147c9e 100644 --- a/bindings/python/src/options.rs +++ b/bindings/python/src/options.rs @@ -20,27 +20,105 @@ use opendal as ocore; use pyo3::pyclass; use std::collections::HashMap; +use chrono::{DateTime, Utc}; +use std::ops::Bound as RangeBound; + +#[pyclass(module = "opendal")] +#[derive(FromPyObject, Default)] +pub struct ReadOptions { + pub version: Option, + pub concurrent: Option, + pub chunk: Option, + pub gap: Option, + pub offset: Option, + pub size: Option, + pub if_match: Option, + pub if_none_match: Option, + pub if_modified_since: Option>, + pub if_unmodified_since: Option>, + pub content_type: Option, + pub cache_control: Option, + pub content_disposition: Option, +} + +impl ReadOptions { + pub fn make_range(&self) -> (RangeBound, RangeBound) { + let start_bound = self + .offset + .map_or(RangeBound::Unbounded, |s| RangeBound::Included(s as u64)); + let end_bound = self + .size + .map_or(RangeBound::Unbounded, |e| RangeBound::Excluded(e as u64)); + + (start_bound, end_bound) + } +} + #[pyclass(module = "opendal")] #[derive(FromPyObject, Default)] pub struct WriteOptions { pub append: Option, pub chunk: Option, + pub concurrent: Option, + pub cache_control: Option, pub content_type: Option, pub content_disposition: Option, - pub cache_control: Option, + pub content_encoding: Option, + pub if_match: Option, + pub if_none_match: Option, + pub if_not_exists: Option, pub user_metadata: Option>, } +impl From for ocore::options::ReadOptions { + fn from(opts: ReadOptions) -> Self { + let r = opts.make_range(); + Self { + range: r.into(), + version: opts.version, + if_match: opts.if_match, + if_none_match: opts.if_none_match, + if_modified_since: opts.if_modified_since, + if_unmodified_since: opts.if_unmodified_since, + concurrent: opts.concurrent.unwrap_or_default(), + chunk: 
opts.chunk, + gap: opts.gap, + override_content_type: opts.content_type, + override_cache_control: opts.cache_control, + override_content_disposition: opts.content_disposition, + } + } +} + +impl From for ocore::options::ReaderOptions { + fn from(opts: ReadOptions) -> Self { + Self { + version: opts.version, + if_match: opts.if_match, + if_none_match: opts.if_none_match, + if_modified_since: opts.if_modified_since, + if_unmodified_since: opts.if_unmodified_since, + concurrent: opts.concurrent.unwrap_or_default(), + chunk: opts.chunk, + gap: opts.gap, + } + } +} + impl From for ocore::options::WriteOptions { fn from(opts: WriteOptions) -> Self { Self { append: opts.append.unwrap_or(false), + concurrent: opts.concurrent.unwrap_or_default(), chunk: opts.chunk, content_type: opts.content_type, content_disposition: opts.content_disposition, cache_control: opts.cache_control, + content_encoding: opts.content_encoding, user_metadata: opts.user_metadata, - ..Default::default() + if_match: opts.if_match, + if_none_match: opts.if_none_match, + if_not_exists: opts.if_not_exists.unwrap_or(false), } } } diff --git a/bindings/python/tests/test_read.py b/bindings/python/tests/test_read.py index ef2abb646125..5f78a4e1c58b 100644 --- a/bindings/python/tests/test_read.py +++ b/bindings/python/tests/test_read.py @@ -17,13 +17,14 @@ import io import os +from datetime import timedelta from pathlib import Path from random import choices, randint from uuid import uuid4 import pytest -from opendal.exceptions import NotFound +from opendal.exceptions import ConditionNotMatch, NotFound @pytest.mark.need_capability("read", "write", "delete") @@ -73,6 +74,18 @@ def test_sync_reader(service_name, operator, async_operator): reader.readinto(buf) assert buf == content[:1] + range_start = randint(0, len(content) - 1) + range_end = randint(range_start, len(content) - 1) + + with operator.open(filename, "rb", offset=range_start, size=range_end) as reader: + assert reader.readable() + assert not 
reader.writable() + assert not reader.closed + + read_content = reader.read() + assert read_content is not None + assert read_content == content[range_start:range_end] + operator.delete(filename) @@ -142,6 +155,10 @@ async def test_async_reader(service_name, operator, async_operator): await async_operator.write(filename, content) async with await async_operator.open(filename, "rb") as reader: + assert await reader.readable() + assert not await reader.writable() + assert not await reader.closed + read_content = await reader.read() assert read_content is not None assert read_content == content @@ -158,6 +175,20 @@ async def test_async_reader(service_name, operator, async_operator): assert read_content is not None assert read_content == content + range_start = randint(0, len(content) - 1) + range_end = randint(range_start, len(content) - 1) + + async with await async_operator.open( + filename, "rb", offset=range_start, size=range_end + ) as reader: + assert await reader.readable() + assert not await reader.writable() + assert not await reader.closed + + read_content = await reader.read() + assert read_content is not None + assert read_content == content[range_start:range_end] + await async_operator.delete(filename) @@ -222,3 +253,32 @@ def test_sync_read_not_exists(service_name, operator, async_operator): async def test_async_read_not_exists(service_name, operator, async_operator): with pytest.raises(NotFound): await async_operator.read(str(uuid4())) + + +@pytest.mark.need_capability( + "read", "read_with_if_modified_since", "read_with_if_unmodified_since" +) +def test_sync_conditional_reads(service_name, operator): + path = f"random_file_{str(uuid4())}" + content = b"test data" + operator.write(path, content) + + metadata = operator.stat(path) + mod_time = metadata.last_modified + assert mod_time is not None + + # Large delta: 1 minute earlier + before = mod_time - timedelta(minutes=1) + after = mod_time + timedelta(seconds=10) + + # Should succeed: file was modified 
after `before` + assert operator.read(path, if_modified_since=before) == content + + # Should succeed: file was unmodified since `after` + assert operator.read(path, if_unmodified_since=after) == content + + # Should fail: file was modified after `before` + with pytest.raises(ConditionNotMatch): + operator.read(path, if_unmodified_since=before) + + operator.delete(path) diff --git a/bindings/python/tests/test_sync_delete.py b/bindings/python/tests/test_sync_delete.py index 25e9d77d2add..0260bf826a55 100644 --- a/bindings/python/tests/test_sync_delete.py +++ b/bindings/python/tests/test_sync_delete.py @@ -23,9 +23,7 @@ from opendal.exceptions import NotFound -@pytest.mark.need_capability( - "read", "write", "delete", "list", "create_dir" -) +@pytest.mark.need_capability("read", "write", "delete", "list", "create_dir") def test_sync_remove_all(service_name, operator, async_operator): parent = f"random_dir_{str(uuid4())}" excepted = [ diff --git a/bindings/python/tests/test_sync_exists.py b/bindings/python/tests/test_sync_exists.py index f1e5a6f81f13..7c591f598345 100644 --- a/bindings/python/tests/test_sync_exists.py +++ b/bindings/python/tests/test_sync_exists.py @@ -21,9 +21,7 @@ import pytest -@pytest.mark.need_capability( - "read", "write", "delete", "list", "create_dir" -) +@pytest.mark.need_capability("read", "write", "delete", "list", "create_dir") def test_sync_exists(service_name, operator, async_operator): content = os.urandom(1024) target = f"random_{str(uuid4())}" diff --git a/bindings/python/tests/test_sync_list.py b/bindings/python/tests/test_sync_list.py index 75d4161337c0..aa21ddeabd58 100644 --- a/bindings/python/tests/test_sync_list.py +++ b/bindings/python/tests/test_sync_list.py @@ -44,9 +44,7 @@ def test_sync_list_with_start_after(service_name, operator, async_operator): start_after_file = files_to_create[2] # e.g., test_dir/file_2 entries_after = [] # Note: start_after expects the *full path* relative to the operator root - for entry in 
operator.list( - test_dir, start_after=start_after_file - ): + for entry in operator.list(test_dir, start_after=start_after_file): entries_after.append(entry.path) entries_after.sort() # Ensure order diff --git a/bindings/python/tests/test_write.py b/bindings/python/tests/test_write.py index ef651c8d11d9..571d0634418c 100644 --- a/bindings/python/tests/test_write.py +++ b/bindings/python/tests/test_write.py @@ -176,6 +176,23 @@ async def test_async_writer(service_name, operator, async_operator): await async_operator.stat(filename) +@pytest.mark.asyncio +@pytest.mark.need_capability("write", "delete", "write_with_if_not_exists") +async def test_async_writer_options(service_name, operator, async_operator): + size = randint(1, 1024) + filename = f"test_file_{str(uuid4())}.txt" + content = os.urandom(size) + f = await async_operator.open(filename, "wb") + written_bytes = await f.write(content) + assert written_bytes == size + await f.close() + + with pytest.raises(Exception) as excinfo: + async with await async_operator.open(filename, "wb", if_not_exists=True) as w: + await w.write(content) + assert "ConditionNotMatch" in str(excinfo.value) + + @pytest.mark.need_capability("write", "delete") def test_sync_writer(service_name, operator, async_operator): size = randint(1, 1024) @@ -188,3 +205,19 @@ def test_sync_writer(service_name, operator, async_operator): operator.delete(filename) with pytest.raises(NotFound): operator.stat(filename) + + +@pytest.mark.need_capability("write", "delete", "write_with_if_not_exists") +def test_sync_writer_options(service_name, operator, async_operator): + size = randint(1, 1024) + filename = f"test_file_{str(uuid4())}.txt" + content = os.urandom(size) + f = operator.open(filename, "wb") + written_bytes = f.write(content) + assert written_bytes == size + f.close() + + with pytest.raises(Exception) as excinfo: + with operator.open(filename, "wb", if_not_exists=True) as w: + w.write(content) + assert "ConditionNotMatch" in str(excinfo.value)