-
Notifications
You must be signed in to change notification settings - Fork 1k
Remote IO support in cudf-polars #19921
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f16c37d
a09c563
51ee3b2
f75e3dc
63807a0
8f015f4
f315954
bae6b6d
bccb387
8350456
5ea88c7
ab0d28c
513cfa0
9a9a423
d8869aa
cd15bc6
10bf781
3d9dc0d
0a908f4
67ef92b
0047a6d
0bcee77
a4c3321
c338a74
09378bf
2e84038
2f1b12e
c1c2d79
4fe15e3
6919b20
ec2eb92
e23bca8
fd9f140
1b94d1a
c5374f2
01890d3
675b33f
ef1e0a2
2249e15
4e4cb87
276585c
4cb3909
0d6922e
482e70d
30a0594
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -301,6 +301,7 @@ cdef class TableInputMetadata: | |||||
| for i in range(self.c_obj.column_metadata.size()) | ||||||
| ] | ||||||
|
|
||||||
|
|
||||||
| cdef class TableWithMetadata: | ||||||
| """A container holding a table and its associated metadata | ||||||
| (e.g. column names) | ||||||
|
|
@@ -467,8 +468,6 @@ cdef class SourceInfo: | |||||
| A homogeneous list of sources to read from. Mixing | ||||||
| different types of sources will raise a `ValueError`. | ||||||
| """ | ||||||
| # Regular expression that matches remote file paths supported by libcudf | ||||||
Matt711 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
| _is_remote_file_pattern = re.compile(r"^[a-zA-Z][a-zA-Z0-9+.-]*://", re.IGNORECASE) | ||||||
|
|
||||||
| def __init__(self, list sources): | ||||||
| if not sources: | ||||||
|
|
@@ -483,7 +482,7 @@ cdef class SourceInfo: | |||||
| for src in sources: | ||||||
| if not isinstance(src, (os.PathLike, str)): | ||||||
| raise ValueError("All sources must be of the same type!") | ||||||
| if not (os.path.isfile(src) or self._is_remote_file_pattern.match(src)): | ||||||
| if not (os.path.isfile(src) or SourceInfo._is_remote_uri(src)): | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
Nit
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since `_is_remote_uri` is a […]
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure if there's a particular preference. Generally using […]. Probably not worth another commit & CI run if this PR is close to merging, but good to consider in the future. |
||||||
| raise FileNotFoundError( | ||||||
| errno.ENOENT, os.strerror(errno.ENOENT), src | ||||||
| ) | ||||||
|
|
@@ -538,6 +537,13 @@ cdef class SourceInfo: | |||||
|
|
||||||
| self.c_obj = source_info(host_span[host_span[const_byte]](self._hspans)) | ||||||
|
|
||||||
| @staticmethod | ||||||
| def _is_remote_uri(path: str | os.PathLike) -> bool: | ||||||
| # Regular expression that matches remote file paths supported by libcudf | ||||||
| return re.compile( | ||||||
| r"^[a-zA-Z][a-zA-Z0-9+.\-]*://", re.IGNORECASE | ||||||
| ).match(str(path)) is not None | ||||||
|
|
||||||
| def _init_byte_like_sources(self, list sources, type expected_type): | ||||||
| cdef const unsigned char[::1] c_buffer | ||||||
| cdef bint empty_buffer = True | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CC @vuule
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CC @TomAugspurger