-
Notifications
You must be signed in to change notification settings - Fork 4.1k
ARROW-9645: [Python] Deprecate pyarrow.filesystem in favor of pyarrow.fs #8149
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
370213e
17883b5
9b51079
9da29c5
192f754
dfdc622
a2ccf44
642c1d6
122b5c8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,6 +32,7 @@ | |
| import gc as _gc | ||
| import os as _os | ||
| import sys as _sys | ||
| import warnings as _warnings | ||
|
|
||
| try: | ||
| from ._generated_version import version as __version__ | ||
|
|
@@ -190,23 +191,58 @@ def show_versions(): | |
| SerializationCallbackError, | ||
| DeserializationCallbackError) | ||
|
|
||
| from pyarrow.filesystem import FileSystem, LocalFileSystem | ||
|
|
||
| from pyarrow.hdfs import HadoopFileSystem | ||
| import pyarrow.hdfs as hdfs | ||
|
|
||
| from pyarrow.ipc import serialize_pandas, deserialize_pandas | ||
| import pyarrow.ipc as ipc | ||
|
|
||
|
|
||
| localfs = LocalFileSystem.get_instance() | ||
|
|
||
| from pyarrow.serialization import (default_serialization_context, | ||
| register_default_serialization_handlers, | ||
| register_torch_serialization_handlers) | ||
|
|
||
| import pyarrow.types as types | ||
|
|
||
|
|
||
| # deprecated filesystems | ||
|
|
||
| from pyarrow.filesystem import FileSystem as _FileSystem, LocalFileSystem as _LocalFileSystem | ||
| from pyarrow.hdfs import HadoopFileSystem as _HadoopFileSystem | ||
|
|
||
| _localfs = _LocalFileSystem._get_instance() | ||
|
|
||
|
|
||
| _msg = "pyarrow.{0} is deprecated as of 2.0.0, please use pyarrow.fs.{1} instead." | ||
|
|
||
|
|
||
| if _sys.version_info >= (3, 7): | ||
| def __getattr__(name): | ||
| if name == "localfs": | ||
| _warnings.warn(_msg.format("localfs", "LocalFileSystem"), | ||
| DeprecationWarning, stacklevel=2) | ||
| return _localfs | ||
| elif name == "FileSystem": | ||
| _warnings.warn(_msg.format("FileSystem", "FileSystem"), | ||
| DeprecationWarning, stacklevel=2) | ||
| return _FileSystem | ||
| elif name == "LocalFileSystem": | ||
| _warnings.warn(_msg.format("LocalFileSystem", "LocalFileSystem"), | ||
| DeprecationWarning, stacklevel=2) | ||
| return _LocalFileSystem | ||
| elif name == "HadoopFileSystem": | ||
| _warnings.warn(_msg.format("HadoopFileSystem", "HadoopFileSystem"), | ||
| DeprecationWarning, stacklevel=2) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We use [inline code omitted in extraction] |
||
| return _HadoopFileSystem | ||
|
|
||
| raise AttributeError( | ||
| "module 'pyarrow' has no attribute '{0}'".format(name) | ||
| ) | ||
| else: | ||
| localfs = _localfs | ||
| FileSystem = _FileSystem | ||
| LocalFileSystem = _LocalFileSystem | ||
| HadoopFileSystem = _HadoopFileSystem | ||
|
|
||
|
|
||
| # Entry point for starting the plasma store | ||
|
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ | |
| import inspect | ||
| import posixpath | ||
| import urllib.parse | ||
| import warnings | ||
|
|
||
| from os.path import join as pjoin | ||
|
|
||
|
|
@@ -237,12 +238,28 @@ class LocalFileSystem(FileSystem): | |
|
|
||
| _instance = None | ||
|
|
||
| def __init__(self): | ||
| warnings.warn( | ||
| "pyarrow.filesystem.LocalFileSystem is deprecated as of 2.0.0, " | ||
| "please use pyarrow.fs.LocalFileSystem instead", | ||
| DeprecationWarning, stacklevel=2) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Put a helper in [inline code omitted in extraction]
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There is a [inline code omitted in extraction]. And if it is just a helper for raising the warning, I am not sure that is worth it (compared to using a template message that can be filled in).
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Moved a template to [inline code omitted in extraction] |
||
| super().__init__() | ||
|
|
||
| @classmethod | ||
| def get_instance(cls): | ||
| def _get_instance(cls): | ||
| if cls._instance is None: | ||
| cls._instance = LocalFileSystem() | ||
| with warnings.catch_warnings(): | ||
| cls._instance = LocalFileSystem() | ||
| return cls._instance | ||
|
|
||
| @classmethod | ||
| def get_instance(cls): | ||
| warnings.warn( | ||
| "pyarrow.filesystem.LocalFileSystem is deprecated as of 2.0.0, " | ||
| "please use pyarrow.fs.LocalFileSystem instead", | ||
| DeprecationWarning, stacklevel=2) | ||
| return cls._get_instance() | ||
|
|
||
| @implements(FileSystem.ls) | ||
| def ls(self, path): | ||
| path = _stringify_path(path) | ||
|
|
@@ -431,7 +448,18 @@ def _ensure_filesystem(fs): | |
| # In case its a simple LocalFileSystem (e.g. dask) use native arrow | ||
| # FS | ||
| elif mro.__name__ == 'LocalFileSystem': | ||
| return LocalFileSystem.get_instance() | ||
| return LocalFileSystem._get_instance() | ||
|
|
||
| try: | ||
| import fsspec | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| except ImportError: | ||
| pass | ||
| else: | ||
| if isinstance(fs, fsspec.AbstractFileSystem): | ||
| # for recent fsspec versions that stop inheriting from | ||
| # pyarrow.filesystem.FileSystem, still allow fsspec | ||
| # filesystems (which should be compatible with our legacy fs) | ||
| return fs | ||
|
|
||
| raise OSError('Unrecognized filesystem: {}'.format(fs_type)) | ||
| else: | ||
|
|
@@ -476,15 +504,15 @@ def resolve_filesystem_and_path(where, filesystem=None): | |
| port = 0 | ||
| if len(netloc_split) == 2 and netloc_split[1].isnumeric(): | ||
| port = int(netloc_split[1]) | ||
| fs = pa.hdfs.connect(host=host, port=port) | ||
| fs = pa.hdfs._connect(host=host, port=port) | ||
| fs_path = parsed_uri.path | ||
| elif parsed_uri.scheme == 'file': | ||
| # Input is local URI such as file:///home/user/myfile.parquet | ||
| fs = LocalFileSystem.get_instance() | ||
| fs = LocalFileSystem._get_instance() | ||
| fs_path = parsed_uri.path | ||
| else: | ||
| # Input is local path such as /home/user/myfile.parquet | ||
| fs = LocalFileSystem.get_instance() | ||
| fs = LocalFileSystem._get_instance() | ||
| fs_path = path | ||
|
|
||
| return fs, fs_path | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like you want to write a loop and/or helper function to avoid all this copy/pasting?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Put the info in a dict, and simplified this