Skip to content
6 changes: 3 additions & 3 deletions docs/source/python/filesystems_deprecated.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
Filesystem Interface (legacy)
=============================

.. note::
This section documents the deprecated filesystem layer. It is highly
recommended to use the :ref:`new filesystem layer <filesystem>` instead.
.. warning::
This section documents the deprecated filesystem layer. You should
use the :ref:`new filesystem layer <filesystem>` instead.

.. _hdfs:

Expand Down
48 changes: 42 additions & 6 deletions python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import gc as _gc
import os as _os
import sys as _sys
import warnings as _warnings

try:
from ._generated_version import version as __version__
Expand Down Expand Up @@ -190,23 +191,58 @@ def show_versions():
SerializationCallbackError,
DeserializationCallbackError)

from pyarrow.filesystem import FileSystem, LocalFileSystem

from pyarrow.hdfs import HadoopFileSystem
import pyarrow.hdfs as hdfs

from pyarrow.ipc import serialize_pandas, deserialize_pandas
import pyarrow.ipc as ipc


localfs = LocalFileSystem.get_instance()

from pyarrow.serialization import (default_serialization_context,
register_default_serialization_handlers,
register_torch_serialization_handlers)

import pyarrow.types as types


# deprecated filesystems

from pyarrow.filesystem import FileSystem as _FileSystem, LocalFileSystem as _LocalFileSystem
from pyarrow.hdfs import HadoopFileSystem as _HadoopFileSystem

_localfs = _LocalFileSystem._get_instance()


_msg = "pyarrow.{0} is deprecated as of 2.0.0, please use pyarrow.fs.{1} instead."


if _sys.version_info >= (3, 7):
def __getattr__(name):
if name == "localfs":
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like you want to write a loop and/or helper function to avoid all this copy/pasting?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Put the info in a dict, and simplified this

_warnings.warn(_msg.format("localfs", "LocalFileSystem"),
DeprecationWarning, stacklevel=2)
return _localfs
elif name == "FileSystem":
_warnings.warn(_msg.format("FileSystem", "FileSystem"),
DeprecationWarning, stacklevel=2)
return _FileSystem
elif name == "LocalFileSystem":
_warnings.warn(_msg.format("LocalFileSystem", "LocalFileSystem"),
DeprecationWarning, stacklevel=2)
return _LocalFileSystem
elif name == "HadoopFileSystem":
_warnings.warn(_msg.format("HadoopFileSystem", "HadoopFileSystem"),
DeprecationWarning, stacklevel=2)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We use FutureWarning elsewhere.

return _HadoopFileSystem

raise AttributeError(
"module 'pyarrow' has no attribute '{0}'".format(name)
)
else:
localfs = _localfs
FileSystem = _FileSystem
LocalFileSystem = _LocalFileSystem
HadoopFileSystem = _HadoopFileSystem


# Entry point for starting the plasma store


Expand Down
40 changes: 34 additions & 6 deletions python/pyarrow/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import inspect
import posixpath
import urllib.parse
import warnings

from os.path import join as pjoin

Expand Down Expand Up @@ -237,12 +238,28 @@ class LocalFileSystem(FileSystem):

_instance = None

def __init__(self):
warnings.warn(
"pyarrow.filesystem.LocalFileSystem is deprecated as of 2.0.0, "
"please use pyarrow.fs.LocalFileSystem instead",
DeprecationWarning, stacklevel=2)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Put a helper in pyarrow.util? (and use FutureWarning?)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a _deprecate_class in pyarrow.util, but that serves a different purpose (it aliases the old name to the new class, but with a warning).

And if it is just a helper for raising the warning, I am not sure that is worth it (compared to using a template message that can be filled in).

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved a template to pyarrow.util. Can also make it a function raising the warning if you prefer

super().__init__()

@classmethod
def _get_instance(cls):
    """
    Return the cached LocalFileSystem instance without warning.

    Internal accessor used by pyarrow itself. The instance is created
    lazily on first use and stored on ``cls._instance``.

    Returns
    -------
    LocalFileSystem
        The shared instance stored on the class.
    """
    if cls._instance is None:
        with warnings.catch_warnings():
            # catch_warnings() only saves and restores the filter state;
            # an explicit "ignore" filter is needed so the deprecation
            # warning raised by LocalFileSystem.__init__ does not leak
            # out of this internal code path.
            warnings.simplefilter("ignore")
            cls._instance = LocalFileSystem()
    return cls._instance

@classmethod
def get_instance(cls):
    """
    Return the shared LocalFileSystem instance (deprecated entry point).

    Emits a DeprecationWarning attributed to the caller and then
    delegates to ``cls._get_instance()``.
    """
    deprecation_message = (
        "pyarrow.filesystem.LocalFileSystem is deprecated as of 2.0.0, "
        "please use pyarrow.fs.LocalFileSystem instead"
    )
    # stacklevel=2 makes the warning point at the code calling
    # get_instance() rather than at this method.
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)
    return cls._get_instance()

@implements(FileSystem.ls)
def ls(self, path):
path = _stringify_path(path)
Expand Down Expand Up @@ -431,7 +448,18 @@ def _ensure_filesystem(fs):
# In case it's a simple LocalFileSystem (e.g. dask) use native arrow
# FS
elif mro.__name__ == 'LocalFileSystem':
return LocalFileSystem.get_instance()
return LocalFileSystem._get_instance()

try:
import fsspec
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fsspec seems to take some time to import; we should probably look it up directly in sys.modules instead.

except ImportError:
pass
else:
if isinstance(fs, fsspec.AbstractFileSystem):
# for recent fsspec versions that stop inheriting from
# pyarrow.filesystem.FileSystem, still allow fsspec
# filesystems (which should be compatible with our legacy fs)
return fs

raise OSError('Unrecognized filesystem: {}'.format(fs_type))
else:
Expand Down Expand Up @@ -476,15 +504,15 @@ def resolve_filesystem_and_path(where, filesystem=None):
port = 0
if len(netloc_split) == 2 and netloc_split[1].isnumeric():
port = int(netloc_split[1])
fs = pa.hdfs.connect(host=host, port=port)
fs = pa.hdfs._connect(host=host, port=port)
fs_path = parsed_uri.path
elif parsed_uri.scheme == 'file':
# Input is local URI such as file:///home/user/myfile.parquet
fs = LocalFileSystem.get_instance()
fs = LocalFileSystem._get_instance()
fs_path = parsed_uri.path
else:
# Input is local path such as /home/user/myfile.parquet
fs = LocalFileSystem.get_instance()
fs = LocalFileSystem._get_instance()
fs_path = path

return fs, fs_path
26 changes: 23 additions & 3 deletions python/pyarrow/hdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import os
import posixpath
import sys
import warnings

from pyarrow.util import implements
from pyarrow.filesystem import FileSystem
Expand All @@ -34,6 +35,10 @@ class HadoopFileSystem(lib.HadoopFileSystem, FileSystem):

def __init__(self, host="default", port=0, user=None, kerb_ticket=None,
driver='libhdfs', extra_conf=None):
warnings.warn(
"'pyarrow.hdfs.HadoopFileSystem' is deprecated as of 2.0.0, "
"please use pyarrow.fs.HadoopFileSystem instead",
DeprecationWarning, stacklevel=2)
if driver == 'libhdfs':
_maybe_set_hadoop_classpath()

Expand Down Expand Up @@ -205,7 +210,22 @@ def connect(host="default", port=0, user=None, kerb_ticket=None,
-------
filesystem : HadoopFileSystem
"""
fs = HadoopFileSystem(host=host, port=port, user=user,
kerb_ticket=kerb_ticket,
extra_conf=extra_conf)
warnings.warn(
"'pyarrow.hdfs.connect' is deprecated as of 2.0.0, "
"please use pyarrow.fs.HadoopFileSystem instead",
DeprecationWarning, stacklevel=2
)
return _connect(
host=host, port=port, user=user, kerb_ticket=kerb_ticket,
extra_conf=extra_conf
)


def _connect(host="default", port=0, user=None, kerb_ticket=None,
             extra_conf=None):
    """
    Create a HadoopFileSystem without emitting its deprecation warning.

    Internal helper: the public ``connect`` function warns once and then
    delegates here. All arguments are forwarded unchanged to the
    HadoopFileSystem constructor.

    Returns
    -------
    filesystem : HadoopFileSystem
    """
    constructor_kwargs = dict(
        host=host,
        port=port,
        user=user,
        kerb_ticket=kerb_ticket,
        extra_conf=extra_conf,
    )
    with warnings.catch_warnings():
        # Silence warnings raised while constructing the filesystem
        # (the constructor itself issues a DeprecationWarning).
        warnings.simplefilter("ignore")
        return HadoopFileSystem(**constructor_kwargs)
2 changes: 1 addition & 1 deletion python/pyarrow/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -1348,7 +1348,7 @@ def _make_manifest(path_or_paths, fs, pathsep='/', metadata_nthreads=1,
if _is_path_like(path_or_paths) and fs.isdir(path_or_paths):
manifest = ParquetManifest(path_or_paths, filesystem=fs,
open_file_func=open_file_func,
pathsep=fs.pathsep,
pathsep=getattr(fs, "pathsep", "/"),
metadata_nthreads=metadata_nthreads)
common_metadata_path = manifest.common_metadata_path
metadata_path = manifest.metadata_path
Expand Down
30 changes: 30 additions & 0 deletions python/pyarrow/tests/test_filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,38 @@
# specific language governing permissions and limitations
# under the License.

import sys

import pyarrow as pa
from pyarrow import filesystem

import pytest


def test_filesystem_deprecated():
    """Both ways of obtaining a legacy LocalFileSystem must warn."""
    deprecated_entry_points = (
        filesystem.LocalFileSystem,
        filesystem.LocalFileSystem.get_instance,
    )
    for entry_point in deprecated_entry_points:
        with pytest.warns(DeprecationWarning):
            entry_point()


@pytest.mark.skipif(sys.version_info < (3, 7),
                    reason="getattr needs Python 3.7")
def test_filesystem_deprecated_toplevel():
    """Accessing the legacy filesystem names on ``pyarrow`` must warn."""
    legacy_names = (
        "localfs",
        "FileSystem",
        "LocalFileSystem",
        "HadoopFileSystem",
    )
    for attribute_name in legacy_names:
        with pytest.warns(DeprecationWarning):
            getattr(pa, attribute_name)


def test_resolve_uri():
uri = "file:///home/user/myfile.parquet"
Expand Down
3 changes: 2 additions & 1 deletion python/pyarrow/tests/test_hdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ def hdfs_test_client():
raise ValueError('Env variable ARROW_HDFS_TEST_PORT was not '
'an integer')

return pa.hdfs.connect(host, port, user)
with pytest.warns(DeprecationWarning):
return pa.hdfs.connect(host, port, user)


@pytest.mark.hdfs
Expand Down
Loading