Skip to content
6 changes: 3 additions & 3 deletions docs/source/python/filesystems_deprecated.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
Filesystem Interface (legacy)
=============================

.. note::
This section documents the deprecated filesystem layer. It is highly
recommended to use the :ref:`new filesystem layer <filesystem>` instead.
.. warning::
This section documents the deprecated filesystem layer. You should
use the :ref:`new filesystem layer <filesystem>` instead.

.. _hdfs:

Expand Down
43 changes: 37 additions & 6 deletions python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import gc as _gc
import os as _os
import sys as _sys
import warnings as _warnings

try:
from ._generated_version import version as __version__
Expand Down Expand Up @@ -190,23 +191,53 @@ def show_versions():
SerializationCallbackError,
DeserializationCallbackError)

from pyarrow.filesystem import FileSystem, LocalFileSystem

from pyarrow.hdfs import HadoopFileSystem
import pyarrow.hdfs as hdfs

from pyarrow.ipc import serialize_pandas, deserialize_pandas
import pyarrow.ipc as ipc


localfs = LocalFileSystem.get_instance()

from pyarrow.serialization import (default_serialization_context,
register_default_serialization_handlers,
register_torch_serialization_handlers)

import pyarrow.types as types


# deprecated filesystems

# Import the legacy filesystem classes under private aliases so that all
# public access goes through the deprecation shims defined below.
from pyarrow.filesystem import FileSystem as _FileSystem, LocalFileSystem as _LocalFileSystem
from pyarrow.hdfs import HadoopFileSystem as _HadoopFileSystem

# Shared legacy-LocalFileSystem singleton, created through the internal
# accessor `_get_instance` rather than the public (warning) `get_instance`.
_localfs = _LocalFileSystem._get_instance()


# Template for the warning emitted when a deprecated top-level name is used.
_msg = "pyarrow.{0} is deprecated as of 2.0.0, please use pyarrow.fs.{1} instead."

# Maps each deprecated top-level name to
# (object to return, replacement name under pyarrow.fs).
_deprecated = {
    "localfs": (_localfs, "LocalFileSystem"),
    "FileSystem": (_FileSystem, "FileSystem"),
    "LocalFileSystem": (_LocalFileSystem, "LocalFileSystem"),
    "HadoopFileSystem": (_HadoopFileSystem, "HadoopFileSystem"),
}

if _sys.version_info >= (3, 7):
    # Python 3.7+: use a module-level __getattr__ (PEP 562) so that the
    # deprecated names warn only when they are actually accessed.
    def __getattr__(name):
        """Resolve deprecated top-level names with a DeprecationWarning."""
        if name not in _deprecated:
            raise AttributeError(
                "module 'pyarrow' has no attribute '{0}'".format(name)
            )
        value, replacement = _deprecated[name]
        _warnings.warn(_msg.format(name, replacement),
                       DeprecationWarning, stacklevel=2)
        return value
else:
    # No PEP 562 support: expose the legacy names directly (no warning
    # can be emitted on attribute access here).
    localfs = _localfs
    FileSystem = _FileSystem
    LocalFileSystem = _LocalFileSystem
    HadoopFileSystem = _HadoopFileSystem


# Entry point for starting the plasma store


Expand Down
39 changes: 32 additions & 7 deletions python/pyarrow/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,19 @@
import os
import inspect
import posixpath
import sys
import urllib.parse
import warnings

from os.path import join as pjoin

import pyarrow as pa
from pyarrow.util import implements, _stringify_path, _is_path_like
from pyarrow.util import implements, _stringify_path, _is_path_like, _DEPR_MSG


# Deprecation message for the legacy LocalFileSystem entry points; the
# _DEPR_MSG template takes (deprecated name, version, replacement name).
_FS_DEPR_MSG = _DEPR_MSG.format(
    "filesystem.LocalFileSystem", "2.0.0", "fs.LocalFileSystem"
)


class FileSystem:
Expand Down Expand Up @@ -237,12 +244,22 @@ class LocalFileSystem(FileSystem):

_instance = None

def __init__(self):
    # Warn on direct construction: this legacy class is deprecated in
    # favor of pyarrow.fs.LocalFileSystem (see _FS_DEPR_MSG).
    warnings.warn(_FS_DEPR_MSG, DeprecationWarning, stacklevel=2)
    super().__init__()

@classmethod
def _get_instance(cls):
    """Return the shared LocalFileSystem singleton without warning.

    Internal accessor for code paths (e.g. URI resolution) that should
    not surface the DeprecationWarning raised by ``__init__``.
    """
    if cls._instance is None:
        with warnings.catch_warnings():
            # BUG FIX: catch_warnings() alone only saves and restores the
            # filter state -- an explicit "ignore" filter is required to
            # actually silence the DeprecationWarning emitted in
            # LocalFileSystem.__init__ (matching hdfs._connect).
            warnings.simplefilter("ignore")
            cls._instance = LocalFileSystem()
    return cls._instance

@classmethod
def get_instance(cls):
    """Deprecated public accessor for the LocalFileSystem singleton.

    Emits a DeprecationWarning, then delegates to the internal
    ``_get_instance`` accessor.
    """
    warnings.warn(_FS_DEPR_MSG, DeprecationWarning, stacklevel=2)
    instance = cls._get_instance()
    return instance

@implements(FileSystem.ls)
def ls(self, path):
path = _stringify_path(path)
Expand Down Expand Up @@ -431,7 +448,15 @@ def _ensure_filesystem(fs):
# In case its a simple LocalFileSystem (e.g. dask) use native arrow
# FS
elif mro.__name__ == 'LocalFileSystem':
return LocalFileSystem.get_instance()
return LocalFileSystem._get_instance()

if "fsspec" in sys.modules:
fsspec = sys.modules["fsspec"]
if isinstance(fs, fsspec.AbstractFileSystem):
# for recent fsspec versions that stop inheriting from
# pyarrow.filesystem.FileSystem, still allow fsspec
# filesystems (which should be compatible with our legacy fs)
return fs

raise OSError('Unrecognized filesystem: {}'.format(fs_type))
else:
Expand Down Expand Up @@ -476,15 +501,15 @@ def resolve_filesystem_and_path(where, filesystem=None):
port = 0
if len(netloc_split) == 2 and netloc_split[1].isnumeric():
port = int(netloc_split[1])
fs = pa.hdfs.connect(host=host, port=port)
fs = pa.hdfs._connect(host=host, port=port)
fs_path = parsed_uri.path
elif parsed_uri.scheme == 'file':
# Input is local URI such as file:///home/user/myfile.parquet
fs = LocalFileSystem.get_instance()
fs = LocalFileSystem._get_instance()
fs_path = parsed_uri.path
else:
# Input is local path such as /home/user/myfile.parquet
fs = LocalFileSystem.get_instance()
fs = LocalFileSystem._get_instance()
fs_path = path

return fs, fs_path
27 changes: 23 additions & 4 deletions python/pyarrow/hdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@
import os
import posixpath
import sys
import warnings

from pyarrow.util import implements
from pyarrow.util import implements, _DEPR_MSG
from pyarrow.filesystem import FileSystem
import pyarrow.lib as lib

Expand All @@ -34,6 +35,10 @@ class HadoopFileSystem(lib.HadoopFileSystem, FileSystem):

def __init__(self, host="default", port=0, user=None, kerb_ticket=None,
driver='libhdfs', extra_conf=None):
warnings.warn(
_DEPR_MSG.format(
"hdfs.HadoopFileSystem", "2.0.0", "fs.HadoopFileSystem"),
DeprecationWarning, stacklevel=2)
if driver == 'libhdfs':
_maybe_set_hadoop_classpath()

Expand Down Expand Up @@ -205,7 +210,21 @@ def connect(host="default", port=0, user=None, kerb_ticket=None,
-------
filesystem : HadoopFileSystem
"""
fs = HadoopFileSystem(host=host, port=port, user=user,
kerb_ticket=kerb_ticket,
extra_conf=extra_conf)
warnings.warn(
_DEPR_MSG.format("hdfs.connect", "2.0.0", "fs.HadoopFileSystem"),
DeprecationWarning, stacklevel=2
)
return _connect(
host=host, port=port, user=user, kerb_ticket=kerb_ticket,
extra_conf=extra_conf
)


def _connect(host="default", port=0, user=None, kerb_ticket=None,
             extra_conf=None):
    """Create a HadoopFileSystem without emitting the deprecation warning.

    Internal variant of ``connect`` used by code paths that have already
    warned (or should stay silent).
    """
    with warnings.catch_warnings():
        # HadoopFileSystem.__init__ warns; silence it on this internal path.
        warnings.simplefilter("ignore")
        filesystem = HadoopFileSystem(host=host, port=port, user=user,
                                      kerb_ticket=kerb_ticket,
                                      extra_conf=extra_conf)
    return filesystem
2 changes: 1 addition & 1 deletion python/pyarrow/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -1348,7 +1348,7 @@ def _make_manifest(path_or_paths, fs, pathsep='/', metadata_nthreads=1,
if _is_path_like(path_or_paths) and fs.isdir(path_or_paths):
manifest = ParquetManifest(path_or_paths, filesystem=fs,
open_file_func=open_file_func,
pathsep=fs.pathsep,
pathsep=getattr(fs, "pathsep", "/"),
metadata_nthreads=metadata_nthreads)
common_metadata_path = manifest.common_metadata_path
metadata_path = manifest.metadata_path
Expand Down
30 changes: 30 additions & 0 deletions python/pyarrow/tests/test_filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,38 @@
# specific language governing permissions and limitations
# under the License.

import sys

import pyarrow as pa
from pyarrow import filesystem

import pytest


def test_filesystem_deprecated():
    """Both legacy LocalFileSystem entry points must emit a warning."""
    for factory in (filesystem.LocalFileSystem,
                    filesystem.LocalFileSystem.get_instance):
        with pytest.warns(DeprecationWarning):
            factory()


@pytest.mark.skipif(sys.version_info < (3, 7),
                    reason="getattr needs Python 3.7")
def test_filesystem_deprecated_toplevel():
    """Accessing each deprecated top-level pyarrow name must warn."""
    for attr in ("localfs", "FileSystem",
                 "LocalFileSystem", "HadoopFileSystem"):
        with pytest.warns(DeprecationWarning):
            getattr(pa, attr)


def test_resolve_uri():
uri = "file:///home/user/myfile.parquet"
Expand Down
3 changes: 2 additions & 1 deletion python/pyarrow/tests/test_hdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ def hdfs_test_client():
raise ValueError('Env variable ARROW_HDFS_TEST_PORT was not '
'an integer')

return pa.hdfs.connect(host, port, user)
with pytest.warns(DeprecationWarning):
return pa.hdfs.connect(host, port, user)


@pytest.mark.hdfs
Expand Down
Loading