Skip to content

Commit

Permalink
Consistent recursive cp, get and put into non-existent target directo…
Browse files Browse the repository at this point in the history
…ry (#1148)

* Consistent recursive cp, get and put into non-existent target directory

* Fix copying 2+ named files to target directory

* Fix windows tests

* Separate cp/get/put memory fs tests
  • Loading branch information
ianthomas23 authored Jan 20, 2023
1 parent 2ae366b commit db79115
Show file tree
Hide file tree
Showing 4 changed files with 226 additions and 10 deletions.
70 changes: 67 additions & 3 deletions fsspec/implementations/tests/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ def test_mv_recursive(tmpdir):

@pytest.mark.xfail(WIN, reason="windows expand path to be revisited")
def test_copy_errors(tmpdir):
localfs = fsspec.filesystem("file")
localfs = fsspec.filesystem("file", auto_mkdir=True)

dest1 = os.path.join(str(tmpdir), "dest1")
dest2 = os.path.join(str(tmpdir), "dest2")
Expand All @@ -695,6 +695,7 @@ def test_copy_errors(tmpdir):
localfs.copy([file1, file2, dne], dest1)

localfs.copy([file1, file2, dne], dest1, on_error="ignore")

assert sorted(localfs.ls(dest1)) == [
make_path_posix(os.path.join(dest1, "afile1")),
make_path_posix(os.path.join(dest1, "afile2")),
Expand All @@ -706,9 +707,9 @@ def test_copy_errors(tmpdir):
current_files = localfs.expand_path(src, recursive=True)
with patch.object(localfs, "expand_path", return_value=current_files + [dne]):
with pytest.raises(FileNotFoundError):
localfs.copy(src, dest2, recursive=True, on_error="raise")
localfs.copy(src + "/", dest2, recursive=True, on_error="raise")

localfs.copy(src, dest2, recursive=True)
localfs.copy(src + "/", dest2, recursive=True)
assert sorted(localfs.ls(dest2)) == [
make_path_posix(os.path.join(dest2, "afile1")),
make_path_posix(os.path.join(dest2, "afile2")),
Expand Down Expand Up @@ -864,3 +865,66 @@ def test_du(tmpdir):
# Size of file only.
assert fs.du(file) == 4
assert fs.du(file, withdirs=True) == 4


@pytest.mark.parametrize("funcname", ["cp", "get", "put"])
def test_cp_get_put_directory_recursive(tmpdir, funcname):
# https://github.com/fsspec/filesystem_spec/issues/1062
# Recursive cp/get/put of source directory into non-existent target directory.
fs = LocalFileSystem()
src = os.path.join(str(tmpdir), "src")
fs.mkdir(src)
fs.touch(os.path.join(src, "file"))

target = os.path.join(str(tmpdir), "target")

if funcname == "cp":
func = fs.cp
elif funcname == "get":
func = fs.get
elif funcname == "put":
func = fs.put

# cp/get/put without slash
assert not fs.exists(target)
for loop in range(2):
func(src, target, recursive=True)
assert fs.isdir(target)

if loop == 0:
assert fs.find(target) == [make_path_posix(os.path.join(target, "file"))]
else:
assert sorted(fs.find(target)) == [
make_path_posix(os.path.join(target, "file")),
make_path_posix(os.path.join(target, "src", "file")),
]

fs.rm(target, recursive=True)

# cp/get/put with slash
assert not fs.exists(target)
for loop in range(2):
func(src + "/", target, recursive=True)
assert fs.isdir(target)
assert fs.find(target) == [make_path_posix(os.path.join(target, "file"))]


def test_cp_two_files(tmpdir):
fs = LocalFileSystem(auto_mkdir=True)
src = os.path.join(str(tmpdir), "src")
file0 = os.path.join(src, "file0")
file1 = os.path.join(src, "file1")
fs.mkdir(src)
fs.touch(file0)
fs.touch(file1)

target = os.path.join(str(tmpdir), "target")
assert not fs.exists(target)

fs.cp([file0, file1], target)

assert fs.isdir(target)
assert sorted(fs.find(target)) == [
make_path_posix(os.path.join(target, "file0")),
make_path_posix(os.path.join(target, "file1")),
]
133 changes: 132 additions & 1 deletion fsspec/implementations/tests/test_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import pytest

from fsspec.implementations.local import LocalFileSystem, make_path_posix


def test_1(m):
m.touch("/somefile") # NB: is found with or without initial /
Expand All @@ -26,9 +28,11 @@ def test_put_single(m, tmpdir):
os.mkdir(fn)
open(os.path.join(fn, "abc"), "w").write("text")
m.put(fn, "/test") # no-op, no files
assert m.isdir("/test")
assert not m.exists("/test/abc")
assert not m.exists("/test/dir")
m.put(fn + "/", "/test", recursive=True)
m.put(fn, "/test", recursive=True)
assert m.isdir("/test/dir")
assert m.cat("/test/dir/abc") == b"text"


Expand Down Expand Up @@ -159,3 +163,130 @@ def test_remove_all(m):
m.touch("afile")
m.rm("/", recursive=True)
assert not m.ls("/")


def test_cp_directory_recursive(m):
# https://github.com/fsspec/filesystem_spec/issues/1062
# Recursive cp/get/put of source directory into non-existent target directory.
src = "/src"
src_file = src + "/file"
m.mkdir(src)
m.touch(src_file)

target = "/target"

# cp without slash
assert not m.exists(target)
for loop in range(2):
m.cp(src, target, recursive=True)
assert m.isdir(target)

if loop == 0:
correct = [target + "/file"]
assert m.find(target) == correct
else:
correct = [target + "/file", target + "/src/file"]
assert sorted(m.find(target)) == correct

m.rm(target, recursive=True)

# cp with slash
assert not m.exists(target)
for loop in range(2):
m.cp(src + "/", target, recursive=True)
assert m.isdir(target)
correct = [target + "/file"]
assert m.find(target) == correct


def test_get_directory_recursive(m, tmpdir):
# https://github.com/fsspec/filesystem_spec/issues/1062
# Recursive cp/get/put of source directory into non-existent target directory.
src = "/src"
src_file = src + "/file"
m.mkdir(src)
m.touch(src_file)

target = os.path.join(tmpdir, "target")
target_fs = LocalFileSystem()

# get without slash
assert not target_fs.exists(target)
for loop in range(2):
m.get(src, target, recursive=True)
assert target_fs.isdir(target)

if loop == 0:
correct = [make_path_posix(os.path.join(target, "file"))]
assert target_fs.find(target) == correct
else:
correct = [
make_path_posix(os.path.join(target, "file")),
make_path_posix(os.path.join(target, "src", "file")),
]
assert sorted(target_fs.find(target)) == correct

target_fs.rm(target, recursive=True)

# get with slash
assert not target_fs.exists(target)
for loop in range(2):
m.get(src + "/", target, recursive=True)
assert target_fs.isdir(target)
correct = [make_path_posix(os.path.join(target, "file"))]
assert target_fs.find(target) == correct


def test_put_directory_recursive(m, tmpdir):
# https://github.com/fsspec/filesystem_spec/issues/1062
# Recursive cp/get/put of source directory into non-existent target directory.
src = os.path.join(tmpdir, "src")
src_file = os.path.join(src, "file")
source_fs = LocalFileSystem()
source_fs.mkdir(src)
source_fs.touch(src_file)

target = "/target"

# put without slash
assert not m.exists(target)
for loop in range(2):
m.put(src, target, recursive=True)
assert m.isdir(target)

if loop == 0:
correct = [target + "/file"]
assert m.find(target) == correct
else:
correct = [target + "/file", target + "/src/file"]
assert sorted(m.find(target)) == correct

m.rm(target, recursive=True)

# put with slash
assert not m.exists(target)
for loop in range(2):
m.put(src + "/", target, recursive=True)
assert m.isdir(target)
correct = [target + "/file"]
assert m.find(target) == correct


def test_cp_two_files(m):
src = "/src"
file0 = src + "/file0"
file1 = src + "/file1"
m.mkdir(src)
m.touch(file0)
m.touch(file1)

target = "/target"
assert not m.exists(target)

m.cp([file0, file1], target)

assert m.isdir(target)
assert sorted(m.find(target)) == [
"/target/file0",
"/target/file1",
]
23 changes: 18 additions & 5 deletions fsspec/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -878,12 +878,18 @@ def get(self, rpath, lpath, recursive=False, callback=_DEFAULT_CALLBACK, **kwarg
Calls get_file for each source.
"""
from .implementations.local import make_path_posix
from .implementations.local import LocalFileSystem, make_path_posix

if isinstance(lpath, str):
lpath = make_path_posix(lpath)
rpaths = self.expand_path(rpath, recursive=recursive)
lpaths = other_paths(rpaths, lpath)
isdir = isinstance(lpath, str) and LocalFileSystem().isdir(lpath)
lpaths = other_paths(
rpaths,
lpath,
exists=isdir and isinstance(rpath, str) and not rpath.endswith("/"),
is_dir=isdir,
)

callback.set_size(len(lpaths))
for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
Expand Down Expand Up @@ -934,8 +940,8 @@ def put(self, lpath, rpath, recursive=False, callback=_DEFAULT_CALLBACK, **kwarg
rpaths = other_paths(
lpaths,
rpath,
exists=isdir,
is_dir=isdir and len(lpaths) == 1,
exists=isdir and isinstance(lpath, str) and not lpath.endswith("/"),
is_dir=isdir,
)

callback.set_size(len(rpaths))
Expand Down Expand Up @@ -971,7 +977,14 @@ def copy(self, path1, path2, recursive=False, on_error=None, **kwargs):
on_error = "raise"

paths = self.expand_path(path1, recursive=recursive)
path2 = other_paths(paths, path2)
isdir = isinstance(path2, str) and self.isdir(path2)
path2 = other_paths(
paths,
path2,
exists=isdir and isinstance(path1, str) and not path1.endswith("/"),
is_dir=isdir,
)

for p1, p2 in zip(paths, path2):
try:
self.cp_file(p1, p2, **kwargs)
Expand Down
10 changes: 9 additions & 1 deletion fsspec/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,12 +162,20 @@ def test_recursive_get_put(tmpdir, m):

fs.put(str(tmpdir), "test", recursive=True)

# get to directory with slash
d = tempfile.mkdtemp()
fs.get("test", d, recursive=True)
fs.get("test/", d, recursive=True)
for file in ["one", "two", "nest/other"]:
with open(f"{d}/{file}", "rb") as f:
f.read() == b"data"

# get to directory without slash
d = tempfile.mkdtemp()
fs.get("test", d, recursive=True)
for file in ["test/one", "test/two", "test/nest/other"]:
with open(f"{d}/{file}", "rb") as f:
f.read() == b"data"


def test_pipe_cat(m):
fs = MemoryFileSystem()
Expand Down

0 comments on commit db79115

Please sign in to comment.