Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 43 additions & 3 deletions fsspec/implementations/cached.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
import os
import hashlib
from shutil import move
from shutil import move, rmtree
import tempfile
import inspect
from fsspec import AbstractFileSystem, filesystem
Expand Down Expand Up @@ -171,14 +171,17 @@ def _check_cache(self):

def _check_file(self, path):
"""Is path in cache and still valid"""
path = self._strip_protocol(path)
self._check_cache()
if not path.startswith(self.target_protocol):
store_path = self.target_protocol + "://" + path
path = self.fs._strip_protocol(store_path)
else:
store_path = path
for storage, cache in zip(self.storage, self.cached_files):
if path not in cache:
if store_path not in cache:
continue
detail = cache[path].copy()
detail = cache[store_path].copy()
if self.check_files:
if detail["uid"] != self.fs.ukey(path):
continue
Expand All @@ -190,6 +193,41 @@ def _check_file(self, path):
return detail, fn
return False, None

def clear_cache(self):
    """Delete every cached file together with the cache metadata.

    Only the final entry of ``self.storage`` is removed; when several cache
    locations are configured, that last one is assumed to be the read/write
    location. The cache state is then reloaded with ``self.load_cache()``.
    """
    writable_location = self.storage[-1]
    rmtree(writable_location)
    self.load_cache()

def pop_from_cache(self, path):
    """Remove the cached version of the given file.

    Deletes the local copy of the given (remote) path and drops its entry
    from the metadata of the last cache location. Does nothing if the path
    is not found in any cache.

    Parameters
    ----------
    path: str
        Remote path, with or without the target protocol prefix.

    Raises
    ------
    PermissionError
        If the cached copy is found in a cache location which is not the
        last one; such locations are assumed to be read-only.
    """
    path = self._strip_protocol(path)
    # Cache metadata is keyed on the protocol-qualified path.
    if path.startswith(self.target_protocol):
        store_path = path
    else:
        store_path = self.target_protocol + "://" + path
        path = self.fs._strip_protocol(store_path)
    _, fn = self._check_file(path)
    if fn is None:
        # not cached anywhere: nothing to remove
        return
    # Compare against the directory *with* a trailing separator. A bare
    # string-prefix test would treat a file in a sibling directory such as
    # ".../cache10" as living inside the writable ".../cache1".
    writable_root = os.path.join(self.storage[-1], "")
    if not fn.startswith(writable_root):
        raise PermissionError(
            "Can only delete cached file in last, writable cache location"
        )
    # is in writable cache
    os.remove(fn)
    self.cached_files[-1].pop(store_path)
    self.save_cache()

def _open(
self,
path,
Expand Down Expand Up @@ -311,6 +349,8 @@ def __getattribute__(self, item):
"head",
"_check_file",
"_check_cache",
"clear_cache",
"pop_from_cache",
]:
# all the methods defined in this class. Note `open` here, since
# it calls `_open`, but is actually in superclass
Expand Down
56 changes: 56 additions & 0 deletions fsspec/implementations/tests/test_cached.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,62 @@ def test_write():
assert open(fn, "rb").read() == b"hello"


def test_clear():
    """clear_cache() empties the cache location and forgets cached files."""
    import tempfile

    origin = tempfile.mkdtemp()
    cache1 = tempfile.mkdtemp()
    data = b"test data"
    f1 = os.path.join(origin, "afile")
    with open(f1, "wb") as f:
        f.write(data)

    # populates first cache
    fs = fsspec.filesystem("filecache", target_protocol="file", cache_storage=cache1)
    assert fs.cat(f1) == data

    assert "cache" in os.listdir(cache1)
    assert len(os.listdir(cache1)) == 2
    # _check_file returns a 2-tuple, which is truthy even on a miss
    # (False, None); check the first element so the assertion can fail.
    assert fs._check_file(f1)[0]

    fs.clear_cache()
    assert not fs._check_file(f1)[0]
    assert len(os.listdir(cache1)) < 2


def test_pop():
    """pop_from_cache() removes files only from the last cache location."""
    import tempfile

    origin = tempfile.mkdtemp()
    cache1 = tempfile.mkdtemp()
    cache2 = tempfile.mkdtemp()
    data = b"test data"
    f1 = os.path.join(origin, "afile")
    f2 = os.path.join(origin, "bfile")
    with open(f1, "wb") as f:
        f.write(data)
    with open(f2, "wb") as f:
        f.write(data)

    # populates first cache
    fs = fsspec.filesystem("filecache", target_protocol="file", cache_storage=cache1)
    fs.cat(f1)

    # populates last cache if file not found in first cache
    fs = fsspec.filesystem(
        "filecache", target_protocol="file", cache_storage=[cache1, cache2]
    )
    assert fs.cat(f2) == data
    assert len(os.listdir(cache2)) == 2
    # _check_file returns a 2-tuple, which is truthy even on a miss
    # (False, None); check the first element so the assertion can fail.
    assert fs._check_file(f1)[0]
    # f1 lives in the first cache location, so it cannot be popped
    with pytest.raises(PermissionError):
        fs.pop_from_cache(f1)
    fs.pop_from_cache(f2)
    assert len(os.listdir(cache2)) == 1
    assert fs._check_file(f2)[0] is False
    # the failed pop must have left f1 cached
    assert fs._check_file(f1)[0]


def test_write_pickle_context():
tmp = str(tempfile.mkdtemp())
fn = tmp + "afile"
Expand Down
3 changes: 3 additions & 0 deletions fsspec/implementations/tests/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,13 +302,16 @@ def test_globfind_dirs(tmpdir):


def test_touch(tmpdir):
import time

fn = tmpdir + "/in/file"
fs = fsspec.filesystem("file", auto_mkdir=False)
with pytest.raises(OSError):
fs.touch(fn)
fs = fsspec.filesystem("file", auto_mkdir=True)
fs.touch(fn)
info = fs.info(fn)
time.sleep(0.2)
fs.touch(fn)
info2 = fs.info(fn)
if not WIN:
Expand Down