diff --git a/fsspec/implementations/cached.py b/fsspec/implementations/cached.py
index 1cb7451c0..5424c1d41 100644
--- a/fsspec/implementations/cached.py
+++ b/fsspec/implementations/cached.py
@@ -3,7 +3,7 @@
 import logging
 import os
 import hashlib
-from shutil import move
+from shutil import move, rmtree
 import tempfile
 import inspect
 from fsspec import AbstractFileSystem, filesystem
@@ -171,14 +171,17 @@ def _check_cache(self):
 
     def _check_file(self, path):
         """Is path in cache and still valid"""
+        path = self._strip_protocol(path)
         self._check_cache()
         if not path.startswith(self.target_protocol):
             store_path = self.target_protocol + "://" + path
             path = self.fs._strip_protocol(store_path)
+        else:
+            store_path = path
         for storage, cache in zip(self.storage, self.cached_files):
-            if path not in cache:
+            if store_path not in cache:
                 continue
-            detail = cache[path].copy()
+            detail = cache[store_path].copy()
             if self.check_files:
                 if detail["uid"] != self.fs.ukey(path):
                     continue
@@ -190,6 +193,41 @@ def _check_file(self, path):
                 return detail, fn
         return False, None
 
+    def clear_cache(self):
+        """Remove all files and metadata from the cache
+
+        In the case of multiple cache locations, this clears only the last one,
+        which is assumed to be the read/write one.
+        """
+        rmtree(self.storage[-1])
+        self.load_cache()
+
+    def pop_from_cache(self, path):
+        """Remove cached version of given file
+
+        Deletes local copy of the given (remote) path. If it is found in a cache
+        location which is not the last, it is assumed to be read-only, and
+        PermissionError is raised. Does nothing if the path is not cached.
+        """
+        path = self._strip_protocol(path)
+        if not path.startswith(self.target_protocol):
+            store_path = self.target_protocol + "://" + path
+            path = self.fs._strip_protocol(store_path)
+        else:
+            store_path = path
+        _, fn = self._check_file(path)
+        if fn is None:
+            return
+        if fn.startswith(os.path.join(self.storage[-1], "")):
+            # file is in the writable cache (trailing sep avoids prefix clashes)
+            os.remove(fn)
+            self.cached_files[-1].pop(store_path)
+            self.save_cache()
+        else:
+            raise PermissionError(
+                "Can only delete cached file in last, writable cache location"
+            )
+
     def _open(
         self,
         path,
@@ -311,6 +349,8 @@ def __getattribute__(self, item):
             "head",
             "_check_file",
             "_check_cache",
+            "clear_cache",
+            "pop_from_cache",
         ]:
             # all the methods defined in this class. Note `open` here, since
             # it calls `_open`, but is actually in superclass
diff --git a/fsspec/implementations/tests/test_cached.py b/fsspec/implementations/tests/test_cached.py
index b860ff618..c6f0a2968 100644
--- a/fsspec/implementations/tests/test_cached.py
+++ b/fsspec/implementations/tests/test_cached.py
@@ -91,6 +91,62 @@ def test_write():
     assert open(fn, "rb").read() == b"hello"
 
 
+def test_clear():
+    import tempfile
+
+    origin = tempfile.mkdtemp()
+    cache1 = tempfile.mkdtemp()
+    data = b"test data"
+    f1 = os.path.join(origin, "afile")
+    with open(f1, "wb") as f:
+        f.write(data)
+
+    # populates first cache
+    fs = fsspec.filesystem("filecache", target_protocol="file", cache_storage=cache1)
+    assert fs.cat(f1) == data
+
+    assert "cache" in os.listdir(cache1)
+    assert len(os.listdir(cache1)) == 2
+    assert fs._check_file(f1)[0]
+
+    fs.clear_cache()
+    assert not fs._check_file(f1)[0]
+    assert len(os.listdir(cache1)) < 2
+
+
+def test_pop():
+    import tempfile
+
+    origin = tempfile.mkdtemp()
+    cache1 = tempfile.mkdtemp()
+    cache2 = tempfile.mkdtemp()
+    data = b"test data"
+    f1 = os.path.join(origin, "afile")
+    f2 = os.path.join(origin, "bfile")
+    with open(f1, "wb") as f:
+        f.write(data)
+    with open(f2, "wb") as f:
+        f.write(data)
+
+    # populates first cache
+    fs = fsspec.filesystem("filecache", target_protocol="file", cache_storage=cache1)
+    fs.cat(f1)
+
+    # populates last cache if file not found in first cache
+    fs = fsspec.filesystem(
+        "filecache", target_protocol="file", cache_storage=[cache1, cache2]
+    )
+    assert fs.cat(f2) == data
+    assert len(os.listdir(cache2)) == 2
+    assert fs._check_file(f1)[0]
+    with pytest.raises(PermissionError):
+        fs.pop_from_cache(f1)
+    fs.pop_from_cache(f2)
+    assert len(os.listdir(cache2)) == 1
+    assert fs._check_file(f2)[0] is False
+    assert fs._check_file(f1)[0]
+
+
 def test_write_pickle_context():
     tmp = str(tempfile.mkdtemp())
     fn = tmp + "afile"
diff --git a/fsspec/implementations/tests/test_local.py b/fsspec/implementations/tests/test_local.py
index 3239d9b96..f33c9bfb6 100644
--- a/fsspec/implementations/tests/test_local.py
+++ b/fsspec/implementations/tests/test_local.py
@@ -302,6 +302,8 @@ def test_globfind_dirs(tmpdir):
 
 
 def test_touch(tmpdir):
+    import time
+
     fn = tmpdir + "/in/file"
     fs = fsspec.filesystem("file", auto_mkdir=False)
     with pytest.raises(OSError):
@@ -309,6 +311,7 @@ def test_touch(tmpdir):
     fs = fsspec.filesystem("file", auto_mkdir=True)
     fs.touch(fn)
     info = fs.info(fn)
+    time.sleep(0.2)
     fs.touch(fn)
     info2 = fs.info(fn)
     if not WIN: