From 30116fcabe4b8d92554727a3cdce906b568e9a5d Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 18 Jun 2020 13:22:23 -0400 Subject: [PATCH 1/4] start cache clear ops --- fsspec/implementations/cached.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/fsspec/implementations/cached.py b/fsspec/implementations/cached.py index 1cb7451c0..7df31217e 100644 --- a/fsspec/implementations/cached.py +++ b/fsspec/implementations/cached.py @@ -3,7 +3,11 @@ import logging import os import hashlib +<<<<<<< Updated upstream from shutil import move +======= +import shutil +>>>>>>> Stashed changes import tempfile import inspect from fsspec import AbstractFileSystem, filesystem @@ -190,6 +194,33 @@ def _check_file(self, path): return detail, fn return False, None + def clear_cache(self): + """Remove all files and metadat from the cache + + In the case of multiple cache locations, this clears only the last one, which is + assumed to be the read/write one. + """ + shutil.rmtree(self.storage[-1]) + self.load_cache() + + def pop_from_cache(self, path): + """Remove cached version of given file + + Deletes local copy of the given (remote) path. If it is found in a cache location + which is not the last, it is assumed to be read-only, and raises PermissionErroe + """ + path = self._strip_protocol(path) + _, fn = self._check_file(path) + if fn is None: + return + if fn.startswith(self.storage[-1]): + # is in in writable cache + os.remove(fn) + self.cached_files[-1].pop(path) + self.save_cache() + else: + raise PermissionError("Can only delete cached file in last, writable cache location") + def _open( self, path, From d08a46a867e24d7160088cf22b9e94ec4f826467 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 18 Jun 2020 13:33:02 -0400 Subject: [PATCH 2/4] bad stash --- fsspec/implementations/cached.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fsspec/implementations/cached.py b/fsspec/implementations/cached.py index 7df31217e..e605a3f83 100644 --- a/fsspec/implementations/cached.py +++ b/fsspec/implementations/cached.py @@ -3,11 +3,7 @@ import logging import os import hashlib -<<<<<<< Updated upstream -from shutil import move -======= -import shutil ->>>>>>> Stashed changes +from shutil import move, rmtree import tempfile import inspect from fsspec import AbstractFileSystem, filesystem @@ -200,7 +196,7 @@ def clear_cache(self): In the case of multiple cache locations, this clears only the last one, which is assumed to be the read/write one. """ - shutil.rmtree(self.storage[-1]) + rmtree(self.storage[-1]) self.load_cache() def pop_from_cache(self, path): @@ -219,7 +215,9 @@ def pop_from_cache(self, path): self.cached_files[-1].pop(path) self.save_cache() else: - raise PermissionError("Can only delete cached file in last, writable cache location") + raise PermissionError( + "Can only delete cached file in last, writable cache location" + ) def _open( self, From dab25a0a3b791b1597f044469937022e784611a3 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 18 Jun 2020 13:38:55 -0400 Subject: [PATCH 3/4] flake --- fsspec/implementations/cached.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fsspec/implementations/cached.py b/fsspec/implementations/cached.py index e605a3f83..e1a336b6a 100644 --- a/fsspec/implementations/cached.py +++ b/fsspec/implementations/cached.py @@ -193,8 +193,8 @@ def _check_file(self, path): def clear_cache(self): """Remove all files and metadat from the cache - In the case of multiple cache locations, this clears only the last one, which is - assumed to be the read/write one. + In the case of multiple cache locations, this clears only the last one, + which is assumed to be the read/write one. """ rmtree(self.storage[-1]) self.load_cache() @@ -202,8 +202,9 @@ def clear_cache(self): def pop_from_cache(self, path): """Remove cached version of given file - Deletes local copy of the given (remote) path. If it is found in a cache location - which is not the last, it is assumed to be read-only, and raises PermissionErroe + Deletes local copy of the given (remote) path. If it is found in a cache + location which is not the last, it is assumed to be read-only, and + raises PermissionError """ path = self._strip_protocol(path) _, fn = self._check_file(path) From 65e8f22c3896edcae7d71b27c70ae63b46f1d1c2 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 18 Jun 2020 14:09:58 -0400 Subject: [PATCH 4/4] add tests --- fsspec/implementations/cached.py | 16 ++++-- fsspec/implementations/tests/test_cached.py | 56 +++++++++++++++++++++ fsspec/implementations/tests/test_local.py | 3 ++ 3 files changed, 72 insertions(+), 3 deletions(-) diff --git a/fsspec/implementations/cached.py b/fsspec/implementations/cached.py index e1a336b6a..5424c1d41 100644 --- a/fsspec/implementations/cached.py +++ b/fsspec/implementations/cached.py @@ -171,14 +171,17 @@ def _check_cache(self): def _check_file(self, path): """Is path in cache and still valid""" + path = self._strip_protocol(path) self._check_cache() if not path.startswith(self.target_protocol): store_path = self.target_protocol + "://" + path path = self.fs._strip_protocol(store_path) + else: + store_path = path for storage, cache in zip(self.storage, self.cached_files): - if path not in cache: + if store_path not in cache: continue - detail = cache[path].copy() + detail = cache[store_path].copy() if self.check_files: if detail["uid"] != self.fs.ukey(path): continue @@ -207,13 +210,18 @@ def pop_from_cache(self, path): raises PermissionError """ path = self._strip_protocol(path) + if not path.startswith(self.target_protocol): + store_path = self.target_protocol + "://" + path + path = self.fs._strip_protocol(store_path) + else: + store_path = path _, fn = self._check_file(path) if fn is None: return if fn.startswith(self.storage[-1]): # is in in writable cache os.remove(fn) - self.cached_files[-1].pop(path) + self.cached_files[-1].pop(store_path) self.save_cache() else: raise PermissionError( @@ -341,6 +349,8 @@ def __getattribute__(self, item): "head", "_check_file", "_check_cache", + "clear_cache", + "pop_from_cache", ]: # all the methods defined in this class. Note `open` here, since # it calls `_open`, but is actually in superclass diff --git a/fsspec/implementations/tests/test_cached.py b/fsspec/implementations/tests/test_cached.py index b860ff618..c6f0a2968 100644 --- a/fsspec/implementations/tests/test_cached.py +++ b/fsspec/implementations/tests/test_cached.py @@ -91,6 +91,62 @@ def test_write(): assert open(fn, "rb").read() == b"hello" +def test_clear(): + import tempfile + + origin = tempfile.mkdtemp() + cache1 = tempfile.mkdtemp() + data = b"test data" + f1 = os.path.join(origin, "afile") + with open(f1, "wb") as f: + f.write(data) + + # populates first cache + fs = fsspec.filesystem("filecache", target_protocol="file", cache_storage=cache1) + assert fs.cat(f1) == data + + assert "cache" in os.listdir(cache1) + assert len(os.listdir(cache1)) == 2 + assert fs._check_file(f1) + + fs.clear_cache() + assert not fs._check_file(f1)[0] + assert len(os.listdir(cache1)) < 2 + + +def test_pop(): + import tempfile + + origin = tempfile.mkdtemp() + cache1 = tempfile.mkdtemp() + cache2 = tempfile.mkdtemp() + data = b"test data" + f1 = os.path.join(origin, "afile") + f2 = os.path.join(origin, "bfile") + with open(f1, "wb") as f: + f.write(data) + with open(f2, "wb") as f: + f.write(data) + + # populates first cache + fs = fsspec.filesystem("filecache", target_protocol="file", cache_storage=cache1) + fs.cat(f1) + + # populates last cache if file not found in first cache + fs = fsspec.filesystem( + "filecache", target_protocol="file", cache_storage=[cache1, cache2] + ) + assert fs.cat(f2) == data + assert len(os.listdir(cache2)) == 2 + assert fs._check_file(f1) + with pytest.raises(PermissionError): + fs.pop_from_cache(f1) + fs.pop_from_cache(f2) + assert len(os.listdir(cache2)) == 1 + assert fs._check_file(f2)[0] is False + assert fs._check_file(f1) + + def test_write_pickle_context(): tmp = str(tempfile.mkdtemp()) fn = tmp + "afile" diff --git a/fsspec/implementations/tests/test_local.py b/fsspec/implementations/tests/test_local.py index 3239d9b96..f33c9bfb6 100644 --- a/fsspec/implementations/tests/test_local.py +++ b/fsspec/implementations/tests/test_local.py @@ -302,6 +302,8 @@ def test_globfind_dirs(tmpdir): def test_touch(tmpdir): + import time + fn = tmpdir + "/in/file" fs = fsspec.filesystem("file", auto_mkdir=False) with pytest.raises(OSError): @@ -309,6 +311,7 @@ def test_touch(tmpdir): fs = fsspec.filesystem("file", auto_mkdir=True) fs.touch(fn) info = fs.info(fn) + time.sleep(0.2) fs.touch(fn) info2 = fs.info(fn) if not WIN: