Skip to content

Commit 5cbb99c

Browse files
authored
Merge pull request #328 from martindurant/cache_clear
start cache clear ops
2 parents df53411 + 65e8f22 commit 5cbb99c

File tree

3 files changed

+102
-3
lines changed

3 files changed

+102
-3
lines changed

fsspec/implementations/cached.py

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import logging
44
import os
55
import hashlib
6-
from shutil import move
6+
from shutil import move, rmtree
77
import tempfile
88
import inspect
99
from fsspec import AbstractFileSystem, filesystem
@@ -171,14 +171,17 @@ def _check_cache(self):
171171

172172
def _check_file(self, path):
173173
"""Is path in cache and still valid"""
174+
path = self._strip_protocol(path)
174175
self._check_cache()
175176
if not path.startswith(self.target_protocol):
176177
store_path = self.target_protocol + "://" + path
177178
path = self.fs._strip_protocol(store_path)
179+
else:
180+
store_path = path
178181
for storage, cache in zip(self.storage, self.cached_files):
179-
if path not in cache:
182+
if store_path not in cache:
180183
continue
181-
detail = cache[path].copy()
184+
detail = cache[store_path].copy()
182185
if self.check_files:
183186
if detail["uid"] != self.fs.ukey(path):
184187
continue
@@ -190,6 +193,41 @@ def _check_file(self, path):
190193
return detail, fn
191194
return False, None
192195

196+
def clear_cache(self):
    """Remove all files and metadata from the cache

    When several cache locations are configured, only the last one is
    cleared; it is assumed to be the read/write location.
    """
    writable_location = self.storage[-1]
    rmtree(writable_location)
    # reload metadata so lookups no longer see the removed entries
    self.load_cache()
204+
205+
def pop_from_cache(self, path):
    """Remove the cached version of the given file

    Deletes the local copy of the given (remote) path and drops its entry
    from the metadata of the last cache location. Does nothing if
    ``_check_file`` reports no cached copy.

    Parameters
    ----------
    path: str
        Remote path, with or without the target protocol prefix.

    Raises
    ------
    PermissionError
        If the cached copy lives in a cache location which is not the last
        one; those locations are assumed to be read-only.
    """
    # Same key derivation as ``_check_file`` so that ``store_path`` matches
    # the key under which the entry is looked up there.
    path = self._strip_protocol(path)
    if not path.startswith(self.target_protocol):
        store_path = self.target_protocol + "://" + path
        path = self.fs._strip_protocol(store_path)
    else:
        store_path = path
    _, fn = self._check_file(path)
    if fn is None:
        return
    # Compare against the directory *with* a trailing separator: a bare
    # string-prefix test would wrongly accept a file in a sibling location
    # such as ".../cache10" when the writable one is ".../cache1".
    # NOTE(review): assumes ``_check_file`` builds ``fn`` with os.path.join
    # on the storage directory -- confirm.
    writable_root = os.path.join(self.storage[-1], "")
    if not fn.startswith(writable_root):
        raise PermissionError(
            "Can only delete cached file in last, writable cache location"
        )
    # is in writable cache
    os.remove(fn)
    self.cached_files[-1].pop(store_path)
    self.save_cache()
230+
193231
def _open(
194232
self,
195233
path,
@@ -311,6 +349,8 @@ def __getattribute__(self, item):
311349
"head",
312350
"_check_file",
313351
"_check_cache",
352+
"clear_cache",
353+
"pop_from_cache",
314354
]:
315355
# all the methods defined in this class. Note `open` here, since
316356
# it calls `_open`, but is actually in superclass

fsspec/implementations/tests/test_cached.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,62 @@ def test_write():
9191
assert open(fn, "rb").read() == b"hello"
9292

9393

94+
def test_clear():
    """clear_cache empties the writable cache location and its metadata."""
    import tempfile

    origin = tempfile.mkdtemp()
    cache1 = tempfile.mkdtemp()
    data = b"test data"
    f1 = os.path.join(origin, "afile")
    with open(f1, "wb") as f:
        f.write(data)

    # populates first cache
    fs = fsspec.filesystem("filecache", target_protocol="file", cache_storage=cache1)
    assert fs.cat(f1) == data

    assert "cache" in os.listdir(cache1)
    assert len(os.listdir(cache1)) == 2
    # _check_file always returns a 2-tuple, which is truthy even on a miss,
    # so the first element has to be inspected explicitly
    assert fs._check_file(f1)[0] is not False

    fs.clear_cache()
    assert not fs._check_file(f1)[0]
    assert len(os.listdir(cache1)) < 2
115+
116+
117+
def test_pop():
    """pop_from_cache removes entries only from the last cache location."""
    import tempfile

    origin = tempfile.mkdtemp()
    cache1 = tempfile.mkdtemp()
    cache2 = tempfile.mkdtemp()
    data = b"test data"
    f1 = os.path.join(origin, "afile")
    f2 = os.path.join(origin, "bfile")
    with open(f1, "wb") as f:
        f.write(data)
    with open(f2, "wb") as f:
        f.write(data)

    # populates first cache
    fs = fsspec.filesystem("filecache", target_protocol="file", cache_storage=cache1)
    fs.cat(f1)

    # populates last cache if file not found in first cache
    fs = fsspec.filesystem(
        "filecache", target_protocol="file", cache_storage=[cache1, cache2]
    )
    assert fs.cat(f2) == data
    assert len(os.listdir(cache2)) == 2
    # _check_file always returns a 2-tuple, which is truthy even on a miss,
    # so the first element has to be inspected explicitly
    assert fs._check_file(f1)[0] is not False
    # f1 lives in the first location, which is not the last one
    with pytest.raises(PermissionError):
        fs.pop_from_cache(f1)
    fs.pop_from_cache(f2)
    assert len(os.listdir(cache2)) == 1
    assert fs._check_file(f2)[0] is False
    # the entry in the first location must be untouched
    assert fs._check_file(f1)[0] is not False
148+
149+
94150
def test_write_pickle_context():
95151
tmp = str(tempfile.mkdtemp())
96152
fn = tmp + "afile"

fsspec/implementations/tests/test_local.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,13 +302,16 @@ def test_globfind_dirs(tmpdir):
302302

303303

304304
def test_touch(tmpdir):
305+
import time
306+
305307
fn = tmpdir + "/in/file"
306308
fs = fsspec.filesystem("file", auto_mkdir=False)
307309
with pytest.raises(OSError):
308310
fs.touch(fn)
309311
fs = fsspec.filesystem("file", auto_mkdir=True)
310312
fs.touch(fn)
311313
info = fs.info(fn)
314+
time.sleep(0.2)
312315
fs.touch(fn)
313316
info2 = fs.info(fn)
314317
if not WIN:

0 commit comments

Comments
 (0)