Configurable Serialization (#63)
* add file system serializer

* add filesystem serializer

* add redis serializer

* add simple serializer

* move serializers into separate module

* extract common code to base serializer class

* filter redis deprecation warning

* pass serializer into cache constructor

* add tests for custom serializers

* fix wrong docstring descriptions

* add changelog

* remove docstring repetition

* fix type mismatch

* class variable serializer

* add changelog
northernSage authored Nov 8, 2021
1 parent 915a038 commit 75b5f2f
Showing 11 changed files with 275 additions and 53 deletions.
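The net effect of these changes is that every cache type now carries a serializer class variable and routes its dump/load calls through it, so the serialization strategy can be swapped by overriding a single attribute instead of editing cache logic. A minimal sketch of that pattern against SimpleCache (the counting subclass below is purely illustrative and not part of this commit):

import pickle

from cachelib import SimpleCache
from cachelib.serializers import SimpleSerializer


class CountingSerializer(SimpleSerializer):
    """Illustrative serializer that counts how many values it pickles."""

    dumped = 0

    def dumps(self, value, protocol=pickle.HIGHEST_PROTOCOL):
        type(self).dumped += 1
        return super().dumps(value, protocol)


class CountingCache(SimpleCache):
    # swapping the class variable is all it takes to change serialization
    serializer = CountingSerializer()


cache = CountingCache()
cache.set("answer", 42)
assert cache.get("answer") == 42
assert CountingSerializer.dumped == 1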
8 changes: 8 additions & 0 deletions CHANGES.rst
@@ -1,3 +1,11 @@
Version 0.5.0
-------------

Unreleased

- Cache types now have configurable serializers. :pr:`63`


Version 0.4.1
-------------

1 change: 1 addition & 0 deletions setup.cfg
@@ -38,6 +38,7 @@ testpaths = tests
filterwarnings =
error
default::DeprecationWarning:cachelib.uwsgi
default::DeprecationWarning:cachelib.redis

[coverage:run]
branch = True
23 changes: 13 additions & 10 deletions src/cachelib/file.py
@@ -1,14 +1,14 @@
import errno
import logging
import os
import pickle
import tempfile
import typing as _t
from hashlib import md5
from pathlib import Path
from time import time

from cachelib.base import BaseCache
from cachelib.serializers import FileSystemSerializer


class FileSystemCache(BaseCache):
@@ -32,6 +32,8 @@ class FileSystemCache(BaseCache):
#: keep amount of files in a cache element
_fs_count_file = "__wz_cache_count"

serializer = FileSystemSerializer()

def __init__(
self,
cache_dir: str,
@@ -96,7 +98,8 @@ def _remove_expired(self, now: float) -> None:
for fname in self._list_dir():
try:
with open(fname, "rb") as f:
expires = pickle.load(f)
expires = self.serializer.load(f)
print(expires)

@mgorny commented on Jan 1, 2022:

Isn't this leftover print-debug?

@northernSage (Author, Member) commented on Jan 1, 2022:

Oops, yeah that wasn't supposed to be there. Luckily it won't break anything, I'll release a patch to remove it soon. Thanks for letting me know.

if expires != 0 and expires < now:
os.remove(fname)
self._update_count(delta=-1)
@@ -114,7 +117,7 @@ def _remove_older(self) -> bool:
for fname in self._list_dir():
try:
with open(fname, "rb") as f:
exp_fname_tuples.append((pickle.load(f), fname))
exp_fname_tuples.append((self.serializer.load(f), fname))
except FileNotFoundError:
pass
except (OSError, EOFError):
@@ -181,12 +184,12 @@ def get(self, key: str) -> _t.Any:
filename = self._get_filename(key)
try:
with open(filename, "rb") as f:
pickle_time = pickle.load(f)
pickle_time = self.serializer.load(f)
if pickle_time == 0 or pickle_time >= time():
return pickle.load(f)
return self.serializer.load(f)
except FileNotFoundError:
pass
except (OSError, EOFError, pickle.PickleError):
except (OSError, EOFError):
logging.warning(
"Exception raised while handling cache file '%s'",
filename,
@@ -223,8 +226,8 @@ def set(
suffix=self._fs_transaction_suffix, dir=self._path
)
with os.fdopen(fd, "wb") as f:
pickle.dump(timeout, f, 1)
pickle.dump(value, f, pickle.HIGHEST_PROTOCOL)
self.serializer.dump(timeout, f) # this returns bool
self.serializer.dump(value, f)
os.replace(tmp, filename)
os.chmod(filename, self._mode)
fsize = Path(filename).stat().st_size
@@ -259,14 +262,14 @@ def has(self, key: str) -> bool:
filename = self._get_filename(key)
try:
with open(filename, "rb") as f:
pickle_time = pickle.load(f)
pickle_time = self.serializer.load(f)
if pickle_time == 0 or pickle_time >= time():
return True
else:
return False
except FileNotFoundError: # if there is no file there is no key
return False
except (OSError, EOFError, pickle.PickleError):
except (OSError, EOFError):
logging.warning(
"Exception raised while handling cache file '%s'",
filename,
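As the set() and get() hunks above show, FileSystemCache now writes the expiry time and the value as two consecutive serializer.dump() calls and reads them back in the same order with serializer.load(). A minimal round-trip sketch of that stream protocol, using io.BytesIO as a stand-in for the on-disk cache file (illustrative only):

import io

from cachelib.serializers import FileSystemSerializer

serializer = FileSystemSerializer()
buffer = io.BytesIO()  # stands in for the cache file opened by FileSystemCache

# set() writes two pickled records: the expiry timestamp, then the value
serializer.dump(300, buffer)
serializer.dump({"user": "northernSage"}, buffer)

buffer.seek(0)
assert serializer.load(buffer) == 300  # get() reads the expiry back first
assert serializer.load(buffer) == {"user": "northernSage"}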
44 changes: 20 additions & 24 deletions src/cachelib/redis.py
@@ -1,7 +1,8 @@
import pickle
import typing as _t
import warnings

from cachelib.base import BaseCache
from cachelib.serializers import RedisSerializer


class RedisCache(BaseCache):
@@ -26,6 +27,8 @@ class RedisCache(BaseCache):
Any additional keyword arguments will be passed to ``redis.Redis``.
"""

serializer = RedisSerializer()

def __init__(
self,
host: _t.Any = "localhost",
@@ -60,29 +63,22 @@ def _normalize_timeout(self, timeout: _t.Optional[int]) -> int:
return timeout

def dump_object(self, value: _t.Any) -> bytes:
"""Dumps an object into a string for redis. By default it serializes
integers as regular string and pickle dumps everything else.
"""
if isinstance(type(value), int):
return str(value).encode("ascii")
return b"!" + pickle.dumps(value)

def load_object(self, value: _t.Optional[bytes]) -> _t.Any:
"""The reversal of :meth:`dump_object`. This might be called with
None.
"""
if value is None:
return None
if value.startswith(b"!"):
try:
return pickle.loads(value[1:])
except pickle.PickleError:
return None
try:
return int(value)
except ValueError:
# before 0.8 we did not have serialization. Still support that.
return value
warnings.warn(
"'dump_object' is deprecated and will be removed in the future."
"This is a proxy call to 'RedisCache.serializer.dumps'",
DeprecationWarning,
stacklevel=2,
)
return self.serializer.dumps(value)

def load_object(self, value: _t.Any) -> _t.Any:
warnings.warn(
"'load_object' is deprecated and will be removed in the future."
"This is a proxy call to 'RedisCache.serializer.loads'",
DeprecationWarning,
stacklevel=2,
)
return self.serializer.loads(value)

def get(self, key: str) -> _t.Any:
return self.load_object(self._client.get(self.key_prefix + key))
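With dump_object() and load_object() reduced to deprecation shims, the wire format now lives entirely in RedisSerializer, which keeps the historical behaviour: pickled payloads are prefixed with b"!", and plain values written before cachelib had serialization are still readable. A small round-trip sketch that exercises only the serializer, so no Redis server is required (illustrative only):

from cachelib.serializers import RedisSerializer

serializer = RedisSerializer()

payload = serializer.dumps({"hits": 7})
assert payload.startswith(b"!")  # pickled payloads carry the b"!" prefix
assert serializer.loads(payload) == {"hits": 7}

# data stored before cachelib had serialization is still supported
assert serializer.loads(b"123") == 123  # plain integer bytes
assert serializer.loads(None) is None   # missing keys come back as None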
105 changes: 105 additions & 0 deletions src/cachelib/serializers.py
@@ -0,0 +1,105 @@
import logging
import pickle
import typing as _t


class BaseSerializer:
"""This is the base interface for all default serializers.
BaseSerializer.load and BaseSerializer.dump will
default to pickle.load and pickle.dump. This is currently
used only by FileSystemCache which dumps/loads to/from a file stream.
"""

def _warn(self, e: pickle.PickleError) -> None:
logging.warning(
f"An exception has been raised during a pickling operation: {e}"
)

def dump(
self, value: int, f: _t.IO, protocol: int = pickle.HIGHEST_PROTOCOL
) -> None:
try:
pickle.dump(value, f, protocol)
except (pickle.PickleError, pickle.PicklingError) as e:
self._warn(e)

def load(self, f: _t.BinaryIO) -> _t.Any:
try:
data = pickle.load(f)
except pickle.PickleError as e:
self._warn(e)
return None
else:
return data

"""BaseSerializer.loads and BaseSerializer.dumps
work on top of pickle.loads and pickle.dumps. Dumping/loading
strings and byte strings is the default for most cache types.
"""

def dumps(self, value: _t.Any, protocol: int = pickle.HIGHEST_PROTOCOL) -> bytes:
try:
serialized = pickle.dumps(value, protocol)
except (pickle.PickleError, pickle.PicklingError) as e:
self._warn(e)
return serialized

def loads(self, bvalue: bytes) -> _t.Any:
try:
data = pickle.loads(bvalue)
except pickle.PickleError as e:
self._warn(e)
return None
else:
return data


"""Default serializers for each cache type.
The following classes can be used to further customize
serialization behaviour. Alternatively, any serializer can be
overridden in order to use a custom serializer with a different
strategy altogether.
"""


class UWSGISerializer(BaseSerializer):
"""Default serializer for UWSGICache."""


class SimpleSerializer(BaseSerializer):
"""Default serializer for SimpleCache."""


class FileSystemSerializer(BaseSerializer):
"""Default serializer for FileSystemCache."""


class RedisSerializer(BaseSerializer):
"""Default serializer for RedisCache."""

def dumps(self, value: _t.Any, protocol: int = pickle.HIGHEST_PROTOCOL) -> bytes:
"""Dumps an object into a string for redis. By default it serializes
integers as regular string and pickle dumps everything else.
"""
if isinstance(type(value), int):
return str(value).encode("ascii")
return b"!" + pickle.dumps(value, protocol)

def loads(self, value: _t.Optional[bytes]) -> _t.Any:
"""The reversal of :meth:`dump_object`. This might be called with
None.
"""
if value is None:
return None
if value.startswith(b"!"):
try:
return pickle.loads(value[1:])
except pickle.PickleError:
return None
try:
return int(value)
except ValueError:
# before 0.8 we did not have serialization. Still support that.
return value
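As the module docstring above notes, any of these serializers can be overridden to use a different strategy altogether. A sketch of what that might look like, pairing SimpleCache with a JSON-based serializer via the new class attribute (the JsonSerializer below is an illustration, not something shipped by this commit):

import json
import typing as _t

from cachelib import SimpleCache
from cachelib.serializers import BaseSerializer


class JsonSerializer(BaseSerializer):
    """Illustrative serializer that stores values as JSON instead of pickle."""

    def dumps(self, value: _t.Any, protocol: _t.Any = None) -> bytes:
        # note: JSON only handles basic types, unlike the pickle-based defaults
        return json.dumps(value).encode("utf-8")

    def loads(self, bvalue: bytes) -> _t.Any:
        return json.loads(bvalue)


class JsonSimpleCache(SimpleCache):
    serializer = JsonSerializer()


cache = JsonSimpleCache()
cache.set("config", {"debug": True, "retries": 3})
assert cache.get("config") == {"debug": True, "retries": 3}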
18 changes: 12 additions & 6 deletions src/cachelib/simple.py
@@ -1,8 +1,8 @@
import pickle
import typing as _t
from time import time

from cachelib.base import BaseCache
from cachelib.serializers import SimpleSerializer


class SimpleCache(BaseCache):
@@ -19,7 +19,13 @@ class SimpleCache(BaseCache):
0 indicates that the cache never expires.
"""

def __init__(self, threshold: int = 500, default_timeout: int = 300):
serializer = SimpleSerializer()

def __init__(
self,
threshold: int = 500,
default_timeout: int = 300,
):
BaseCache.__init__(self, default_timeout)
self._cache: _t.Dict[str, _t.Any] = {}
self._threshold = threshold or 500 # threshold = 0
@@ -62,22 +68,22 @@ def get(self, key: str) -> _t.Any:
try:
expires, value = self._cache[key]
if expires == 0 or expires > time():
return pickle.loads(value)
except (KeyError, pickle.PickleError):
return self.serializer.loads(value)
except KeyError:
return None

def set(
self, key: str, value: _t.Any, timeout: _t.Optional[int] = None
) -> _t.Optional[bool]:
expires = self._normalize_timeout(timeout)
self._prune()
self._cache[key] = (expires, pickle.dumps(value, pickle.HIGHEST_PROTOCOL))
self._cache[key] = (expires, self.serializer.dumps(value))
return True

def add(self, key: str, value: _t.Any, timeout: _t.Optional[int] = None) -> bool:
expires = self._normalize_timeout(timeout)
self._prune()
item = (expires, pickle.dumps(value, pickle.HIGHEST_PROTOCOL))
item = (expires, self.serializer.dumps(value))
if key in self._cache:
return False
self._cache.setdefault(key, item)
22 changes: 17 additions & 5 deletions src/cachelib/uwsgi.py
@@ -1,8 +1,8 @@
import pickle
import platform
import typing as _t

from cachelib.base import BaseCache
from cachelib.serializers import UWSGISerializer


class UWSGICache(BaseCache):
@@ -20,7 +20,13 @@ class UWSGICache(BaseCache):
the cache.
"""

def __init__(self, default_timeout: int = 300, cache: str = ""):
serializer = UWSGISerializer()

def __init__(
self,
default_timeout: int = 300,
cache: str = "",
):
BaseCache.__init__(self, default_timeout)

if platform.python_implementation() == "PyPy":
@@ -44,7 +50,7 @@ def get(self, key: str) -> _t.Any:
rv = self._uwsgi.cache_get(key, self.cache)
if rv is None:
return
return pickle.loads(rv)
return self.serializer.loads(rv)

def delete(self, key: str) -> bool:
return bool(self._uwsgi.cache_del(key, self.cache))
@@ -53,14 +59,20 @@ def set(
self, key: str, value: _t.Any, timeout: _t.Optional[int] = None
) -> _t.Optional[bool]:
result = self._uwsgi.cache_update(
key, pickle.dumps(value), self._normalize_timeout(timeout), self.cache
key,
self.serializer.dumps(value),
self._normalize_timeout(timeout),
self.cache,
) # type: bool
return result

def add(self, key: str, value: _t.Any, timeout: _t.Optional[int] = None) -> bool:
return bool(
self._uwsgi.cache_set(
key, pickle.dumps(value), self._normalize_timeout(timeout), self.cache
key,
self.serializer.dumps(value),
self._normalize_timeout(timeout),
self.cache,
)
)
