diff --git a/.travis.yml b/.travis.yml index 39f23eb..31e869b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,34 +1,43 @@ -language: python - +os: linux dist: xenial -sudo: true +language: python notifications: email: false env: global: + - INSTALL_NUMPY=1 - PY_VENV=$TRAVIS_BUILD_DIR/venv - PYTHON=$PY_VENV/bin/python -matrix: +jobs: include: - python: "3.6" dist: trusty arch: amd64 - python: "3.6" arch: amd64 - - python: "3.6" - arch: arm64 - python: "3.7" arch: amd64 + - python: "3.6" + dist: bionic + arch: arm64 - python: "3.7" + dist: bionic arch: arm64 install: - python -m venv $PY_VENV - - $PYTHON -m pip install -U pip wheel setuptools numpy - - $PYTHON -m pip install -r requirements.txt + - export PIP="$PYTHON -m pip" + # For some reason, pip caching fails on ARM64 + - export PIP_INSTALL="$PIP install --no-cache-dir" + - $PIP_INSTALL -U pip wheel setuptools + - | + if [ "${INSTALL_NUMPY}" = "1" ]; then + $PIP_INSTALL -U numpy + fi + - $PIP_INSTALL -r requirements.txt before_script: - $PYTHON setup.py install diff --git a/BACKPORT-NOTES.txt b/BACKPORT-NOTES.txt index 0b7adc6..d8682c0 100644 --- a/BACKPORT-NOTES.txt +++ b/BACKPORT-NOTES.txt @@ -12,3 +12,4 @@ then: * Same for 3.7 +* Currently, we are syncing with cpython branch=3.8, revision=460eac20a625d5dcef409dadc120a26d272a8013 \ No newline at end of file diff --git a/patch b/patch index 6aa6ddf..d5e9665 100644 --- a/patch +++ b/patch @@ -1,5 +1,5 @@ ---- ../38/Include/picklebufobject.h 2019-11-02 16:20:20.358405065 +0100 -+++ pickle5/picklebufobject.h 2019-11-02 16:30:40.749731365 +0100 +--- ../38/Include/picklebufobject.h 2020-05-10 16:01:50.000000000 -0700 ++++ pickle5/picklebufobject.h 2020-05-18 16:44:04.000000000 -0700 @@ -10,18 +10,18 @@ #ifndef Py_LIMITED_API @@ -23,8 +23,8 @@ #endif /* !Py_LIMITED_API */ ---- ../38/Objects/picklebufobject.c 2019-11-02 16:20:20.834410181 +0100 -+++ pickle5/picklebufobject.c 2019-11-02 16:30:40.749731365 +0100 +--- ../38/Objects/picklebufobject.c 2020-05-10 16:01:50.000000000 -0700 ++++ pickle5/picklebufobject.c 2020-05-18 16:46:09.000000000 -0700 @@ -4,6 +4,8 @@ #include "Python.h" #include @@ -34,8 +34,8 @@ typedef struct { PyObject_HEAD /* The view exported by the original object */ ---- ../38/Modules/_pickle.c 2019-11-02 16:20:20.754409320 +0100 -+++ pickle5/_pickle.c 2019-11-02 16:46:35.809907792 +0100 +--- ../38/Modules/_pickle.c 2020-05-18 14:58:20.000000000 -0700 ++++ pickle5/_pickle.c 2020-05-18 17:05:11.000000000 -0700 @@ -1,11 +1,11 @@ -/* pickle accelerator C extensor: _pickle module. - * @@ -69,7 +69,7 @@ /************************************************************************* A custom hashtable mapping void* to Python ints. This is used by the pickler -@@ -7020,11 +7026,6 @@ +@@ -7066,11 +7072,6 @@ PyObject *global; PyObject *module; @@ -81,7 +81,7 @@ /* Try to map the old names used in Python 2.x to the new ones used in Python 3.x. We do this only with old pickle protocols and when the user has not disabled the feature. */ -@@ -7877,11 +7878,25 @@ +@@ -7923,11 +7924,25 @@ return NULL; } @@ -107,7 +107,7 @@ {NULL, NULL} /* sentinel */ }; -@@ -7965,6 +7980,8 @@ +@@ -8012,6 +8027,8 @@ Py_INCREF(&Unpickler_Type); if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0) return NULL; @@ -116,8 +116,8 @@ Py_INCREF(&PyPickleBuffer_Type); if (PyModule_AddObject(m, "PickleBuffer", (PyObject *)&PyPickleBuffer_Type) < 0) ---- ../38/Lib/pickle.py 2019-11-02 16:20:20.482406398 +0100 -+++ pickle5/pickle.py 2019-11-02 16:46:54.906114687 +0100 +--- ../38/Lib/pickle.py 2020-05-18 14:58:20.000000000 -0700 ++++ pickle5/pickle.py 2020-05-18 16:41:15.000000000 -0700 @@ -40,7 +40,7 @@ "Unpickler", "dump", "dumps", "load", "loads"] @@ -155,8 +155,8 @@ PickleError, PicklingError, UnpicklingError, ---- ../38/Lib/pickletools.py 2019-11-02 16:20:20.486406441 +0100 -+++ pickle5/pickletools.py 2019-11-02 16:30:40.753731415 +0100 +--- ../38/Lib/pickletools.py 2020-05-10 16:01:50.000000000 -0700 ++++ pickle5/pickletools.py 2020-05-18 16:41:34.000000000 -0700 @@ -12,10 +12,11 @@ import codecs @@ -170,8 +170,8 @@ __all__ = ['dis', 'genops', 'optimize'] bytes_types = pickle.bytes_types ---- ../38/Lib/test/pickletester.py 2019-11-02 16:20:20.538407000 +0100 -+++ pickle5/test/pickletester.py 2019-11-02 16:42:03.558511125 +0100 +--- ../38/Lib/test/pickletester.py 2020-05-18 14:58:20.000000000 -0700 ++++ pickle5/test/pickletester.py 2020-05-18 16:37:04.000000000 -0700 @@ -5,8 +5,6 @@ import functools import os @@ -191,7 +191,7 @@ requires_32b = unittest.skipUnless(sys.maxsize < 2**32, "test is only meaningful on 32-bit builds") -@@ -1445,12 +1444,11 @@ +@@ -1457,12 +1456,11 @@ # of 1. def dont_test_disassembly(self): from io import StringIO @@ -205,7 +205,7 @@ got = filelike.getvalue() self.assertEqual(expected, got) -@@ -2328,7 +2326,6 @@ +@@ -2340,7 +2338,6 @@ elif frameless_start is not None: self.assertLess(pos - frameless_start, self.FRAME_SIZE_MIN) @@ -213,7 +213,7 @@ def test_framing_many_objects(self): obj = list(range(10**5)) for proto in range(4, pickle.HIGHEST_PROTOCOL + 1): -@@ -2418,7 +2415,6 @@ +@@ -2430,7 +2427,6 @@ count_opcode(pickle.FRAME, pickled)) self.assertEqual(obj, self.loads(some_frames_pickle)) @@ -221,8 +221,8 @@ def test_framed_write_sizes_with_delayed_writer(self): class ChunkAccumulator: """Accumulate pickler output in a list of raw chunks.""" ---- ../38/Lib/test/test_pickle.py 2019-11-02 16:20:20.602407687 +0100 -+++ pickle5/test/test_pickle.py 2019-11-02 16:37:43.523126691 +0100 +--- ../38/Lib/test/test_pickle.py 2020-05-10 16:01:50.000000000 -0700 ++++ pickle5/test/test_pickle.py 2020-05-18 16:37:57.000000000 -0700 @@ -1,7 +1,6 @@ from _compat_pickle import (IMPORT_MAPPING, REVERSE_IMPORT_MAPPING, NAME_MAPPING, REVERSE_NAME_MAPPING) @@ -272,8 +272,8 @@ class CUnpicklerTests(PyUnpicklerTests): unpickler = _pickle.Unpickler ---- ../38/Lib/test/test_picklebuffer.py 2019-11-02 16:20:20.602407687 +0100 -+++ pickle5/test/test_picklebuffer.py 2019-11-02 16:38:22.283628888 +0100 +--- ../38/Lib/test/test_picklebuffer.py 2020-05-10 16:01:50.000000000 -0700 ++++ pickle5/test/test_picklebuffer.py 2020-05-18 16:31:01.000000000 -0700 @@ -4,12 +4,13 @@ """ diff --git a/pickle5/_pickle.c b/pickle5/_pickle.c index de11564..13975e0 100644 --- a/pickle5/_pickle.c +++ b/pickle5/_pickle.c @@ -1377,13 +1377,42 @@ _Unpickler_ReadInto(UnpicklerObject *self, char *buf, Py_ssize_t n) } /* Read from file */ - if (!self->readinto) { + if (!self->read) { + /* We're unpickling memory, this means the input is truncated */ return bad_readline(); } if (_Unpickler_SkipConsumed(self) < 0) { return -1; } + if (!self->readinto) { + /* readinto() not supported on file-like object, fall back to read() + * and copy into destination buffer (bpo-39681) */ + PyObject* len = PyLong_FromSsize_t(n); + if (len == NULL) { + return -1; + } + PyObject* data = _Pickle_FastCall(self->read, len); + if (data == NULL) { + return -1; + } + if (!PyBytes_Check(data)) { + PyErr_Format(PyExc_ValueError, + "read() returned non-bytes object (%R)", + Py_TYPE(data)); + Py_DECREF(data); + return -1; + } + Py_ssize_t read_size = PyBytes_GET_SIZE(data); + if (read_size < n) { + Py_DECREF(data); + return bad_readline(); + } + memcpy(buf, PyBytes_AS_STRING(data), n); + Py_DECREF(data); + return n; + } + /* Call readinto() into user buffer */ PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE); if (buf_obj == NULL) { @@ -1612,17 +1641,19 @@ _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file) _Py_IDENTIFIER(readinto); _Py_IDENTIFIER(readline); + /* Optional file methods */ if (_PyObject_LookupAttrId(file, &PyId_peek, &self->peek) < 0) { return -1; } + if (_PyObject_LookupAttrId(file, &PyId_readinto, &self->readinto) < 0) { + return -1; + } (void)_PyObject_LookupAttrId(file, &PyId_read, &self->read); - (void)_PyObject_LookupAttrId(file, &PyId_readinto, &self->readinto); (void)_PyObject_LookupAttrId(file, &PyId_readline, &self->readline); - if (!self->readline || !self->readinto || !self->read) { + if (!self->readline || !self->read) { if (!PyErr_Occurred()) { PyErr_SetString(PyExc_TypeError, - "file must have 'read', 'readinto' and " - "'readline' attributes"); + "file must have 'read' and 'readline' attributes"); } Py_CLEAR(self->read); Py_CLEAR(self->readinto); @@ -4463,12 +4494,13 @@ static int dump(PicklerObject *self, PyObject *obj) { const char stop_op = STOP; + int status = -1; PyObject *tmp; _Py_IDENTIFIER(reducer_override); if (_PyObject_LookupAttrId((PyObject *)self, &PyId_reducer_override, &tmp) < 0) { - return -1; + goto error; } /* Cache the reducer_override method, if it exists. */ if (tmp != NULL) { @@ -4485,7 +4517,7 @@ dump(PicklerObject *self, PyObject *obj) assert(self->proto >= 0 && self->proto < 256); header[1] = (unsigned char)self->proto; if (_Pickler_Write(self, header, 2) < 0) - return -1; + goto error; if (self->proto >= 4) self->framing = 1; } @@ -4493,9 +4525,22 @@ dump(PicklerObject *self, PyObject *obj) if (save(self, obj, 0) < 0 || _Pickler_Write(self, &stop_op, 1) < 0 || _Pickler_CommitFrame(self) < 0) - return -1; + goto error; + + // Success + status = 0; + + error: self->framing = 0; - return 0; + + /* Break the reference cycle we generated at the beginning this function + * call when setting the reducer_override attribute of the Pickler instance + * to a bound method of the same instance. This is important as the Pickler + * instance holds a reference to each object it has pickled (through its + * memo): thus, these objects wont be garbage-collected as long as the + * Pickler itself is not collected. */ + Py_CLEAR(self->reducer_override); + return status; } /*[clinic input] @@ -4653,8 +4698,9 @@ _pickle.Pickler.__init__ This takes a binary file for writing a pickle data stream. The optional *protocol* argument tells the pickler to use the given -protocol; supported protocols are 0, 1, 2, 3 and 4. The default -protocol is 3; a backward-incompatible protocol designed for Python 3. +protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default +protocol is 4. It was introduced in Python 3.4, and is incompatible +with previous versions. Specifying a negative protocol version selects the highest protocol version supported. The higher the protocol used, the more recent the @@ -4686,7 +4732,7 @@ static int _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file, PyObject *protocol, int fix_imports, PyObject *buffer_callback) -/*[clinic end generated code: output=0abedc50590d259b input=bb886e00443a7811]*/ +/*[clinic end generated code: output=0abedc50590d259b input=a7c969699bf5dad3]*/ { _Py_IDENTIFIER(persistent_id); _Py_IDENTIFIER(dispatch_table); @@ -7634,8 +7680,8 @@ This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more efficient. The optional *protocol* argument tells the pickler to use the given -protocol; supported protocols are 0, 1, 2, 3 and 4. The default -protocol is 4. It was introduced in Python 3.4, it is incompatible +protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default +protocol is 4. It was introduced in Python 3.4, and is incompatible with previous versions. Specifying a negative protocol version selects the highest protocol @@ -7661,7 +7707,7 @@ static PyObject * _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file, PyObject *protocol, int fix_imports, PyObject *buffer_callback) -/*[clinic end generated code: output=706186dba996490c input=cfdcaf573ed6e46c]*/ +/*[clinic end generated code: output=706186dba996490c input=5ed6653da99cd97c]*/ { PicklerObject *pickler = _Pickler_New(); @@ -7704,8 +7750,8 @@ _pickle.dumps Return the pickled representation of the object as a bytes object. The optional *protocol* argument tells the pickler to use the given -protocol; supported protocols are 0, 1, 2, 3 and 4. The default -protocol is 4. It was introduced in Python 3.4, it is incompatible +protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default +protocol is 4. It was introduced in Python 3.4, and is incompatible with previous versions. Specifying a negative protocol version selects the highest protocol @@ -7725,7 +7771,7 @@ into *file* as part of the pickle stream. It is an error if static PyObject * _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol, int fix_imports, PyObject *buffer_callback) -/*[clinic end generated code: output=fbab0093a5580fdf input=9f334d535ff7194f]*/ +/*[clinic end generated code: output=fbab0093a5580fdf input=e543272436c6f987]*/ { PyObject *result; PicklerObject *pickler = _Pickler_New(); @@ -7930,6 +7976,7 @@ pickle_traverse(PyObject *m, visitproc visit, void *arg) Py_VISIT(st->import_mapping_3to2); Py_VISIT(st->codecs_encode); Py_VISIT(st->getattr); + Py_VISIT(st->partial); return 0; } diff --git a/pickle5/clinic/_pickle.c.h b/pickle5/clinic/_pickle.c.h index 9da3f11..0457a43 100644 --- a/pickle5/clinic/_pickle.c.h +++ b/pickle5/clinic/_pickle.c.h @@ -69,8 +69,9 @@ PyDoc_STRVAR(_pickle_Pickler___init____doc__, "This takes a binary file for writing a pickle data stream.\n" "\n" "The optional *protocol* argument tells the pickler to use the given\n" -"protocol; supported protocols are 0, 1, 2, 3 and 4. The default\n" -"protocol is 3; a backward-incompatible protocol designed for Python 3.\n" +"protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default\n" +"protocol is 4. It was introduced in Python 3.4, and is incompatible\n" +"with previous versions.\n" "\n" "Specifying a negative protocol version selects the highest protocol\n" "version supported. The higher the protocol used, the more recent the\n" @@ -463,8 +464,8 @@ PyDoc_STRVAR(_pickle_dump__doc__, "be more efficient.\n" "\n" "The optional *protocol* argument tells the pickler to use the given\n" -"protocol; supported protocols are 0, 1, 2, 3 and 4. The default\n" -"protocol is 4. It was introduced in Python 3.4, it is incompatible\n" +"protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default\n" +"protocol is 4. It was introduced in Python 3.4, and is incompatible\n" "with previous versions.\n" "\n" "Specifying a negative protocol version selects the highest protocol\n" @@ -550,8 +551,8 @@ PyDoc_STRVAR(_pickle_dumps__doc__, "Return the pickled representation of the object as a bytes object.\n" "\n" "The optional *protocol* argument tells the pickler to use the given\n" -"protocol; supported protocols are 0, 1, 2, 3 and 4. The default\n" -"protocol is 4. It was introduced in Python 3.4, it is incompatible\n" +"protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default\n" +"protocol is 4. It was introduced in Python 3.4, and is incompatible\n" "with previous versions.\n" "\n" "Specifying a negative protocol version selects the highest protocol\n" @@ -835,4 +836,4 @@ _pickle_loads(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec exit: return return_value; } -/*[clinic end generated code: output=de075ec48d4ee0e1 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=e2506823be1960c5 input=a9049054013a1b77]*/ diff --git a/pickle5/pickle.py b/pickle5/pickle.py index ed7dab6..e66f6f4 100644 --- a/pickle5/pickle.py +++ b/pickle5/pickle.py @@ -409,9 +409,9 @@ def __init__(self, file, protocol=None, *, fix_imports=True, """This takes a binary file for writing a pickle data stream. The optional *protocol* argument tells the pickler to use the - given protocol; supported protocols are 0, 1, 2, 3 and 4. The - default protocol is 4. It was introduced in Python 3.4, it is - incompatible with previous versions. + given protocol; supported protocols are 0, 1, 2, 3, 4 and 5. + The default protocol is 4. It was introduced in Python 3.4, and + is incompatible with previous versions. Specifying a negative protocol version selects the highest protocol version supported. The higher the protocol used, the diff --git a/pickle5/test/pickletester.py b/pickle5/test/pickletester.py index fc143f1..95423e4 100644 --- a/pickle5/test/pickletester.py +++ b/pickle5/test/pickletester.py @@ -72,6 +72,18 @@ def tell(self): raise io.UnsupportedOperation +class MinimalIO(object): + """ + A file-like object that doesn't support readinto(). + """ + def __init__(self, *args): + self._bio = io.BytesIO(*args) + self.getvalue = self._bio.getvalue + self.read = self._bio.read + self.readline = self._bio.readline + self.write = self._bio.write + + # We can't very well test the extension registry without putting known stuff # in it, but we have to be careful to restore its original state. Code # should do this: @@ -3357,7 +3369,7 @@ def test_reusing_unpickler_objects(self): f.seek(0) self.assertEqual(unpickler.load(), data2) - def _check_multiple_unpicklings(self, ioclass): + def _check_multiple_unpicklings(self, ioclass, *, seekable=True): for proto in protocols: with self.subTest(proto=proto): data1 = [(x, str(x)) for x in range(2000)] + [b"abcde", len] @@ -3370,10 +3382,10 @@ def _check_multiple_unpicklings(self, ioclass): f = ioclass(pickled * N) unpickler = self.unpickler_class(f) for i in range(N): - if f.seekable(): + if seekable: pos = f.tell() self.assertEqual(unpickler.load(), data1) - if f.seekable(): + if seekable: self.assertEqual(f.tell(), pos + len(pickled)) self.assertRaises(EOFError, unpickler.load) @@ -3381,7 +3393,12 @@ def test_multiple_unpicklings_seekable(self): self._check_multiple_unpicklings(io.BytesIO) def test_multiple_unpicklings_unseekable(self): - self._check_multiple_unpicklings(UnseekableIO) + self._check_multiple_unpicklings(UnseekableIO, seekable=False) + + def test_multiple_unpicklings_minimal(self): + # File-like object that doesn't support peek() and readinto() + # (bpo-39681) + self._check_multiple_unpicklings(MinimalIO, seekable=False) def test_unpickling_buffering_readline(self): # Issue #12687: the unpickler's buffering logic could fail with @@ -3493,6 +3510,30 @@ class MyClass: ValueError, 'The reducer just failed'): p.dump(h) + @support.cpython_only + def test_reducer_override_no_reference_cycle(self): + # bpo-39492: reducer_override used to induce a spurious reference cycle + # inside the Pickler object, that could prevent all serialized objects + # from being garbage-collected without explicity invoking gc.collect. + + for proto in range(0, pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(proto=proto): + def f(): + pass + + wr = weakref.ref(f) + + bio = io.BytesIO() + p = self.pickler_class(bio, proto) + p.dump(f) + new_f = pickle.loads(bio.getvalue()) + assert new_f == 5 + + del p + del f + + self.assertIsNone(wr()) + class AbstractDispatchTableTests(unittest.TestCase):