This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MXNET-1398] Enable zero-copy from numpy to MXNet NDArray #14733

Merged (8 commits) on Apr 28, 2019
1 change: 1 addition & 0 deletions include/mxnet/c_api.h
@@ -823,6 +823,7 @@ MXNET_DLL int MXNDArrayToDLPack(NDArrayHandle handle,
*/
MXNET_DLL int MXNDArrayFromDLPack(DLManagedTensorHandle dlpack,
                                  NDArrayHandle *out_handle);

/*!
* \brief Delete a dlpack tensor
* \param dlpack the pointer of the input DLManagedTensor
107 changes: 106 additions & 1 deletion python/mxnet/ndarray/ndarray.py
@@ -47,7 +47,7 @@
"imdecode", "lesser", "lesser_equal", "logical_and", "logical_or", "logical_xor",
"maximum", "minimum", "moveaxis", "modulo", "multiply", "not_equal", "onehot_encode",
"power", "subtract", "true_divide", "waitall", "_new_empty_handle", "histogram",
"split_v2", "to_dlpack_for_read", "to_dlpack_for_write", "from_dlpack"]
"split_v2", "to_dlpack_for_read", "to_dlpack_for_write", "from_dlpack", "from_numpy"]

_STORAGE_TYPE_UNDEFINED = -1
_STORAGE_TYPE_DEFAULT = 0
@@ -4115,3 +4115,108 @@ def from_dlpack(dlpack):
    # delete the deleter of the old dlpack
    ctypes.pythonapi.PyCapsule_SetDestructor(dlpack, None)
    return NDArray(handle=handle)

class DLContext(ctypes.Structure):
    _fields_ = [("device_type", ctypes.c_int),
                ("device_id", ctypes.c_int)]


class DLDataType(ctypes.Structure):
    _fields_ = [("type_code", ctypes.c_uint8),
                ("bits", ctypes.c_uint8),
                ("lanes", ctypes.c_uint16)]
    TYPE_MAP = {
        "int32": (0, 32, 1),
        "int64": (0, 64, 1),
        "bool": (1, 1, 1),
        "uint32": (1, 32, 1),
        "uint64": (1, 64, 1),
        "float32": (2, 32, 1),
        "float64": (2, 64, 1),
    }


class DLTensor(ctypes.Structure):
    _fields_ = [("data", ctypes.c_void_p),
                ("ctx", DLContext),
                ("ndim", ctypes.c_int),
                ("dtype", DLDataType),
                ("shape", ctypes.POINTER(ctypes.c_int64)),
                ("strides", ctypes.POINTER(ctypes.c_int64)),
                ("byte_offset", ctypes.c_uint64)]


class DLManagedTensor(ctypes.Structure):
    pass


DeleterFunc = ctypes.CFUNCTYPE(None, ctypes.POINTER(DLManagedTensor))


DLManagedTensor._fields_ = [("dl_tensor", DLTensor),  # pylint: disable=protected-access
                            ("manager_ctx", ctypes.c_void_p),
                            ("deleter", DeleterFunc)]


@DeleterFunc
def dl_managed_tensor_deleter(dl_managed_tensor_handle):
    void_p = dl_managed_tensor_handle.contents.manager_ctx
    pyobj = ctypes.cast(void_p, ctypes.py_object)
    ctypes.pythonapi.Py_DecRef(pyobj)


def from_numpy(ndarray, zero_copy=True):
    """Returns an MXNet NDArray backed by a numpy ndarray.

    Parameters
    ----------
    ndarray: numpy.ndarray
        input data

    zero_copy: bool
        Whether to use DLPack's zero-copy conversion to convert to MXNet's NDArray.
        This is only available for c-contiguous arrays, i.e. array.flags[C_CONTIGUOUS] == True.

    Returns
    -------
    NDArray
        an NDArray backed by a dlpack tensor

    """

    def _make_manager_ctx(obj):
        pyobj = ctypes.py_object(obj)
        void_p = ctypes.c_void_p.from_buffer(pyobj)
        ctypes.pythonapi.Py_IncRef(pyobj)
        return void_p

    def _make_dl_tensor(array):
        if str(array.dtype) not in DLDataType.TYPE_MAP:
            raise ValueError(str(array.dtype) + " is not supported.")
        dl_tensor = DLTensor()
        dl_tensor.data = array.ctypes.data_as(ctypes.c_void_p)
        dl_tensor.ctx = DLContext(1, 0)
        dl_tensor.ndim = array.ndim
        dl_tensor.dtype = DLDataType.TYPE_MAP[str(array.dtype)]
        dl_tensor.shape = array.ctypes.shape_as(ctypes.c_int64)
        dl_tensor.strides = None
        dl_tensor.byte_offset = 0
        return dl_tensor

    def _make_dl_managed_tensor(array):
        c_obj = DLManagedTensor()
        c_obj.dl_tensor = _make_dl_tensor(array)
        c_obj.manager_ctx = _make_manager_ctx(array)
        c_obj.deleter = dl_managed_tensor_deleter
        return c_obj

    if not zero_copy:
        return array(ndarray, dtype=ndarray.dtype)

    if not ndarray.flags['C_CONTIGUOUS']:
        raise ValueError("Only c-contiguous arrays are supported for zero-copy")
    c_obj = _make_dl_managed_tensor(ndarray)
Member (@szha):
when doing zero-copy, should the ownership of the data pointer be transferred to the new ndarray? do we need to update the writable flag? what happens when the original data is updated by numpy?

Member Author (@junrushao):
@szha They share ownership, which means numpy's modifications are transparent to MXNet and vice versa. As requested by @reminisce in his review, I added a test case for this transparency.
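A minimal sketch of this shared-ownership behavior, mirroring the test case added in this PR (the array values and variable names are illustrative):

import numpy as np
import mxnet as mx

a = np.array([[1, 2], [3, 4], [5, 6]], dtype="float32")
b = mx.nd.from_numpy(a)      # zero-copy: a and b share one buffer

a[2, 1] = 0                  # a write through numpy ...
print(b.asnumpy()[2, 1])     # ... is visible from MXNet: 0.0

b[2, 1] = 100                # a write through MXNet ...
b.wait_to_read()             # let the asynchronous engine finish
print(a[2, 1])               # ... is visible from numpy: 100.0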

Member:
how does it work when using the asynchronous engine? since numpy calls happen immediately, allowing shared ownership may give unexpected results. consider the following case:

a = np.array(...)
b = nd.from_numpy(a, zero_copy=True)
c = nd.expensive_op(b, in_place=True)
d = np.inplace_op(a) # this is called when c hasn't finished yet

given the asynchronous execution, it's hard to tell whether people should expect d to be based on the input before or after expensive_op

Member Author (@junrushao), Apr 29, 2019:
(We actually had this discussion a while ago, but it's good to make it open and accessible to the community.)

My understanding is that your concern is a general question about multi-threading. This issue exists in any multi-threaded system, and since MXNet has a multi-threaded engine, it exists here too, right?

We discussed what should happen if the content of the array is changed somewhere inside the system without the user knowing: should we clear the numpy WRITEABLE flag to prevent users from writing, or should we completely disallow users from accessing the numpy array?

At a high level, the question is all about trade-offs. Restricting users' freedom may make things less error-prone, while giving users full freedom makes the system more powerful.

On one hand, in my humble opinion, restricting users' freedom, e.g. by clearing the WRITEABLE flag, does not completely resolve the issue, because users can still hit pitfalls when they read the array through the numpy API. The only thing that would fully help is to block both reads and writes on the numpy side, which is more like guaranteeing safety by disallowing users to create threads or processes at all.

On the other hand, giving users freedom lets them do more, and it does not mean they will necessarily make mistakes, because we will encourage them to use MXNet's numpy compatibility, which is on the way.

Anyway, I am open to discussion and to possible changes, such as setting flags on the numpy array.
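For context, the WRITEABLE option discussed here corresponds to numpy's per-array writeable flag; the following is only a sketch of what clearing it would look like, not something this PR does:

import numpy as np
import mxnet as mx

a = np.arange(6, dtype="float32").reshape(3, 2)
b = mx.nd.from_numpy(a)        # zero-copy wrap, as added in this PR

a.flags.writeable = False      # hypothetical follow-up: block writes from the numpy side
try:
    a[0, 0] = 1.0
except ValueError as err:      # numpy raises "assignment destination is read-only"
    print("write blocked:", err)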

Member:
The API should have clear semantics regardless of users' knowledge of how MXNet execution works. If you worry about flexibility, I'd recommend making it an option to not disable the writable flag.

Member:
Once we add a zero-copy option to asnumpy where the ownership is completely transferred back to numpy, I think there wouldn't be any need for co-ownership.

Member (@wkcn), Apr 30, 2019:
Consider the following case:

# the variable `a` is a `mx.nd.NDArray` object
a = mx.nd.array([1,2,3])
b = a.asnumpy(zero_copy=True)
#  users call `asnumpy(zero_copy=True)` twice
c = a.asnumpy(zero_copy=True)
c[:] = 3
print(a)
print(b)

a[:] = 5
a.wait_to_read()
print(b)
print(c)

It's necessary to keep co-ownership.
We could add an extra option in from_numpy and asnumpy to prompt users to call wait_to_read().
Alternatively, could we add a hook into numpy.ndarray so that wait_to_read() is called automatically when users access the numpy array?
Reference: https://docs.scipy.org/doc/numpy/reference/arrays.classes.html
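A sketch of the manual-synchronization pattern mentioned above, using this PR's from_numpy rather than the hypothetical asnumpy(zero_copy=True): calling wait_to_read() before touching the shared buffer from numpy makes the ordering explicit.

import numpy as np
import mxnet as mx

a = np.ones((1000, 1000), dtype="float32")
b = mx.nd.from_numpy(a)   # shared buffer

b *= 2                    # queued on MXNet's asynchronous engine
b.wait_to_read()          # block until the engine has finished writing
a += 1                    # safe now: numpy sees the doubled values first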

Member:
If zero-copy is on, then a should not be usable after the asnumpy call. The point is to let the framework deal with all synchronization, and co-ownership doesn't allow that. And for the same case, you can simply copy c from b (the new numpy array) instead of a (the old ndarray), so it's not necessary to keep co-ownership.
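In other words, for the case above the second asnumpy call could be replaced by a plain numpy-side copy (sketch):

c = b.copy()   # copy of the already-exported numpy array, no second zero-copy view of a needed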

Member:
The hook in the context seems to refer to our own subclass of numpy array. Creating a new subclass seems like overkill.

Contributor:
For reference, @szha's suggestion was addressed in #14948

    address = ctypes.addressof(c_obj)
    address = ctypes.cast(address, ctypes.c_void_p)
    handle = NDArrayHandle()
    check_call(_LIB.MXNDArrayFromDLPack(address, ctypes.byref(handle)))
    return NDArray(handle=handle)
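A small usage sketch of the new API (array values and variable names here are illustrative, not from the PR):

import numpy as np
import mxnet as mx

a = np.arange(6, dtype="float32").reshape(3, 2)
b = mx.nd.from_numpy(a)                   # zero-copy: b wraps a's buffer via DLPack
c = mx.nd.from_numpy(a, zero_copy=False)  # plain copy instead of sharing memory

t = np.arange(6, dtype="float32").reshape(3, 2).T
# t is not C-contiguous, so zero-copy conversion raises ValueError;
# np.ascontiguousarray(t) makes a compact copy that can be wrapped instead.
d = mx.nd.from_numpy(np.ascontiguousarray(t))

The zero_copy=False path simply falls back to the regular mxnet.ndarray.array constructor, so it accepts arrays of any memory layout.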
14 changes: 7 additions & 7 deletions src/ndarray/ndarray.cc
@@ -339,8 +339,8 @@ NDArray NDArray::data_ndarray() const {
}

struct NDArrayDLManager {
NDArray handle; // ref NDArray
DLManagedTensor tensor;
NDArray handle; // ref NDArray
DLManagedTensor tensor;
};

DLManagedTensor* NDArray::ToDLPack() const {
@@ -356,13 +356,13 @@ DLManagedTensor* NDArray::ToDLPack() const {
}

NDArray NDArray::FromDLPack(const DLManagedTensor* tensor) {
const DLTensor &dl_tensor = tensor->dl_tensor;
auto deleter = [tensor](){
if (tensor->deleter != nullptr) {
tensor->deleter(const_cast<DLManagedTensor*>(tensor));
DLManagedTensor tensor_copy = *tensor;
auto deleter = [tensor_copy](){
if (tensor_copy.deleter != nullptr) {
tensor_copy.deleter(const_cast<DLManagedTensor*>(&tensor_copy));
}
};
return NDArray(TBlob(dl_tensor), dl_tensor.ctx.device_id, deleter);
return NDArray(TBlob(tensor_copy.dl_tensor), tensor_copy.dl_tensor.ctx.device_id, deleter);
}

bool NDArray::fresh_out_grad() const {
31 changes: 31 additions & 0 deletions tests/python/unittest/test_ndarray.py
@@ -1653,6 +1653,37 @@ def test_ndarray_nan_comparison():
    for i in (np.isnan(data1_grad))[1][0].flatten():
        assert i == True


def test_zero_from_numpy():
    # Test zero_copy
    arrays = [
        # ordinary numpy array
        np.array([[1, 2], [3, 4], [5, 6]], dtype="float32"),
        # 0-dim
        np.array((1, )).reshape(()),
        # 0-size
        np.array(()).reshape((1, 0, 2)),
    ]
    for zero_copy in [False, True]:
        for np_array in arrays:
            mx_array = mx.nd.from_numpy(np_array, zero_copy=zero_copy)
            mx.test_utils.assert_almost_equal(np_array, mx_array.asnumpy())
    np_array = arrays[0]
    mx_array = mx.nd.from_numpy(np_array)
    np_array[2, 1] = 0
    mx.test_utils.assert_almost_equal(np_array, mx_array.asnumpy())
    mx_array[2, 1] = 100
    mx.test_utils.assert_almost_equal(np_array, mx_array.asnumpy())
    np_array = np.array([[1, 2], [3, 4], [5, 6]]).transpose()
    assert not np_array.flags["C_CONTIGUOUS"]
    try:
        mx_array = mx.nd.from_numpy(np_array)
    except ValueError:
        pass
    else:
        assert False


if __name__ == '__main__':
    import nose
    nose.runmodule()