@property
def shape(self):
    """Return the parameter's shape under the active shape semantics.

    Internally an unknown dimension is stored as 0. When the NumPy shape
    semantics are turned on, every such dimension is reported as -1 instead;
    otherwise the stored shape is returned unchanged.

    Returns
    -------
    tuple of int or None
        `None` if no shape has been set; otherwise the shape, with unknown
        dimensions marked as described above.
    """
    stored_shape = self._shape
    if stored_shape is None:
        return None
    if not is_np_shape():
        return stored_shape
    # A parameter is never expected to be zero-size, so a stored 0 means
    # "not yet known". The NumPy semantics mark unknown dimensions with -1.
    return tuple(-1 if dim == 0 else dim for dim in stored_shape)
def shape_is_known(shape):
    """Check whether a shape is completely known with or without np semantics.

    The marker for an unknown dimension depends on the active shape
    semantics: it is -1 when `is_np_shape()` is true and 0 otherwise.
    Please see the doc of is_np_shape for more details.

    Parameters
    ----------
    shape : sequence of int or None
        The shape to check.

    Returns
    -------
    bool
        `False` if `shape` is `None` or contains the unknown-dimension marker.
        For an empty shape, `True` only under the NumPy shape semantics.
        `True` if every dimension is larger than the marker.

    Raises
    ------
    AssertionError
        If a dimension smaller than the unknown-dimension marker is found
        before any unknown dimension is encountered.
    """
    if shape is None:
        return False
    unknown_dim_size = -1 if is_np_shape() else 0
    if len(shape) == 0:
        # An empty shape counts as known only under the NumPy semantics.
        return unknown_dim_size == -1
    for dim_size in shape:
        if dim_size == unknown_dim_size:
            return False
        if dim_size < unknown_dim_size:
            # Raised explicitly instead of using an `assert` statement so the
            # check is not stripped under `python -O`. The exception type is
            # kept as AssertionError so existing callers see no difference.
            raise AssertionError(
                "shape dimension size cannot be less than {}, while "
                "received {}".format(unknown_dim_size, dim_size))
    return True
@with_seed()
def test_np_shape_parameters():
    """Check deferred parameter shapes under the NumPy shape semantics.

    A `Dense` layer created without `in_units` has an unknown input dimension.
    With `mx.np_shape(True)` that dimension must be reported as -1 until the
    first forward pass infers it from the input.
    """
    class Foo(gluon.Block):
        def __init__(self, **kwargs):
            super(Foo, self).__init__(**kwargs)
            self.dense = gluon.nn.Dense(16)

        def forward(self, x):
            return self.dense(x)

    with mx.np_shape(True):
        z = mx.nd.zeros((2, 2016))
        assert z.shape == (2, 2016)
        foo = Foo()
        foo.initialize()
        # Initialization is deferred: the input dimension is still unknown
        # and is marked with -1 under the NumPy shape semantics.
        assert foo.dense.weight.shape == (16, -1)
        out = foo(z)
        assert out.shape == (2, 16)
        # The forward pass infers the missing dimension from the input.
        assert foo.dense.weight.shape == (16, 2016)