[Cherry-pick] Add return self for nn.Layer #36779

Closed
62 changes: 32 additions & 30 deletions python/paddle/fluid/dygraph/layers.py
@@ -92,7 +92,7 @@ class Layer(core.Layer):
If set to str, it can be "bool", "float16", "float32", "float64",
"int8", "int16", "int32", "int64", "uint8" or "uint16".
Default: "float32"

Returns:
None
"""
@@ -278,7 +278,7 @@ def register_forward_post_hook(self, hook):

It should have the following form: the `input` and `output` of the `hook` are the `input` and `output` of the `Layer`, respectively.
Users can use a forward post-hook to change the output of the Layer or to collect statistics about the Layer.

hook(Layer, input, output) -> None or modified output

Parameters:
@@ -324,9 +324,9 @@ def forward_post_hook(layer, input, output):

def register_forward_pre_hook(self, hook):
"""Register a forward pre-hook for Layer. The hook will be called before `forward` function has been computed.

It should have the following form: the `input` of the `hook` is the `input` of the `Layer`.
The hook can return either a tuple or a single modified value; a single return value is wrapped into a tuple (unless it is already a tuple).
Users can use a forward pre-hook to change the input of the Layer or to collect statistics about the Layer.
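As a companion to the two hook docstrings above, here is a minimal sketch of how pre- and post-hooks behave (not part of this diff; the layer size and the doubling/offset logic are purely illustrative):

```python
import paddle

def forward_pre_hook(layer, input):
    # `input` arrives as a tuple; a single returned value is
    # wrapped back into a tuple by the framework.
    return input[0] * 2

def forward_post_hook(layer, input, output):
    # The post-hook may replace the Layer's output.
    return output + 1

linear = paddle.nn.Linear(4, 4)
pre_handle = linear.register_forward_pre_hook(forward_pre_hook)
post_handle = linear.register_forward_post_hook(forward_post_hook)

x = paddle.ones([2, 4], dtype='float32')
out = linear(x)  # runs pre-hook -> forward -> post-hook

# Both register_* calls return a helper whose remove() detaches the hook.
pre_handle.remove()
post_handle.remove()
```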

@@ -382,7 +382,7 @@ def create_parameter(self,
is_bias=False,
default_initializer=None):
"""Create parameters for this layer.

Parameters:
shape(list): Shape of the parameter.
attr(ParamAttr, optional): Parameter attribute of weight. Please refer to :ref:`api_paddle_ParamAttr`. Default: None.
@@ -453,13 +453,13 @@ def __init__(self,
out_features):
super(MyLinear, self).__init__()
self.linear = paddle.nn.Linear(10, 10)

self.back_var = self.create_variable(name="linear_tmp_0", dtype=self._dtype)

def forward(self, input):
out = self.linear(input)
paddle.assign(out, self.back_var)

return out

"""
@@ -503,13 +503,13 @@ def __init__(self,
out_features):
super(MyLinear, self).__init__()
self.linear = paddle.nn.Linear(10, 10)

self.back_var = self.create_tensor(name="linear_tmp_0", dtype=self._dtype)

def forward(self, input):
out = self.linear(input)
paddle.assign(out, self.back_var)

return out

"""
@@ -729,7 +729,7 @@ def register_buffer(self, name, tensor, persistable=True):

Returns:
None

Examples:
.. code-block:: python

@@ -856,10 +856,10 @@ def named_buffers(self, prefix='', include_sublayers=True):
def clear_gradients(self):
"""
Clear the gradients of all parameters for this layer.

Returns:
None

Examples:
.. code-block:: python

@@ -901,8 +901,8 @@ def __call__(self, *inputs, **kwargs):
with program_desc_tracing_guard(False):
self._build_once(*inputs, **kwargs)

# TODO(liuyuhui) Only xpu broadcast parameters here.
# Other devices call _sync_params_buffers in DataParallel
# to realize the parameter synchronization among multiple cards.
if parallel_helper._is_data_parallel_mode(
) and paddle.is_compiled_with_xpu():
@@ -944,7 +944,7 @@ def add_sublayer(self, name, sublayer):
sublayer(Layer): an instance of Layer.
Returns:
Layer: the sublayer passed in.

Examples:
.. code-block:: python

@@ -1167,7 +1167,7 @@ def _remove_if_exist(*dicts):
self._non_persistable_buffer_names_set.add(name)
_buffers[name] = value
elif _buffers is not None and name in _buffers:
# Note(Aurelius84): In Dy2stat, the value of the Buffer may be modified in
# decorated function, such as `self.buffer = new_tensor`. So we update its
# value via `assign`.
if type(value) == framework.Variable:
@@ -1326,7 +1326,7 @@ def to_static_state_dict(self,
Parameters:
destination(dict, optional): If provided, all the parameters and persistable buffers will be set to this dict. Default: None
include_sublayers(bool, optional): If true, also include the parameters and persistable buffers from sublayers. Default: True

Returns:
dict: a dict containing all the parameters and persistable buffers.

@@ -1357,7 +1357,7 @@ def state_dict(self,
Parameters:
destination(dict, optional): If provided, all the parameters and persistable buffers will be set to this dict. Default: None
include_sublayers(bool, optional): If true, also include the parameters and persistable buffers from sublayers. Default: True

Returns:
dict: a dict containing all the parameters and persistable buffers.

@@ -1385,7 +1385,7 @@ def set_state_dict(self, state_dict, use_structured_name=True):

Parameters:
state_dict(dict): A dict containing all the parameters and persistable buffers.
use_structured_name(bool, optional): If true, use structured name as key; otherwise, use parameter or buffer name as key.
Default: True
Returns:
None
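A short sketch of the state_dict / set_state_dict round trip described in these hunks, consistent with the docstrings (not part of this diff; the file name emb.pdparams is arbitrary):

```python
import paddle

emb = paddle.nn.Embedding(10, 10)

# Collect all parameters and persistable buffers into a dict.
state_dict = emb.state_dict()
paddle.save(state_dict, "emb.pdparams")

# Later (or in another process): load and restore by structured name.
loaded = paddle.load("emb.pdparams")
emb.set_state_dict(loaded)
```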
@@ -1497,21 +1497,22 @@ def to(self, device=None, dtype=None, blocking=None):
Cast the parameters and buffers of the Layer to the given device, dtype and blocking.

Parameters:
device(str|paddle.CPUPlace()|paddle.CUDAPlace()|paddle.CUDAPinnedPlace()|paddle.XPUPlace()|None, optional): The device where the Layer is to be stored.
If None, the device is the same as the original Tensor. If device is a string, it can be ``cpu``, ``gpu:x`` or ``xpu:x``, where ``x`` is the
index of the GPU or XPU. Default: None.

dtype(str|core.VarDesc.VarType|None, optional): The type of the data. If None, the dtype is the same as the original Tensor. Default: None.

blocking(bool|None, optional): If False and the source is in pinned memory, the copy will be
asynchronous with respect to the host. Otherwise, the argument has no effect. If None, blocking is set to True. Default: None.

Returns:
- None
+ self

Examples:
.. code-block:: python

+ # required: gpu
import paddle

linear = paddle.nn.Linear(2, 2)
@@ -1537,12 +1538,12 @@ def to(self, device=None, dtype=None, blocking=None):
#Tensor(shape=[2, 2], dtype=float64, place=CUDAPinnedPlace, stop_gradient=False,
# [[-0.04989364, -0.56889004],
# [ 0.33960250, 0.96878713]])


'''

if device is None and dtype is None and blocking is None:
- return
+ return self

if device is not None:
if isinstance(device, str):
@@ -1587,6 +1588,7 @@ def transform(t, device, dtype, blocking):

self._apply(transform, device, dtype, blocking)
self._dtype = dtype
+ return self

# [aliases] Compatible with old method names
set_dict = set_state_dict
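Taken together, the changes above make Layer.to() return the layer itself instead of None. A minimal sketch of what that enables (not part of this diff; it assumes a CPU run, and device strings such as 'gpu:0' work the same way on CUDA builds):

```python
import paddle

linear = paddle.nn.Linear(2, 2)

# Before this change, to() returned None, so the result could not be
# assigned or chained; now it returns the layer itself.
linear = linear.to(device='cpu', dtype='float64')

print(linear.weight.dtype)    # paddle.float64
print(linear is linear.to())  # True: a no-op call also returns self now
```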