[Paddle TensorRT No.19] Add pd_op.cumsum converter #69330
Changes from 5 commits
@@ -16,6 +16,7 @@
import tensorrt as trt

from paddle.tensorrt.converter_utils import (
    add_1D_constant_layer,
    add_cast_reduce_layer,
    add_elementwise_layer,
    add_reduce_layer,
@@ -227,6 +228,96 @@ def all_converter(network, paddle_op, inputs):
    )


@converter_registry.register("pd_op.cumsum", trt_version="8.x")
def cumsum_converter(network, paddle_op, inputs):
    input_tensor = inputs[0]
    dtype = input_tensor.dtype
    axis = paddle_op.operands()[1].source().get_defining_op().attrs()["value"]
    input_shape = input_tensor.shape
    rank = len(input_shape)

    if axis < 0:
        axis += rank
    axis = int(axis)

    # Obtain the trip count, i.e. the number of loop iterations along the axis
    if input_shape[axis] > 0:
        axis_tensor = np.array(input_shape[axis], dtype=np.int32)
        trip_limit = network.add_constant((), axis_tensor)
Review comment: Use add_1D_constant_layer here (it also needs to distinguish whether the value is a scalar).
    else:
        dynamic_shape = network.add_shape(input_tensor).get_output(0)
Review comment: Reuse trt_reshape and get_shape_tensor_element here.
        axis_tensor = np.array(axis, dtype=np.int32)
        index = network.add_constant((), axis_tensor).get_output(0)
        trip_limit = network.add_gather(dynamic_shape, index, 0)

    # Obtain the slice shape
    shape_list = []
    for i in range(rank):
        if i == axis:
Review comment: This code differs quite a lot from the old IR converter; I suggest you also write a getAxisLength function (a sketch follows this function). Why does this branch handle input dims < 0 and then also have to reshape the dimension?
            shape_list.append(add_1D_constant_layer(network, [1]))
        elif input_shape[i] < 0:
            dynamic_shape = network.add_shape(input_tensor).get_output(0)
            index = network.add_constant(
                (), np.array(i, dtype=np.int32)
            ).get_output(0)
            shape_index = network.add_gather(dynamic_shape, index, 0)
            shuffle_layer = network.add_shuffle(shape_index.get_output(0))
            shuffle_layer.reshape_dims = (1,)
            shape_list.append(shuffle_layer.get_output(0))
        else:
            shape_list.append(add_1D_constant_layer(network, input_shape[i]))
    slice_shape = network.add_concatenation(shape_list).get_output(0)

    start = [0] * rank
    size = [1] * rank
    stride = [1] * rank
    input_sliced = network.add_slice(input_tensor, start, size, stride)
Review comment: If axis = 0, sizes_tensor should be add_1D_constant_layer(network, 1), otherwise get_axis_length — where is that code? And where is strides_tensor? (See the sizes note after this function.)
    input_sliced.set_input(2, slice_shape)

    # squeeze the cumsum axis out of the sliced tensor
    shape_list.pop(axis)
    new_shape = network.add_concatenation(shape_list).get_output(0)
    squeeze_layer = network.add_shuffle(input_sliced.get_output(0))
    squeeze_layer.set_input(1, new_shape)

    loop = network.add_loop()
    loop.add_trip_limit(trip_limit.get_output(0), trt.TripLimit.COUNT)

    iterator = loop.add_iterator(input_tensor, axis)
    data = iterator.get_output(0)

    # Create a zero tensor shaped like the squeezed slice, used as the
    # initial value of the running sum.
    zero_vec = np.array([0.0], dtype=np.float32)
    zero = network.add_constant((1,), zero_vec).get_output(0)
    lhs_val, rhs_val = broadcast(
        network,
        squeeze_layer.get_output(0),
        zero,
        squeeze_layer.get_output(0).name,
        zero.name,
    )
    cast_tensor = trt_cast(network, rhs_val, dtype)
    zero_tensor = network.add_elementwise(
        lhs_val, cast_tensor, trt.ElementWiseOperation.PROD
    ).get_output(0)

    # Accumulate slice by slice along the axis
    running_sum = loop.add_recurrence(zero_tensor)
    running_sum_tensor = running_sum.get_output(0)

    cur_sum = network.add_elementwise(
        data, running_sum_tensor, trt.ElementWiseOperation.SUM
    ).get_output(0)

    running_sum.set_input(1, cur_sum)

    reverse_flag = trt.LoopOutput.CONCATENATE
    loop_out = loop.add_loop_output(cur_sum, reverse_flag, axis)
    loop_out.set_input(1, trip_limit.get_output(0))

    return loop_out.get_output(0)
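
The getAxisLength helper the reviewer asks for could look roughly like the sketch below. It leans on add_1D_constant_layer (already imported above) and the get_shape_tensor_element utility the reviewer mentions; both signatures are assumed from their call sites in this file, so treat this as an illustration rather than the final code:

# Hypothetical helper in the spirit of the old IR's getAxisLength.
def get_axis_length(network, input_tensor, axis):
    dim = input_tensor.shape[axis]
    if dim >= 0:
        # Static dimension: a 1-D constant holding the known length.
        return add_1D_constant_layer(network, dim)
    # Dynamic dimension (-1): read the length from the runtime shape tensor.
    shape_tensor = network.add_shape(input_tensor).get_output(0)
    return get_shape_tensor_element(network, shape_tensor, axis)

With such a helper, the if/elif/else over each dimension in the slice-shape loop collapses to one get_axis_length call per dimension (with add_1D_constant_layer(network, [1]) at the cumsum axis), and the sizes_tensor the reviewer asks about is just the concatenation of those pieces; the strides stay all ones.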
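For reference, the loop above reproduces NumPy/Paddle cumsum semantics: each iteration reads one slice along the axis, adds it to the running sum, and the partial sums are concatenated back along the same axis. A quick NumPy check of the expected result:

import numpy as np

x = np.array([[1, 2, 3],
              [4, 5, 6]], dtype=np.float32)

# Row k of the output is the sum of rows 0..k of the input.
print(np.cumsum(x, axis=0))
# [[1. 2. 3.]
#  [5. 7. 9.]]
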
@converter_registry.register("pd_op.floor_divide", trt_version="8.x") | ||
def floor_divide_converter(network, paddle_op, inputs): | ||
return add_elementwise_layer( | ||
|
Review comment: rank = 0 is supported now; please keep this aligned with the old IR converter.
Review comment (reply): It seems the rank = 0 case is now turned into rank = 1 automatically via zero_dims_to_one_dims (a sketch of that idea follows).
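
The zero_dims_to_one_dims helper is named in the thread but not shown in this diff, so the following is only a sketch of the behavior being described (reshaping a 0-D scalar tensor to shape (1,) so converters like this one always see rank >= 1); the real helper may differ:

# Illustrative only: mirrors what a zero_dims_to_one_dims-style helper would
# need to do, based on the review discussion rather than the actual source.
def zero_dims_to_one_dims(network, tensor):
    if len(tensor.shape) != 0:
        return tensor  # already rank >= 1, nothing to do
    shuffle_layer = network.add_shuffle(tensor)
    shuffle_layer.reshape_dims = (1,)  # promote the scalar to a 1-element vector
    return shuffle_layer.get_output(0)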