From 116545ebbec655e099e6bb716dd772669d032384 Mon Sep 17 00:00:00 2001 From: co63oc Date: Sun, 31 Aug 2025 08:44:42 +0800 Subject: [PATCH] add test_speculate_get_output_padding_offset --- ...est_speculate_get_output_padding_offset.py | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 tests/operators/test_speculate_get_output_padding_offset.py diff --git a/tests/operators/test_speculate_get_output_padding_offset.py b/tests/operators/test_speculate_get_output_padding_offset.py new file mode 100644 index 00000000000..fc973b75e3a --- /dev/null +++ b/tests/operators/test_speculate_get_output_padding_offset.py @@ -0,0 +1,53 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import unittest

import numpy as np
import paddle

from fastdeploy.model_executor.ops.gpu import speculate_get_output_padding_offset


class TestSpeculateGetOutputPaddingOffset(unittest.TestCase):
    """Compare ``speculate_get_output_padding_offset`` with a host-side reference."""

    def test_speculate_get_output_padding_offset(self):
        """Check both outputs of the op against values computed with NumPy.

        The reference expects, for every batch entry ``bi``:

        * ``output_cum_offsets[bi]`` to be ``0`` for the first entry and
          ``output_cum_offsets_tmp[bi - 1]`` otherwise, and
        * every output token of that entry to carry this same value in
          ``output_padding_offset``, at position
          ``bi * max_seq_len - cum_offset + token_i``.
        """
        bsz = 256
        max_seq_len = 8192

        # A fixed seed keeps the generated lengths identical between runs, so
        # a failure can be reproduced.
        rng = np.random.default_rng(2025)
        seq_lens_np = rng.integers(0, 4, size=bsz, dtype=np.int32)
        # Plain Python int: it is used below as an array length.
        output_token_num = int(seq_lens_np.sum())

        seq_lens_output = paddle.to_tensor(seq_lens_np, dtype="int32")
        out_token_num = paddle.sum(seq_lens_output).astype("int32")
        output_cum_offsets_tmp = paddle.cumsum(max_seq_len - seq_lens_output).astype("int32")

        output_padding_offset_gpu, output_cum_offsets_gpu = speculate_get_output_padding_offset(
            output_cum_offsets_tmp, out_token_num, seq_lens_output, max_seq_len
        )

        # Build the reference from host data only; indexing the device tensors
        # element by element would synchronise on every access and leave 0-D
        # tensors inside the expected values.
        cum_offsets_tmp_np = np.cumsum(max_seq_len - seq_lens_np.astype(np.int64))

        output_cum_offsets_ref = np.zeros(bsz, dtype=np.int64)
        output_cum_offsets_ref[1:] = cum_offsets_tmp_np[:-1]

        # -1 marks slots the loop below never wrote.
        output_padding_offset_ref = np.full(output_token_num, -1, dtype=np.int64)
        for bi in range(bsz):
            cum_offset = int(output_cum_offsets_ref[bi])
            first_token = bi * max_seq_len - cum_offset
            last_token = first_token + int(seq_lens_np[bi])
            output_padding_offset_ref[first_token:last_token] = cum_offset

        # The offsets are integers, so compare exactly rather than with a
        # floating-point tolerance.
        np.testing.assert_array_equal(output_padding_offset_gpu.numpy(), output_padding_offset_ref)
        np.testing.assert_array_equal(output_cum_offsets_gpu.numpy(), output_cum_offsets_ref)


if __name__ == "__main__":
    unittest.main()