Skip to content

Commit a84f95b

Browse files
committed
fix
1 parent 8f03b66 commit a84f95b

File tree

2 files changed

+98
-1
lines changed

2 files changed

+98
-1
lines changed

tests/operators/test_eagle_get_hidden_states.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def test_eagle_get_hidden_states(self):
6060
base_model_seq_lens_encoder_tensor,
6161
actual_draft_token_num,
6262
)
63-
out_ref = np.array([6, 4, 3, 3], dtype=np.int32)
63+
out_ref = np.array([6, 4, 3, 3], dtype=np.float16)
6464
np.testing.assert_allclose(gpu_out.numpy()[0][0:4], out_ref)
6565

6666

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import unittest
16+
17+
import numpy as np
18+
import paddle
19+
20+
from fastdeploy.model_executor.ops.gpu import eagle_get_self_hidden_states
21+
22+
23+
def computeOrder(last_seq_lens_this_time, seq_lens_this_time, step_idx, src_map, bsz):
    """Fill ``src_map`` with the input row index to copy for every emitted output row.

    The first ``bsz`` batch entries are visited in order while a running
    input offset is maintained. An entry whose current length is positive
    emits one output row that points at ``offset - 1`` after the offset has
    been advanced for that entry.

    Args:
        last_seq_lens_this_time: Per-entry lengths from the previous step
            (indexable by batch position).
        seq_lens_this_time: Per-entry lengths for the current step.
        step_idx: Per-entry step counters; the value ``1`` is special-cased.
        src_map: Writable sequence that is updated in place.
        bsz: Number of batch entries to visit.

    Returns:
        ``(output_row_count, src_map)`` where ``src_map`` is the same object
        that was passed in.
    """
    read_pos = 0
    write_pos = 0
    for batch_id in range(bsz):
        cur_len = seq_lens_this_time[batch_id]
        prev_len = last_seq_lens_this_time[batch_id]
        at_step_one = step_idx[batch_id] == 1
        emits_row = cur_len > 0

        # Advance the input offset for this entry.
        if at_step_one:
            # Step 1 consumes exactly one input row, unless the entry emits
            # nothing and also had no rows in the previous step.
            if emits_row or prev_len > 0:
                read_pos += 1
        else:
            # Any other step consumes everything from the previous step.
            read_pos += prev_len

        # Emit a mapping to the last input row consumed so far.
        if emits_row:
            src_map[write_pos] = read_pos - 1
            write_pos += 1

    return (write_pos, src_map)
44+
45+
46+
def rebuildSelfHiddenStatesKernel(input, src_map, out, dim_embed, elem_cnt):
    """Gather rows of ``input`` into the flat buffer ``out`` according to ``src_map``.

    For every flat element id ``e`` in ``[0, elem_cnt)`` the value written is
    ``input_flat[src_map[e // dim_embed] * dim_embed + e % dim_embed]``,
    stored at ``out[e]``.

    Args:
        input: Source values, either already flat or shaped
            ``(rows, dim_embed)``; it is flattened before use so both
            layouts work.
        src_map: For each output row, the index of the input row to copy.
            Negative indices wrap around as in normal Python indexing.
        out: Flat, writable destination with at least ``elem_cnt`` entries;
            modified in place.
        dim_embed: Number of elements per row.
        elem_cnt: Number of flat elements to write.

    Returns:
        The same ``out`` object that was passed in.

    Raises:
        ValueError: If ``dim_embed`` is not positive while elements are requested.
        IndexError: If ``out`` holds fewer than ``elem_cnt`` entries.
    """
    elem_cnt = int(elem_cnt)
    if elem_cnt <= 0:
        # Nothing to copy; leave the destination untouched.
        return out

    dim_embed = int(dim_embed)
    if dim_embed <= 0:
        raise ValueError("dim_embed must be positive")
    if len(out) < elem_cnt:
        raise IndexError("out is too small for elem_cnt elements")

    # The address arithmetic below uses flat element offsets, so a 2-D input
    # has to be viewed as one contiguous run of values first.
    flat_input = np.asarray(input).reshape(-1)

    # int64 keeps the row * dim_embed products from overflowing narrow dtypes.
    elem_ids = np.arange(elem_cnt, dtype=np.int64)
    output_token_idx = elem_ids // dim_embed
    offset = elem_ids % dim_embed
    input_token_idx = np.asarray(src_map, dtype=np.int64)[output_token_idx]

    out[:elem_cnt] = flat_input[input_token_idx * dim_embed + offset]
    return out
53+
54+
55+
def ref_eagle_get_self_hidden_states(input, last_seq_lens_this_time, seq_lens_this_time, step_idx):
    """Compute the gathered hidden states on the host with NumPy.

    Args:
        input: 2-D array; ``shape[0]`` is the number of input rows and
            ``shape[1]`` the row width.
        last_seq_lens_this_time: 1-D array of previous-step lengths.
        seq_lens_this_time: 1-D array of current-step lengths; its length is
            the batch size and its dtype is reused for the row map.
        step_idx: 1-D array of per-entry step counters.

    Returns:
        Array of shape ``[output_token_num, dim_embed]`` with ``input``'s
        dtype, where ``output_token_num`` is the row count reported by
        ``computeOrder``.
    """
    input_token_num = input.shape[0]
    dim_embed = input.shape[1]
    bsz = seq_lens_this_time.shape[0]

    # Row map starts as all -1; computeOrder fills the leading entries.
    src_map = np.full(input_token_num, -1, seq_lens_this_time.dtype)
    output_token_num, src_map = computeOrder(last_seq_lens_this_time, seq_lens_this_time, step_idx, src_map, bsz)

    elem_cnt = output_token_num * dim_embed
    out = np.full([elem_cnt], -1, input.dtype)

    # The gather helper addresses its source with flat element offsets
    # (row * dim_embed + column), so it must receive a flat view of the
    # matrix; indexing the 2-D array with those offsets would select whole
    # rows (or run out of bounds) instead of single elements.
    out = rebuildSelfHiddenStatesKernel(input.reshape(-1), src_map, out, dim_embed, elem_cnt)
    return out.reshape([output_token_num, dim_embed])
66+
67+
68+
class TestEagleGetSelfHiddenStates(unittest.TestCase):
    """Smoke test for the ``eagle_get_self_hidden_states`` GPU operator."""

    def test_eagle_get_self_hidden_states(self):
        """Run the operator on seeded random inputs and check four output values.

        Only the first four elements of the first output row are compared,
        against hard-coded expectations.
        NOTE(review): the expected values presumably come from a known-good
        run with these seeds -- confirm. They depend on the exact order of
        the ``np.random`` draws below, so do not reorder them.
        """
        paddle.seed(2023)
        np.random.seed(2023)

        # Random problem size: 1..8 requests, 2048..4096 input rows.
        bs = np.random.randint(low=1, high=9, dtype=np.int32)
        input_token_num = np.random.randint(low=2048, high=4097, dtype=np.int32)
        dim_embed = np.array(1024, dtype=np.int32)

        # Exclusive upper bound for each randomly drawn per-request length.
        per_request_cap = input_token_num // bs
        last_seq_lens_this_time = np.random.randint(low=0, high=per_request_cap, size=bs, dtype=np.int32)
        seq_lens_this_time = np.random.randint(low=0, high=per_request_cap, size=bs, dtype=np.int32)
        step_idx = np.arange(bs, dtype=np.int32)

        last_seq_lens_this_time_tensor = paddle.to_tensor(last_seq_lens_this_time, dtype=paddle.int32)
        seq_lens_this_time_tensor = paddle.to_tensor(seq_lens_this_time, dtype=paddle.int32)
        step_idx_tensor = paddle.to_tensor(step_idx, dtype=paddle.int64)

        # Small integer values, handed to the operator as float16.
        hidden_states = np.random.randint(low=0, high=10, size=(input_token_num, dim_embed), dtype=np.int32)
        input_tensor = paddle.to_tensor(hidden_states, dtype=paddle.float16)

        gpu_out = eagle_get_self_hidden_states(
            input_tensor,
            last_seq_lens_this_time_tensor,
            seq_lens_this_time_tensor,
            step_idx_tensor,
        )

        out_ref = np.array([5, 4, 2, 8], dtype=np.float16)
        first_row_head = gpu_out.numpy()[0][:4]
        np.testing.assert_allclose(first_row_head, out_ref)
94+
95+
96+
# Run this module's test cases when the file is executed directly.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)