
Commit e1f02c2

add rebuild_padding tests
1 parent c96a535 commit e1f02c2

1 file changed: 200 additions & 0 deletions
@@ -0,0 +1,200 @@
import unittest

import numpy as np
import paddle

from fastdeploy.model_executor.ops.gpu import rebuild_padding


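# NumPy reference for the no-offset path: for each batch, gather one row of
# tmp_out -- the last prefill (encoder) token when seq_lens_encoder[bi] > 0,
# otherwise the single decode token at cu_seqlens_q[bi]. Batches with no
# tokens this step are skipped.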
def RebuildPaddingKernel(
    out,
    tmp_out,
    cu_seqlens_q,
    seq_len_this_time,
    seq_lens_decoder,
    seq_lens_encoder,
    bsz,
):
    for bi in range(bsz):
        seq_id = 0
        if seq_len_this_time[bi] == 0:
            continue
        if seq_lens_decoder[bi] == 0 and seq_lens_encoder[bi] == 0:
            continue
        if seq_lens_encoder[bi] > 0:
            seq_id = seq_lens_encoder[bi] - 1
        out[bi] = tmp_out[cu_seqlens_q[bi] + seq_id][:]


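# NumPy reference for the with-offset path: iterate over the compacted output
# tokens, map each one back to its source row in tmp_out via
# output_padding_offset and cu_seqlens_q, then copy that row. For prefill
# batches only the last encoder token is kept (seq_id = seq_lens_encoder - 1).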
def RebuildAppendPaddingKernel(
    out,
    tmp_out,
    cu_seqlens_q,
    seq_len_this_time,
    seq_lens_decoder,
    seq_lens_encoder,
    output_padding_offset,
    max_input_length,
    token_num,
    need_delete_token_num,
):
    for token_id in range(token_num - need_delete_token_num):
        bi = token_id // max_input_length
        if seq_len_this_time[bi] == 0 or (seq_lens_decoder[bi] == 0 and seq_lens_encoder[bi] == 0):
            continue
        ori_token_id = token_id + output_padding_offset[token_id]
        seq_id = 0
        if seq_lens_encoder[bi] > 0:
            seq_id = seq_lens_encoder[bi] - 1
        cum_offset_bi = bi * max_input_length - cu_seqlens_q[bi]
        input_token_id = ori_token_id - cum_offset_bi + seq_id
        out[token_id] = tmp_out[input_token_id][:]


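# Python reference for the rebuild_padding GPU op: infer the output shape from
# cu_seqlens_q (and, when output_padding_offset is given, from the number of
# prefill tokens that get dropped), then dispatch to one of the two kernels
# above.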
def rebuild_padding_ref(
    tmp_out,  # [token_num, dim_embed]
    cu_seqlens_q,  # [bsz+1, 1]
    seq_len_this_time,
    seq_lens_decoder,
    seq_lens_encoder,
    output_padding_offset,
    max_input_length,
):

    tmp_out_shape = tmp_out.shape
    token_num = tmp_out_shape[0]
    dim_embed = tmp_out_shape[1]
    bsz = cu_seqlens_q.shape[0] - 1

    out = np.zeros([bsz, dim_embed])
    if output_padding_offset is not None:
        need_delete_token_num = 0
        for i in range(bsz):
            if seq_lens_encoder[i] > 0:
                need_delete_token_num += seq_lens_encoder[i] - 1
        out = np.zeros([token_num - need_delete_token_num, dim_embed])
    else:
        out = np.zeros([bsz, dim_embed])

    if output_padding_offset is not None:
        RebuildAppendPaddingKernel(
            out,
            tmp_out,
            cu_seqlens_q,
            seq_len_this_time,
            seq_lens_decoder,
            seq_lens_encoder,
            output_padding_offset,
            max_input_length,
            token_num,
            need_delete_token_num,
        )
    else:
        RebuildPaddingKernel(
            out,
            tmp_out,
            cu_seqlens_q,
            seq_len_this_time,
            seq_lens_decoder,
            seq_lens_encoder,
            bsz,
        )
    return out


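# Compare the GPU rebuild_padding op against the NumPy reference for both
# call modes: without and with output_padding_offset.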
class TestRebuildPadding(unittest.TestCase):
    # test no offset
    def test_rebuild_padding_no_offset(self):
        token_num = 100
        dim_embed = 256
        # bsz = 8
        max_input_length = 512
        # tmp_out: [token_num, dim_embed]
        tmp_out = np.random.randn(token_num, dim_embed).astype(np.float32)
        # cu_seqlens_q: [bsz + 1], cumulative number of query tokens per batch.
        cu_seqlens_q = np.array(
            [0, 1, 21, 22, 42, 43, 63, 64, 84], dtype=np.int32
        )  # 8 batches; the 84 packed tokens fit in the token_num = 100 rows of tmp_out.

        # Simulated sequence length information
        seq_len_this_time = np.array([1, 20, 1, 20, 1, 20, 1, 20], dtype=np.int32)
        seq_lens_encoder = np.array([0, 20, 0, 20, 0, 20, 0, 20], dtype=np.int32)
        seq_lens_decoder = np.array([21, 0, 21, 0, 21, 0, 21, 0], dtype=np.int32)
        out_no_offset_ref = rebuild_padding_ref(
            tmp_out=tmp_out,
            cu_seqlens_q=cu_seqlens_q,
            seq_len_this_time=seq_len_this_time,
            seq_lens_decoder=seq_lens_decoder,
            seq_lens_encoder=seq_lens_encoder,
            output_padding_offset=None,
            max_input_length=max_input_length,
        )

        tmp_out = paddle.to_tensor(tmp_out)
        cu_seqlens_q = paddle.to_tensor(cu_seqlens_q)
        seq_len_this_time = paddle.to_tensor(seq_len_this_time)
        seq_lens_decoder = paddle.to_tensor(seq_lens_decoder)
        seq_lens_encoder = paddle.to_tensor(seq_lens_encoder)

        out_no_offset = rebuild_padding(
            tmp_out,
            cu_seqlens_q,
            seq_len_this_time,
            seq_lens_decoder,
            seq_lens_encoder,
            None,
            max_input_length,
        )
        np.testing.assert_allclose(out_no_offset.numpy(), out_no_offset_ref)

    # test with offset
    def test_rebuild_padding_with_offset(self):
        paddle.seed(42)
        np.random.seed(42)  # the test data below is generated with NumPy, so seed NumPy as well
        token_num = 100
        dim_embed = 256
        # bsz = 8
        max_input_length = 512
        # tmp_out: [token_num, dim_embed]
        tmp_out = np.random.randn(token_num, dim_embed).astype(np.float32)
        # cu_seqlens_q: [bsz + 1], cumulative number of query tokens per batch.
        cu_seqlens_q = np.array(
            [0, 1, 21, 22, 42, 43, 63, 64, 84], dtype=np.int32
        )  # 8 batches; the 84 packed tokens fit in the token_num = 100 rows of tmp_out.

        # Simulated sequence length information
        seq_len_this_time = np.array([1, 20, 1, 20, 1, 20, 1, 20], dtype=np.int32)
        seq_lens_encoder = np.array([0, 20, 0, 20, 0, 20, 0, 20], dtype=np.int32)
        seq_lens_decoder = np.array([21, 0, 21, 0, 21, 0, 21, 0], dtype=np.int32)

        num_output_tokens = 80
        output_padding_offset = np.random.randint(0, 10, [num_output_tokens], dtype=np.int32)
        out_with_offset_ref = rebuild_padding_ref(
            tmp_out=tmp_out,
            cu_seqlens_q=cu_seqlens_q,
            seq_len_this_time=seq_len_this_time,
            seq_lens_decoder=seq_lens_decoder,
            seq_lens_encoder=seq_lens_encoder,
            output_padding_offset=output_padding_offset,
            max_input_length=max_input_length,
        )

        tmp_out = paddle.to_tensor(tmp_out)
        cu_seqlens_q = paddle.to_tensor(cu_seqlens_q)
        seq_len_this_time = paddle.to_tensor(seq_len_this_time)
        seq_lens_decoder = paddle.to_tensor(seq_lens_decoder)
        seq_lens_encoder = paddle.to_tensor(seq_lens_encoder)
        output_padding_offset = paddle.to_tensor(output_padding_offset)
        out_with_offset = rebuild_padding(
            tmp_out,
            cu_seqlens_q,
            seq_len_this_time,
            seq_lens_decoder,
            seq_lens_encoder,
            output_padding_offset,
            max_input_length,
        )
        np.testing.assert_allclose(out_with_offset.numpy(), out_with_offset_ref)


if __name__ == "__main__":
    unittest.main()
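
A note on what the two tests assert: in the no-offset case the op reduces to one gathered row of tmp_out per batch, and in the with-offset case the output keeps token_num rows minus one row for every prefill token except the last. Below is a small standalone NumPy sketch of both facts, using the same toy shapes as the tests above; it is illustrative only and not part of the committed file.

import numpy as np

# Toy data matching the tests: 8 batches alternating decode (1 token) and
# prefill (20 tokens); tmp_out holds the 84 packed tokens in its first rows.
tmp_out = np.random.randn(100, 256).astype(np.float32)
cu_seqlens_q = np.array([0, 1, 21, 22, 42, 43, 63, 64, 84], dtype=np.int32)
seq_lens_encoder = np.array([0, 20, 0, 20, 0, 20, 0, 20], dtype=np.int32)

# No-offset path: pick the last prefill token for encoder batches and the
# single decode token otherwise -- one gathered row of tmp_out per batch.
seq_id = np.where(seq_lens_encoder > 0, seq_lens_encoder - 1, 0)
rows = cu_seqlens_q[:-1] + seq_id
out_no_offset = tmp_out[rows]
print(out_no_offset.shape)  # (8, 256)

# With-offset path: every prefill token except the last is dropped, so the
# output shrinks from token_num = 100 rows to 100 - 4 * 19 = 24 rows.
need_delete = int(np.sum(np.maximum(seq_lens_encoder - 1, 0)))
print(tmp_out.shape[0] - need_delete)  # 24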
