Skip to content

Commit

Permalink
not_need_stop to cpu
Browse files (browse the repository at this point in the history)
  • Loading branch information
yuanlehome committed Feb 24, 2025
1 parent 3102788 commit 2fb3378
Show file tree
Hide file tree
Showing 5 changed files with 13 additions and 129 deletions.
7 changes: 3 additions & 4 deletions csrc/gpu/save_with_output_msg.cc
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,20 +27,19 @@ struct msgdata {
};

void SaveOutMmsg(const paddle::Tensor& x,
const paddle::Tensor& not_need_stop,
const paddle::Tensor& not_need_stop, // cpu
int64_t rank_id) {
if (rank_id > 0) return;
auto x_cpu = x.copy_to(paddle::CPUPlace(), false);
int64_t *x_data = x_cpu.data<int64_t>();
auto not_need_stop_cpu = not_need_stop.copy_to(paddle::CPUPlace(), false);
bool* not_need_stop_data = not_need_stop_cpu.data<bool>();
auto not_need_stop_data = not_need_stop.data<bool>()[0];

static struct msgdata msg_sed;
static key_t key = ftok("./", 1);
static int msgid = msgget(key, IPC_CREAT | 0666);

msg_sed.mtype = 1;
msg_sed.mtext[0] = not_need_stop_data[0] ? 1 : -1;
msg_sed.mtext[0] = not_need_stop_data ? 1 : -1;
int bsz = x.shape()[0];
msg_sed.mtext[1] = bsz;
for (int i = 2; i < bsz + 2; i++) {
Expand Down
121 changes: 0 additions & 121 deletions csrc/gpu/update_inputs.cu

This file was deleted.

11 changes: 9 additions & 2 deletions csrc/gpu/update_inputs_v2.cu
Original file line number | Diff line number | Diff line change
Expand Up @@ -110,7 +110,7 @@ __global__ void update_inputs_kernel_v2(

void UpdateInputesV2(const paddle::Tensor& stop_flags,
const paddle::Tensor& step_idx,
const paddle::Tensor& not_need_stop,
const paddle::Tensor& not_need_stop, // cpu
const paddle::Tensor& seq_lens_this_time,
const paddle::Tensor& seq_lens_encoder,
const paddle::Tensor& seq_lens_decoder,
Expand All @@ -125,8 +125,11 @@ void UpdateInputesV2(const paddle::Tensor& stop_flags,
const int now_bsz = seq_lens_this_time.shape()[0];
const int input_ids_stride = input_ids.shape()[1];
const int end_length = end_ids.shape()[0];

auto not_need_stop_gpu = not_need_stop.copy_to(stop_flags.place(), false);

update_inputs_kernel_v2<1024><<<1, 1024, 0, input_ids.stream()>>>(
const_cast<bool*>(not_need_stop.data<bool>()),
const_cast<bool*>(not_need_stop_gpu.data<bool>()),
const_cast<int64_t*>(step_idx.data<int64_t>()),
const_cast<bool*>(stop_flags.data<bool>()),
const_cast<int*>(seq_lens_this_time.data<int>()),
Expand All @@ -144,6 +147,10 @@ void UpdateInputesV2(const paddle::Tensor& stop_flags,
input_ids_stride,
end_length
);

auto not_need_stop_cpu = not_need_stop_gpu.copy_to(not_need_stop.place(), false);
bool *not_need_stop_data = const_cast<bool*>(not_need_stop.data<bool>());
not_need_stop_data[0] = not_need_stop_cpu.data<bool>()[0];
}

PD_BUILD_OP(update_inputs_v2)
Expand Down
1 change: 0 additions & 1 deletion csrc/setup_cuda.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -96,7 +96,6 @@ def get_gencode_flags():
"./gpu/rebuild_padding_v2.cu",
"./gpu/set_value_by_flags_v2.cu",
"./gpu/stop_generation_multi_ends_v2.cu",
"./gpu/update_inputs.cu",
"./gpu/get_output.cc",
"./gpu/save_with_output_msg.cc",
"./gpu/write_int8_cache_kv.cu",
Expand Down
2 changes: 1 addition & 1 deletion llm/predict/predictor.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -979,7 +979,7 @@ def _preprocess(self, input_text: list[str]):
shape=[self.config.batch_size, 1], fill_value=0, dtype="int32"
)
self.model_inputs["step_idx"] = paddle.full(shape=[self.config.batch_size, 1], fill_value=0, dtype="int64")
self.model_inputs["not_need_stop"] = paddle.full(shape=[1], fill_value=True, dtype="bool")
self.model_inputs["not_need_stop"] = paddle.full(shape=[1], fill_value=True, dtype="bool").cpu() # cpu
self.model_inputs["stop_flags"] = paddle.full(
shape=[self.config.batch_size, 1], fill_value=False, dtype="bool"
)
Expand Down

0 comments on commit 2fb3378

Please sign in to comment.