Skip to content

Commit c07a0b5

Browse files
committed
Revert "sched : fix possible use of wrong ids tensor when offloading moe prompt processing (ggml-org#15488)"
This reverts commit 54a241f.
1 parent de93218 commit c07a0b5

File tree

2 files changed: +8 additions, −21 deletions

common/arg.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1757,7 +1757,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
17571757
[](common_params & params) {
17581758
params.warmup = false;
17591759
}
1760-
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL, LLAMA_EXAMPLE_PERPLEXITY}));
1760+
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL}));
17611761
add_opt(common_arg(
17621762
{"--spm-infill"},
17631763
string_format(

ggml/src/ggml-backend.cpp

Lines changed: 7 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1361,15 +1361,15 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
13611361
std::vector<int32_t> ids;
13621362
std::vector<ggml_bitset_t> used_ids;
13631363

1364-
for (int split_id = 0; split_id < sched->n_splits; split_id++) {
1365-
struct ggml_backend_sched_split * split = &splits[split_id];
1364+
for (int i = 0; i < sched->n_splits; i++) {
1365+
struct ggml_backend_sched_split * split = &splits[i];
13661366
int split_backend_id = split->backend_id;
13671367
ggml_backend_t split_backend = sched->backends[split_backend_id];
13681368

13691369
// copy the input tensors to the split backend
1370-
for (int input_id = 0; input_id < split->n_inputs; input_id++) {
1371-
ggml_backend_t input_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[input_id]);
1372-
struct ggml_tensor * input = split->inputs[input_id];
1370+
for (int j = 0; j < split->n_inputs; j++) {
1371+
ggml_backend_t input_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[j]);
1372+
struct ggml_tensor * input = split->inputs[j];
13731373
struct ggml_tensor * input_cpy = tensor_copy(input, split_backend_id, sched->cur_copy);
13741374

13751375
if (input->flags & GGML_TENSOR_FLAG_INPUT) {
@@ -1404,30 +1404,17 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
14041404

14051405
// get the ids
14061406
ggml_tensor * ids_tensor = node->src[2];
1407-
ggml_backend_t ids_backend = split_backend;
1408-
1409-
// if the ids tensor is also an input of the split, it may not have been copied yet to the split backend
1410-
// in that case, we use the original ids tensor
1411-
for (int i = input_id + 1; i < split->n_inputs; i++) {
1412-
if (ids_tensor == tensor_copy(split->inputs[i], split_backend_id, sched->cur_copy)) {
1413-
ids_tensor = split->inputs[i];
1414-
ids_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[i]);
1415-
break;
1416-
}
1417-
}
1418-
14191407
if (ids_tensor != prev_ids_tensor) {
14201408
ids.resize(ggml_nbytes(ids_tensor) / sizeof(int32_t));
1421-
ggml_backend_tensor_get_async(ids_backend, ids_tensor, ids.data(), 0, ggml_nbytes(ids_tensor));
1422-
ggml_backend_synchronize(ids_backend);
1409+
ggml_backend_tensor_get_async(split_backend, ids_tensor, ids.data(), 0, ggml_nbytes(ids_tensor));
1410+
ggml_backend_synchronize(split_backend);
14231411

14241412
// find the used experts
14251413
used_ids.clear();
14261414
used_ids.resize(ggml_bitset_size(n_expert));
14271415
for (int64_t i1 = 0; i1 < ids_tensor->ne[1]; i1++) {
14281416
for (int64_t i0 = 0; i0 < ids_tensor->ne[0]; i0++) {
14291417
int32_t id = ids[i1 * ids_tensor->nb[1]/sizeof(int32_t) + i0 * ids_tensor->nb[0]/sizeof(int32_t)];
1430-
GGML_ASSERT(id >= 0 && id < n_expert);
14311418
ggml_bitset_set(used_ids.data(), id);
14321419
}
14331420
}

Comments (0)