Skip to content

Commit 84853b9

Browse files
author
Iwan Kawrakow
committed
Better concat for contiguous tensors
If all the op does is concatenate the second tensor onto the first, why would we want a per-element loop?
1 parent 285b97b commit 84853b9

File tree

1 file changed

+20
-0
lines changed

1 file changed

+20
-0
lines changed

ggml/src/ggml.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12627,6 +12627,26 @@ static void ggml_compute_forward_concat_f32(
1262712627

1262812628
GGML_ASSERT(dim >= 0 && dim < 4);
1262912629

12630+
if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1) && ggml_is_contiguous(dst) &&
12631+
(dim == 3 || (dim == 2 && dst->ne[3] == 1) || (dim == 1 && dst->ne[2]*dst->ne[3] == 1))) {
12632+
// simply copy the data
12633+
const int64_t size_src_0 = ggml_nbytes(src0);
12634+
const int64_t size_src_1 = ggml_nbytes(src1);
12635+
const int64_t block_size = 4096;
12636+
const int64_t num_blocks = (size_src_0 + size_src_1 + block_size - 1)/block_size;
12637+
for (int64_t i_block = ith; i_block < num_blocks; i_block += nth) {
12638+
const int64_t start = i_block*block_size;
12639+
if (start < size_src_0) {
12640+
int64_t copy_size = MIN(block_size, size_src_0 - start);
12641+
memcpy((char *)dst->data + start, (char *)src0->data + start, copy_size);
12642+
} else {
12643+
int64_t copy_size = MIN(block_size, size_src_0 + size_src_1 - start);
12644+
memcpy((char *)dst->data + start, (char *)src1->data + start - size_src_0, copy_size);
12645+
}
12646+
}
12647+
return;
12648+
}
12649+
1263012650
int64_t o[4] = {0, 0, 0, 0};
1263112651
o[dim] = src0->ne[dim];
1263212652

0 commit comments

Comments (0)