Skip to content

Commit

Permalink
[LibOS] Allocate temporary CPU mask on stack in sched_setaffinity
Browse files Browse the repository at this point in the history
Previously, temporary CPU mask was always allocated on heap. However,
the current LibOS memory allocator exhibits bad performance in
multi-threaded environments as it has a single global lock. This becomes
a performance bottleneck on some heavily multi-threaded workloads like
OpenVINO. This commit introduces a fast path for the common case of
platforms with at most 1024 CPUs, where the temporary CPU mask is
allocated on the stack and thus avoids lock contention.

Signed-off-by: Dmitrii Kuvaiskii <[email protected]>
  • Loading branch information
dimakuv committed Sep 16, 2022
1 parent 00e91a0 commit a8e3414
Showing 1 changed file with 25 additions and 8 deletions.
33 changes: 25 additions & 8 deletions libos/src/sys/libos_sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@ long libos_syscall_sched_rr_get_interval(pid_t pid, struct timespec* interval) {

long libos_syscall_sched_setaffinity(pid_t pid, unsigned int user_mask_size,
unsigned long* user_mask_ptr) {
int ret;

if (!is_user_memory_readable(user_mask_ptr, user_mask_size)) {
return -EFAULT;
}
Expand All @@ -158,14 +160,28 @@ long libos_syscall_sched_setaffinity(pid_t pid, unsigned int user_mask_size,
get_thread(thread);
}

int ret;
unsigned long* cpu_mask = calloc(GET_CPU_MASK_LEN(), sizeof(*cpu_mask));
if (!cpu_mask) {
ret = -ENOMEM;
goto out;
/* allocate temporary CPU mask on stack for the common case of platforms with <= 1024 CPUs; we
* try to avoid heap allocations because current memory allocator has a global lock */
bool cpu_mask_on_heap;
unsigned long* cpu_mask;
size_t cpu_mask_size = GET_CPU_MASK_LEN() * sizeof(*cpu_mask);

if (cpu_mask_size <= 128) {
/* fast path: allocate on stack if the platform has <= 1024 CPUs */
cpu_mask_on_heap = false;
cpu_mask = __alloca(cpu_mask_size);
} else {
/* slow path: allocate on heap if the platform has > 1024 CPUs */
cpu_mask_on_heap = true;
cpu_mask = malloc(cpu_mask_size);
if (!cpu_mask) {
ret = -ENOMEM;
goto out;
}
}

memcpy(cpu_mask, user_mask_ptr, MIN(user_mask_size, GET_CPU_MASK_LEN() * sizeof(*cpu_mask)));
memset(cpu_mask, 0, cpu_mask_size);
memcpy(cpu_mask, user_mask_ptr, MIN(user_mask_size, cpu_mask_size));

bool seen_online = false;
size_t threads_count = g_pal_public_state->topo_info.threads_cnt;
Expand Down Expand Up @@ -195,13 +211,14 @@ long libos_syscall_sched_setaffinity(pid_t pid, unsigned int user_mask_size,
goto out_unlock;
}

memcpy(thread->cpu_affinity_mask, cpu_mask, GET_CPU_MASK_LEN() * sizeof(*cpu_mask));
memcpy(thread->cpu_affinity_mask, cpu_mask, cpu_mask_size);
ret = 0;

out_unlock:
unlock(&thread->lock);
out:
free(cpu_mask);
if (cpu_mask_on_heap)
free(cpu_mask);
put_thread(thread);
return ret;
}
Expand Down

0 comments on commit a8e3414

Please sign in to comment.