Skip to content

Commit 1bafdcc

Browse files
committed
linux: add support for numa set_mempolicy(2)
mempolicy.c: - new file - add CPP info to match userland vs kernel interface versioning to avoid manual tracking of the numaif interface - provide libcrun_set_mempolicy as a wrapper to set_mempolicy(2) with all possible error checking and reporting that can be done before calling set_mempolicy(2). - use the libnuma nodemask parser, which is actively maintained by the numa kernel maintainers to match kernel features and which provides accurate error reports based on the hw running the code (see also changes to error.c) mempolicy.h: - new file - declare libcrun_set_mempolicy mempolicy_internal.h: - new file - define memory policy mode and flags maps to be shared and updated in one single place - numa python bindings are not available on most distros. This makes numa feature detection challenging for the crun test suite without causing false positives. This header provides common definitions shared with tests_mempolicy_helper.c, which is used by the test suite, and avoids duplicating code. container.c: - add call to libcrun_set_mempolicy in libcrun_container_run_internal error.c: - override the numa_warn WEAK symbol from libnuma to capture numa parser errors and translate them into crun warnings tests/tests_mempolicy_helper.c: - new file - print a list of numa features detected during the build - shares info from mempolicy_internal.h tests/test_mempolicy.py: - new file - add both negative and positive tests for mempolicy.c - tests run if the hw supports numa and are skipped otherwise Makefile.am: - update - changes verified also with make distcheck Signed-off-by: Fabio M. Di Nitto <[email protected]>
1 parent 4fa30e1 commit 1bafdcc

File tree

8 files changed

+831
-2
lines changed

8 files changed

+831
-2
lines changed

Makefile.am

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ libcrun_SOURCES = src/libcrun/utils.c \
6767
src/libcrun/linux.c \
6868
src/libcrun/mount_flags.c \
6969
src/libcrun/scheduler.c \
70+
src/libcrun/mempolicy.c \
7071
src/libcrun/seccomp.c \
7172
src/libcrun/seccomp_notify.c \
7273
src/libcrun/signals.c \
@@ -160,7 +161,7 @@ EXTRA_DIST = COPYING COPYING.libcrun README.md NEWS SECURITY.md rpm/crun.spec au
160161
src/libcrun/custom-handler.h src/libcrun/io_priority.h \
161162
src/libcrun/handlers/handler-utils.h \
162163
src/libcrun/linux.h src/libcrun/utils.h src/libcrun/error.h src/libcrun/criu.h \
163-
src/libcrun/scheduler.h src/libcrun/status.h src/libcrun/terminal.h \
164+
src/libcrun/scheduler.h src/libcrun/mempolicy.h src/libcrun/mempolicy_internal.h src/libcrun/status.h src/libcrun/terminal.h \
164165
src/libcrun/mount_flags.h src/libcrun/intelrdt.h src/libcrun/ring_buffer.h src/libcrun/string_map.h \
165166
src/libcrun/net_device.h \
166167
crun.1.md crun.1 libcrun.lds \
@@ -179,7 +180,7 @@ noinst_PROGRAMS = crun
179180
endif
180181

181182
if BUILD_TESTS
182-
check_PROGRAMS = tests/init $(UNIT_TESTS) tests/tests_libcrun_fuzzer
183+
check_PROGRAMS = tests/init $(UNIT_TESTS) tests/tests_libcrun_fuzzer tests/tests_mempolicy_helper
183184

184185
TESTS_LDADD = libcrun_testing.la $(FOUND_LIBS) $(maybe_libyajl.la)
185186

@@ -212,6 +213,11 @@ tests_tests_libcrun_errors_SOURCES = tests/tests_libcrun_errors.c
212213
tests_tests_libcrun_errors_LDADD = $(TESTS_LDADD)
213214
tests_tests_libcrun_errors_LDFLAGS = $(crun_LDFLAGS)
214215

216+
tests_tests_mempolicy_helper_CFLAGS = -I $(abs_top_builddir)/libocispec/src -I $(abs_top_srcdir)/libocispec/src -I $(abs_top_builddir)/src/libcrun -I $(abs_top_srcdir)/src/libcrun
217+
tests_tests_mempolicy_helper_SOURCES = tests/tests_mempolicy_helper.c
218+
tests_tests_mempolicy_helper_LDADD = $(TESTS_LDADD)
219+
tests_tests_mempolicy_helper_LDFLAGS = $(crun_LDFLAGS)
220+
215221
endif
216222
TEST_EXTENSIONS = .py
217223
PY_LOG_COMPILER = $(PYTHON)
@@ -225,6 +231,7 @@ PYTHON_TESTS = tests/test_capabilities.py \
225231
tests/test_hostname.py \
226232
tests/test_limits.py \
227233
tests/test_oci_features.py \
234+
tests/test_mempolicy.py \
228235
tests/test_mounts.py \
229236
tests/test_paths.py \
230237
tests/test_pid.py \

src/libcrun/container.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "container.h"
2424
#include "utils.h"
2525
#include "seccomp.h"
26+
#include "mempolicy.h"
2627
#ifdef HAVE_SECCOMP
2728
# include <seccomp.h>
2829
#endif
@@ -2805,6 +2806,10 @@ libcrun_container_run_internal (libcrun_container_t *container, libcrun_context_
28052806
if (UNLIKELY (ret < 0))
28062807
return ret;
28072808

2809+
ret = libcrun_set_mempolicy (def, err);
2810+
if (UNLIKELY (ret < 0))
2811+
return ret;
2812+
28082813
ret = libcrun_configure_handler (container_args.context->handler_manager,
28092814
container_args.context,
28102815
container,

src/libcrun/error.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,23 @@ libcrun_warning (const char *msg, ...)
480480
va_end (args_list);
481481
}
482482

483+
#ifdef HAVE_NUMA
484+
/* override libnuma internal numa_warn implementation
485+
* that is defined as WEAK to allow consumers to define
486+
* their own behavior.
487+
* symbol has to be public for linker to use our version
488+
* and allow to convert numa messages into libcrun messages */
489+
LIBCRUN_PUBLIC
490+
void
491+
numa_warn (int number arg_unused, char *msg, ...)
492+
{
493+
va_list args_list;
494+
va_start (args_list, msg);
495+
write_log (0, LIBCRUN_VERBOSITY_WARNING, msg, args_list);
496+
va_end (args_list);
497+
}
498+
#endif
499+
483500
void
484501
libcrun_error (int errno_, const char *msg, ...)
485502
{

src/libcrun/mempolicy.c

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
/*
2+
* crun - OCI runtime written in C
3+
*
4+
* Copyright (C) 2017, 2018, 2019 Giuseppe Scrivano <[email protected]>
5+
* crun is free software; you can redistribute it and/or modify
6+
* it under the terms of the GNU Lesser General Public License as published by
7+
* the Free Software Foundation; either version 2.1 of the License, or
8+
* (at your option) any later version.
9+
*
10+
* crun is distributed in the hope that it will be useful,
11+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
* GNU Lesser General Public License for more details.
14+
*
15+
* You should have received a copy of the GNU Lesser General Public License
16+
* along with crun. If not, see <http://www.gnu.org/licenses/>.
17+
*/
18+
19+
#include <config.h>
20+
#include "linux.h"
21+
#include "utils.h"
22+
#include <ocispec/runtime_spec_schema_config_schema.h>
23+
24+
#ifdef HAVE_NUMA
25+
# include <numa.h>
26+
# include <numaif.h>
27+
# include "mempolicy_internal.h"
28+
29+
/* Interface versions this file was written against.  */
# define CRUN_NUMA_API_VERSION 2 /* numa.h LIBNUMA_API_VERSION at the time of writing */
# define CRUN_NUMA_MPOL_MAX 7 /* numaif.h MPOL_MAX at the time of writing */

/* The build stops when numa.h does not report its API version and
 * warns when the reported version is newer than the one known here.  */
# if ! defined LIBNUMA_API_VERSION
#  error "Unable to determine libnuma api version"
# elif LIBNUMA_API_VERSION > CRUN_NUMA_API_VERSION
#  warning "This code was written with libnuma API version 2. numa.h reports a higher version"
# endif

/* Same check for the numaif.h policy interface, keyed on MPOL_MAX.  */
# if ! defined MPOL_MAX
#  error "Unable to determine numaif interface version"
# elif MPOL_MAX > CRUN_NUMA_MPOL_MAX
#  warning "This code was written with numaif MPOL_MAX 7. numaif.h reports a higher version"
# endif
46+
47+
static int
48+
mpol_str2int (const char *str, const str2int_map_t *map)
49+
{
50+
int idx = 0;
51+
52+
while (map[idx].name != NULL)
53+
{
54+
if (! strcmp (map[idx].name, str))
55+
{
56+
return map[idx].value;
57+
}
58+
idx++;
59+
}
60+
61+
errno = EINVAL;
62+
return -1;
63+
}
64+
#endif
65+
66+
int
67+
libcrun_set_mempolicy (runtime_spec_schema_config_schema *def, libcrun_error_t *err)
68+
{
69+
#ifdef HAVE_NUMA
70+
runtime_spec_schema_config_linux_memory_policy *memory_policy = NULL;
71+
int mpol_mode = 0;
72+
int mpol_flag = 0;
73+
int mpol_mode_flags = 0;
74+
struct bitmask *nodemask = NULL;
75+
size_t i = 0;
76+
int ret = 0;
77+
int savederrno = 0;
78+
79+
libcrun_debug ("Initializing linux numa mempolicy");
80+
81+
if (def->linux && def->linux->memory_policy)
82+
{
83+
memory_policy = def->linux->memory_policy;
84+
85+
libcrun_debug ("Checking hardware numa availability");
86+
if (numa_available () < 0)
87+
{
88+
return crun_make_error (err, ENOENT, "linux numa not supported on current hardware");
89+
}
90+
91+
libcrun_debug ("Validating linux numa mempolicy");
92+
93+
/* validate memory policy mode */
94+
if (! memory_policy->mode)
95+
{
96+
return crun_make_error (err, EINVAL, "linux numa mempolicy mode is missing from the configuration");
97+
}
98+
libcrun_debug ("Validating mode: %s", memory_policy->mode);
99+
mpol_mode = mpol_str2int (memory_policy->mode, mpol_mode_map);
100+
if (mpol_mode < 0)
101+
{
102+
return crun_make_error (err, EINVAL, "Requested linux numa mempolicy mode '%s' is unknown", memory_policy->mode);
103+
}
104+
mpol_mode_flags = mpol_mode;
105+
106+
/* both MPOL_DEFAULT and MPOL_LOCAL calls to set_mempolicy expects only
107+
* the mpol_mode, no nodemask or flags */
108+
if (mpol_mode != MPOL_DEFAULT && mpol_mode != MPOL_LOCAL)
109+
{
110+
/* validating memory policy flags */
111+
libcrun_debug ("Validating mode flags: %zu configured", memory_policy->flags_len);
112+
for (i = 0; i < memory_policy->flags_len; i++)
113+
{
114+
libcrun_debug ("Validating mode flag: %s", memory_policy->flags[i]);
115+
mpol_flag = mpol_str2int (memory_policy->flags[i], mpol_flag_map);
116+
if (mpol_flag < 0)
117+
{
118+
return crun_make_error (err, EINVAL, "Requested linux numa mempolicy flag '%s' is unknown", memory_policy->flags[i]);
119+
}
120+
mpol_mode_flags = mpol_mode_flags | mpol_flag;
121+
}
122+
123+
/* sanity check mode and flags combinations */
124+
# if defined MPOL_F_NUMA_BALANCING
125+
if ((mpol_mode_flags & MPOL_F_NUMA_BALANCING) && mpol_mode != MPOL_BIND)
126+
{
127+
return crun_make_error (err, EINVAL, "Requested linux numa mempolicy flag MPOL_F_NUMA_BALANCING is incompatible with %s", memory_policy->mode);
128+
}
129+
# endif
130+
# if defined MPOL_F_RELATIVE_NODES && defined MPOL_F_STATIC_NODES
131+
if ((mpol_mode_flags & MPOL_F_RELATIVE_NODES) && (mpol_mode_flags & MPOL_F_STATIC_NODES))
132+
{
133+
return crun_make_error (err, EINVAL, "Requested linux numa mempolicy flag MPOL_F_RELATIVE_NODES and MPOL_F_STATIC_NODES cannot be combined");
134+
}
135+
# endif
136+
/* validate memory nodes */
137+
if (! memory_policy->nodes)
138+
{
139+
return crun_make_error (err, EINVAL, "linux numa mempolicy nodes is missing from the configuration");
140+
}
141+
libcrun_debug ("Validating nodes: %s", memory_policy->nodes);
142+
/* validation is done by libnuma based on hw environment
143+
* and numa_warn symbol is overridden in error.c to convert
144+
* numa logging to libcrun logging */
145+
nodemask = numa_parse_nodestring_all (memory_policy->nodes);
146+
if (! nodemask)
147+
{
148+
return crun_make_error (err, EINVAL, "numa_parse_nodestring_all validation failed");
149+
}
150+
151+
ret = set_mempolicy (mpol_mode_flags, nodemask->maskp, nodemask->size - 1);
152+
savederrno = errno;
153+
numa_bitmask_free (nodemask);
154+
errno = savederrno;
155+
}
156+
else
157+
{
158+
ret = set_mempolicy (mpol_mode, NULL, 0);
159+
}
160+
161+
if (ret < 0)
162+
{
163+
return crun_make_error (err, errno, "set_mempolicy: %d errno: %d\n", ret, errno);
164+
}
165+
}
166+
else
167+
{
168+
libcrun_debug ("no linux numa mempolicy configuration found");
169+
}
170+
#endif
171+
return ret;
172+
}

src/libcrun/mempolicy.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* crun - OCI runtime written in C
3+
*
4+
* Copyright (C) 2017, 2018, 2019, 2021 Giuseppe Scrivano <[email protected]>
5+
* crun is free software; you can redistribute it and/or modify
6+
* it under the terms of the GNU Lesser General Public License as published by
7+
* the Free Software Foundation; either version 2.1 of the License, or
8+
* (at your option) any later version.
9+
*
10+
* crun is distributed in the hope that it will be useful,
11+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
* GNU Lesser General Public License for more details.
14+
*
15+
* You should have received a copy of the GNU Lesser General Public License
16+
* along with crun. If not, see <http://www.gnu.org/licenses/>.
17+
*/
18+
#ifndef MEMPOLICY_H
19+
#define MEMPOLICY_H
20+
#include <config.h>
21+
#include "error.h"
22+
#include "container.h"
23+
#include "status.h"
24+
25+
int libcrun_set_mempolicy (runtime_spec_schema_config_schema *def, libcrun_error_t *err);
26+
27+
#endif

src/libcrun/mempolicy_internal.h

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/*
2+
* crun - OCI runtime written in C
3+
*
4+
* Copyright (C) 2017, 2018, 2019, 2021 Giuseppe Scrivano <[email protected]>
5+
* crun is free software; you can redistribute it and/or modify
6+
* it under the terms of the GNU Lesser General Public License as published by
7+
* the Free Software Foundation; either version 2.1 of the License, or
8+
* (at your option) any later version.
9+
*
10+
* crun is distributed in the hope that it will be useful,
11+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
* GNU Lesser General Public License for more details.
14+
*
15+
* You should have received a copy of the GNU Lesser General Public License
16+
* along with crun. If not, see <http://www.gnu.org/licenses/>.
17+
*/
18+
#ifndef MEMPOLICY_INTERNAL_H
19+
#define MEMPOLICY_INTERNAL_H
20+
21+
#include <numaif.h>
22+
23+
/* One entry of a name-to-integer lookup table.  Tables built from this
 * type end with an entry whose name is NULL.  */
typedef struct str2int_map_s
{
  const char *name; /* textual key */
  int value;        /* integer associated with the key */
} str2int_map_t;
28+
29+
/* update mpol_mode_map based on numaif.h MPOL_MAX
30+
* the warn in mempolicy.c will indicate that an update is required.
31+
* MPOL_WEIGHTED_INTERLEAVE has been introduced in MPOL_MAX 7 (kernel 6.9+)
32+
* and some distros still has older kernel interfaces */
33+
str2int_map_t mpol_mode_map[] = {
34+
{ "MPOL_DEFAULT", MPOL_DEFAULT },
35+
{ "MPOL_PREFERRED", MPOL_PREFERRED },
36+
{ "MPOL_BIND", MPOL_BIND },
37+
{ "MPOL_INTERLEAVE", MPOL_INTERLEAVE },
38+
{ "MPOL_LOCAL", MPOL_LOCAL },
39+
{ "MPOL_PREFERRED_MANY", MPOL_PREFERRED_MANY },
40+
#ifdef MPOL_WEIGHTED_INTERLEAVE
41+
{ "MPOL_WEIGHTED_INTERLEAVE", MPOL_WEIGHTED_INTERLEAVE },
42+
#endif
43+
{ NULL, -1 }
44+
};
45+
46+
/* flags cannot be tracked the same way as mode */
47+
str2int_map_t mpol_flag_map[] = {
48+
#ifdef MPOL_F_NUMA_BALANCING
49+
{ "MPOL_F_NUMA_BALANCING", MPOL_F_NUMA_BALANCING },
50+
#endif
51+
#ifdef MPOL_F_RELATIVE_NODES
52+
{ "MPOL_F_RELATIVE_NODES", MPOL_F_RELATIVE_NODES },
53+
#endif
54+
#ifdef MPOL_F_STATIC_NODES
55+
{ "MPOL_F_STATIC_NODES", MPOL_F_STATIC_NODES },
56+
#endif
57+
{ NULL, -1 }
58+
};
59+
60+
#endif

0 commit comments

Comments
 (0)