From 3e778e576f25ad2e497e8651b919487cfb9fc076 Mon Sep 17 00:00:00 2001 From: Dmitrii Kuvaiskii Date: Tue, 10 Aug 2021 09:28:36 -0700 Subject: [PATCH] Add `loader.env.[ENVIRON] = {passthrough=true}` manifest option It is now possible to define some envvars that are passed through from the host inside the Gramine instance (previously the only way to pass envvars was to set `loader.insecure__use_host_env`). A new LibOS regression test is added. Signed-off-by: Dmitrii Kuvaiskii --- Documentation/manifest-syntax.rst | 28 +- LibOS/shim/test/regression/.gitignore | 1 + LibOS/shim/test/regression/Makefile | 1 + .../env_passthrough.manifest.template | 22 ++ LibOS/shim/test/regression/test_libos.py | 22 +- Pal/src/db_main.c | 266 ++++++++++++++---- 6 files changed, 272 insertions(+), 68 deletions(-) create mode 100644 LibOS/shim/test/regression/env_passthrough.manifest.template diff --git a/Documentation/manifest-syntax.rst b/Documentation/manifest-syntax.rst index 73585bab03..d86a30a28a 100644 --- a/Documentation/manifest-syntax.rst +++ b/Documentation/manifest-syntax.rst @@ -156,10 +156,20 @@ both of the following options: :: loader.env.[ENVIRON] = "[VALUE]" + or + loader.env.[ENVIRON] = { value = "[VALUE]" } + or + loader.env.[ENVIRON] = { passthrough = true } + loader.env_src_file = "file:file_with_serialized_envs" -``loader.env.[ENVIRON]`` adds/overwrites a single environment variable and can -be used multiple times to specify more than one variable. +``loader.env.[ENVIRON]`` adds/overwrites/passes a single environment variable +and can be used multiple times to specify more than one variable. To +add/overwrite the environment variable, specify a TOML string (``"[VALUE]"``) or +a TOML table with the key-value pair ``{ value = "[VALUE]" }``. To pass the +environment variable from the host, specify a TOML table with the key-value pair +``{ passthrough = true }``. If you specify a variable, it needs to either have a +value or be a passthrough. ``loader.env_src_file`` allows to specify a URI to a file containing serialized environment, which can be generated using :program:`gramine-argv-serializer`. @@ -174,7 +184,17 @@ provided at runtime from an external (trusted) source. variables. If the same variable is set in both, then ``loader.env.[ENVIRON]`` takes -precedence. +precedence. It is prohibited to specify both ``value`` and ``passthrough`` keys +for the same environment variable. If manifest option ``insecure__use_host_env`` +is specified, then ``passthrough = true`` manifest options have no effect (they +are "consumed" by ``insecure__use_host_env``). + +.. note :: + It is tempting to try to passthrough all environment variables using + ``insecure__use_host_env`` and then disallow some of them using ``passthrough + = false``. However, this deny list approach is intentionally prohibited + because it's inherently insecure (doesn't provide any real security). + Gramine loudly fails if ``passthrough = false`` manifest options are set. Disabling ASLR ^^^^^^^^^^^^^^ @@ -308,7 +328,7 @@ Gramine currently supports two types of mount points: * ``chroot``: Host-backed files. All host files and sub-directories found under ``[URI]`` are forwarded to the Gramine instance and placed under ``[PATH]``. For example, with a host-level path specified as - ``fs.mount.lib.uri = "file:/one/path/"`` and forwarded to Graphene via + ``fs.mount.lib.uri = "file:/one/path/"`` and forwarded to Gramine via ``fs.mount.lib.path = "/another/path"``, a host-level file ``/one/path/file`` is visible to graphenized application as ``/another/path/file``. This concept is similar to FreeBSD's chroot and to diff --git a/LibOS/shim/test/regression/.gitignore b/LibOS/shim/test/regression/.gitignore index 423e9a7d1c..f678c9f5fb 100644 --- a/LibOS/shim/test/regression/.gitignore +++ b/LibOS/shim/test/regression/.gitignore @@ -25,6 +25,7 @@ /double_fork /env_from_file /env_from_host +/env_passthrough /epoll_epollet /epoll_wait_timeout /eventfd diff --git a/LibOS/shim/test/regression/Makefile b/LibOS/shim/test/regression/Makefile index aaaaa63ad5..52e2b0229f 100644 --- a/LibOS/shim/test/regression/Makefile +++ b/LibOS/shim/test/regression/Makefile @@ -107,6 +107,7 @@ repo_manifests = \ device_passthrough.manifest \ env_from_file.manifest \ env_from_host.manifest \ + env_passthrough.manifest \ file_check_policy_allow_all_but_log.manifest \ file_check_policy_strict.manifest \ multi_pthread_exitless.manifest diff --git a/LibOS/shim/test/regression/env_passthrough.manifest.template b/LibOS/shim/test/regression/env_passthrough.manifest.template new file mode 100644 index 0000000000..eb97508df8 --- /dev/null +++ b/LibOS/shim/test/regression/env_passthrough.manifest.template @@ -0,0 +1,22 @@ +loader.preload = "file:{{ gramine.libos }}" +loader.argv0_override = "bootstrap" +libos.entrypoint = "bootstrap" + +loader.env.LD_LIBRARY_PATH = "/lib" + +loader.env.A = { passthrough = true } +loader.env.B = { value = "OVERWRITTEN_VALUE" } + +# loader.env.C = { passthrough = false } # not allowed for security reasons +# loader.env.E = { passthrough = true, value = "THIS_IS_INCORRECT_SYNTAX" } + +fs.mount.lib.type = "chroot" +fs.mount.lib.path = "/lib" +fs.mount.lib.uri = "file:{{ gramine.runtimedir() }}" + +sgx.nonpie_binary = true + +sgx.trusted_files = [ + "file:{{ gramine.runtimedir() }}/", + "file:bootstrap", +] diff --git a/LibOS/shim/test/regression/test_libos.py b/LibOS/shim/test/regression/test_libos.py index 21c62b73a6..03e6d22e00 100644 --- a/LibOS/shim/test/regression/test_libos.py +++ b/LibOS/shim/test/regression/test_libos.py @@ -112,15 +112,33 @@ def test_104_env_from_file(self): finally: os.remove('env_test_input') + def test_105_env_passthrough(self): + host_envs = { + 'A': 'THIS_WILL_BE_PASSED', + 'B': 'THIS_WILL_BE_OVERWRITTEN', + 'C': 'THIS_SHOULDNT_BE_PASSED', + 'D': 'THIS_SHOULDNT_BE_PASSED_TOO', + } + manifest_envs = {'LD_LIBRARY_PATH': '/lib'} + stdout, _ = self.run_binary(['env_passthrough'], env=host_envs) + self.assertIn('# of envs: %d\n' % (len(host_envs) - 2 + len(manifest_envs)), stdout) + + # We don't enforce any specific order of envs, so we skip checking the index. + self.assertIn('] = LD_LIBRARY_PATH=/lib\n', stdout) + self.assertIn('] = A=THIS_WILL_BE_PASSED\n', stdout) + self.assertIn('] = B=OVERWRITTEN_VALUE\n', stdout) + self.assertNotIn('] = C=THIS_SHOULDNT_BE_PASSED\n', stdout) + self.assertNotIn('] = D=THIS_SHOULDNT_BE_PASSED_TOO\n', stdout) + @unittest.skipUnless(HAS_SGX, 'This test is only meaningful on SGX PAL because only SGX catches raw ' 'syscalls and redirects to Gramine\'s LibOS. If we will add seccomp to ' 'Linux PAL, then we should allow this test on Linux PAL as well.') - def test_105_basic_bootstrapping_static(self): + def test_106_basic_bootstrapping_static(self): stdout, _ = self.run_binary(['bootstrap_static']) self.assertIn('Hello world (bootstrap_static)!', stdout) - def test_106_basic_bootstrapping_pie(self): + def test_107_basic_bootstrapping_pie(self): stdout, _ = self.run_binary(['bootstrap_pie']) self.assertIn('User program started', stdout) self.assertIn('Local Address in Executable: 0x', stdout) diff --git a/Pal/src/db_main.c b/Pal/src/db_main.c index 87176028bf..cea1ae255d 100644 --- a/Pal/src/db_main.c +++ b/Pal/src/db_main.c @@ -63,91 +63,227 @@ static void load_libraries(void) { } } +/* Finds `loader.env.{key}` in the manifest and returns its value in `out_val`/`out_passthrough`. + * Recognizes several formats: + * - `loader.env.{key} = "val"` + * - `loader.env.{key} = { value = "val" }` + * - `loader.env.{key} = { passthrough = true|false }` + * + * If `loader.env.{key}` doesn't exist, then returns 0 and `*out_exists = false`. If the key exists, + * then `*out_exists = true` and: + * - if passthrough is specified, then `*out_val` is NULL and `*out_passthrough` is true/false; + * - if the value is specified, then `*out_val` is set and `*out_passthrough` is false. + * + * For any other format, returns negative error code. The caller is responsible to free `out_val`. + */ +static int get_env_value_from_manifest(toml_table_t* toml_envs, const char* key, bool* out_exists, + char** out_val, bool* out_passthrough) { + int ret; + + assert(out_exists); + assert(out_val); + assert(out_passthrough); + + toml_table_t* toml_env_table = toml_table_in(toml_envs, key); + if (!toml_env_table) { + /* not table like `loader.env.key = {...}`, must be string like `loader.env.key = "val"` */ + toml_raw_t toml_env_val_raw = toml_raw_in(toml_envs, key); + if (!toml_env_val_raw) { + *out_val = NULL; + *out_passthrough = false; + *out_exists = false; + return 0; + } + + ret = toml_rtos(toml_env_val_raw, out_val); + if (ret < 0) + return -PAL_ERROR_NOMEM; + + *out_passthrough = false; + *out_exists = true; + return 0; + } + + toml_raw_t toml_passthrough_raw = toml_raw_in(toml_env_table, "passthrough"); + toml_raw_t toml_value_raw = toml_raw_in(toml_env_table, "value"); + + if (toml_passthrough_raw && toml_value_raw) { + /* disallow `loader.env.key = { passthrough = true, value = "val" }` */ + return -PAL_ERROR_INVAL; + } + + if (!toml_passthrough_raw) { + /* not passthrough like `loader.env.key = { passthrough = true }`, must be value-table like + * `loader.env.key = { value = "val" }` */ + if (!toml_value_raw) + return -PAL_ERROR_INVAL; + + ret = toml_rtos(toml_value_raw, out_val); + if (ret < 0) + return -PAL_ERROR_NOMEM; + + *out_passthrough = false; + *out_exists = true; + return 0; + } + + /* at this point, it must be `loader.env.key = { passthrough = true|false }`*/ + int passthrough_int; + ret = toml_rtob(toml_passthrough_raw, &passthrough_int); + if (ret < 0) + return -PAL_ERROR_INVAL; + + *out_passthrough = !!passthrough_int; + *out_val = NULL; + *out_exists = true; + return 0; +} + +static int create_empty_envs(const char*** out_envp) { + const char** new_envp = malloc(sizeof(*new_envp)); + if (!new_envp) + return -PAL_ERROR_NOMEM; + + new_envp[0] = NULL; + *out_envp = new_envp; + return 0; +} + /* This function leaks memory on failure (and this is non-trivial to fix), but the assumption is * that its failure finishes the execution of the whole process right away. */ -static int insert_envs_from_manifest(const char*** envpp) { +static int deep_copy_envs(const char** envp, const char*** out_envp) { + size_t orig_envs_cnt = 0; + for (const char** orig_env = envp; *orig_env; orig_env++) + orig_envs_cnt++; + + const char** new_envp = calloc(orig_envs_cnt + 1, sizeof(const char*)); + if (!new_envp) + return -PAL_ERROR_NOMEM; + + size_t idx = 0; + for (const char** orig_env = envp; *orig_env; orig_env++) { + new_envp[idx] = strdup(*orig_env); + if (!new_envp[idx]) + return -PAL_ERROR_NOMEM; + idx++; + } + assert(idx == orig_envs_cnt); + + *out_envp = new_envp; + return 0; +} + +/* Build environment for Gramine process, based on original environment (from host or file) and + * manifest. If `propagate` is true, copies all variables from `orig_envp`, except the ones + * overriden in manifest. Otherwise, copies only the variables from `orig_envp` that the manifest + * specifies as passthrough. + * + * This function leaks memory on failure (and this is non-trivial to fix), but the assumption is + * that its failure finishes the execution of the whole process right away. */ +static int build_envs(const char** orig_envp, bool propagate, const char*** out_envp) { int ret; - assert(envpp); toml_table_t* toml_loader = toml_table_in(g_pal_state.manifest_root, "loader"); if (!toml_loader) - return 0; + return propagate ? deep_copy_envs(orig_envp, out_envp) : create_empty_envs(out_envp); toml_table_t* toml_envs = toml_table_in(toml_loader, "env"); if (!toml_envs) - return 0; + return propagate ? deep_copy_envs(orig_envp, out_envp) : create_empty_envs(out_envp); ssize_t toml_envs_cnt = toml_table_nkval(toml_envs); - if (toml_envs_cnt <= 0) { - /* no env entries found in the manifest */ - return 0; - } + if (toml_envs_cnt < 0) + return -PAL_ERROR_INVAL; - size_t orig_envs_cnt = 0; - size_t overwrite_cnt = 0; - for (const char** orig_env = *envpp; *orig_env; orig_env++, orig_envs_cnt++) { - char* orig_env_key_end = strchr(*orig_env, '='); - if (!orig_env_key_end) - return -PAL_ERROR_INVAL; + ssize_t toml_env_tables_cnt = toml_table_ntab(toml_envs); + if (toml_env_tables_cnt < 0) + return -PAL_ERROR_INVAL; - *orig_env_key_end = '\0'; - toml_raw_t toml_env_raw = toml_raw_in(toml_envs, *orig_env); - if (toml_env_raw) { - /* found the original-env key in manifest (i.e., loader.env. exists) */ - overwrite_cnt++; - } - *orig_env_key_end = '='; - } + toml_envs_cnt += toml_env_tables_cnt; + if (toml_envs_cnt == 0) + return propagate ? deep_copy_envs(orig_envp, out_envp) : create_empty_envs(out_envp); + + size_t orig_envs_cnt = 0; + for (const char** orig_env = orig_envp; *orig_env; orig_env++) + orig_envs_cnt++; - size_t total_envs_cnt = orig_envs_cnt + toml_envs_cnt - overwrite_cnt; + /* For simplicity, overapproximate the number of envs by summing up the ones found in the + * original environment and the ones found in the manifest. */ + size_t total_envs_cnt = orig_envs_cnt + toml_envs_cnt; const char** new_envp = calloc(total_envs_cnt + 1, sizeof(const char*)); if (!new_envp) return -PAL_ERROR_NOMEM; - /* For simplicity, allocate each env anew; this is suboptimal but happens only once. First - * go through original envs and populate new_envp with only those that are not overwritten by - * manifest envs. Then append all manifest envs to new_envp. */ size_t idx = 0; - for (const char** orig_env = *envpp; *orig_env; orig_env++) { + + /* First, go through original variables and copy the ones that we're going to use (because of + * `propagate`, or because passthrough is specified for that variable in manifest). */ + for (const char** orig_env = orig_envp; *orig_env; orig_env++) { char* orig_env_key_end = strchr(*orig_env, '='); + if (!orig_env_key_end) + return -PAL_ERROR_INVAL; - *orig_env_key_end = '\0'; - toml_raw_t toml_env_raw = toml_raw_in(toml_envs, *orig_env); - if (!toml_env_raw) { - /* this original env is not found in manifest (i.e., not overwritten) */ - *orig_env_key_end = '='; - new_envp[idx] = malloc_copy(*orig_env, strlen(*orig_env) + 1); - if (!new_envp[idx]) { - /* don't care about proper memory cleanup since will terminate anyway */ + char* env_key = alloc_substr(*orig_env, orig_env_key_end - *orig_env); + if (!env_key) + return -PAL_ERROR_NOMEM; + + bool exists; + char* env_val; + bool passthrough; + ret = get_env_value_from_manifest(toml_envs, env_key, &exists, &env_val, &passthrough); + if (ret < 0) { + log_error("Invalid environment variable in manifest: '%s'", env_key); + return ret; + } + + if ((propagate && !exists) || (exists && passthrough)) { + new_envp[idx] = strdup(*orig_env); + if (!new_envp[idx]) return -PAL_ERROR_NOMEM; - } idx++; } - *orig_env_key_end = '='; + + free(env_key); + free(env_val); } - assert(idx < total_envs_cnt); + /* Then, go through the manifest variables and copy the ones with value provided. */ for (ssize_t i = 0; i < toml_envs_cnt; i++) { const char* toml_env_key = toml_key_in(toml_envs, i); assert(toml_env_key); - toml_raw_t toml_env_value_raw = toml_raw_in(toml_envs, toml_env_key); - assert(toml_env_value_raw); - char* toml_env_value = NULL; - ret = toml_rtos(toml_env_value_raw, &toml_env_value); + bool exists; + char* env_val; + bool passthrough; + ret = get_env_value_from_manifest(toml_envs, toml_env_key, &exists, &env_val, &passthrough); if (ret < 0) { - /* don't care about proper memory cleanup since will terminate anyway */ - return -PAL_ERROR_NOMEM; + log_error("Invalid environment variable in manifest: '%s'", toml_env_key); + return ret; + } + + assert(exists); + if (!env_val && !passthrough) { + log_error("Detected environment variable '%s' with `passthrough = false`. For security" + " reasons, Gramine doesn't allow this. Please see documentation on" + " 'loader.env' for details.", toml_env_key); + return -PAL_ERROR_DENIED; + } + + if (!env_val) { + /* this is envvar with passthrough = true, we accounted for it in previous loop */ + continue; } - char* final_env = alloc_concat3(toml_env_key, strlen(toml_env_key), "=", 1, toml_env_value, - strlen(toml_env_value)); + char* final_env = alloc_concat3(toml_env_key, -1, "=", 1, env_val, -1); + if (!final_env) + return -PAL_ERROR_NOMEM; + new_envp[idx++] = final_env; - free(toml_env_value); + free(env_val); } - assert(idx == total_envs_cnt); + assert(idx <= total_envs_cnt); - *envpp = new_envp; + *out_envp = new_envp; return 0; } @@ -207,8 +343,7 @@ static void configure_logging(void) { /* Loads a file containing a concatenation of C-strings. The resulting array of pointers is * NULL-terminated. All C-strings inside it reside in a single malloc-ed buffer starting at - * (*res)[0]. - */ + * `(*res)[0]`. The caller must free `(*res)[0]` and then `*res` when done with the array. */ static int load_cstring_array(const char* uri, const char*** res) { PAL_HANDLE hdl; PAL_STREAM_ATTR attr; @@ -363,6 +498,9 @@ noreturn void pal_main(uint64_t instance_id, /* current instance id */ } } + const char** orig_environments = NULL; + const char** final_environments = NULL; + bool use_host_env; ret = toml_bool_in(g_pal_state.manifest_root, "loader.insecure__use_host_env", /*defaultval=*/false, &use_host_env); @@ -378,11 +516,6 @@ noreturn void pal_main(uint64_t instance_id, /* current instance id */ "continue application execution, but this configuration must not be used in " "production!"); } - } else { - environments = malloc(sizeof(*environments)); - if (!environments) - INIT_FAIL(PAL_ERROR_NOMEM, "Out of memory"); - environments[0] = NULL; } char* env_src_file = NULL; @@ -397,18 +530,27 @@ noreturn void pal_main(uint64_t instance_id, /* current instance id */ if (env_src_file) { /* Insert environment variables from a file. We trust the file contents (this can be * achieved using protected or trusted files). */ - ret = load_cstring_array(env_src_file, &environments); + ret = load_cstring_array(env_src_file, &orig_environments); if (ret < 0) INIT_FAIL(-ret, "Cannot load environment variables from 'loader.env_src_file'"); - free(env_src_file); + } else { + /* Environment variables are taken from the host. */ + orig_environments = environments; } - // TODO: Envs from file should be able to override ones from the manifest, but current // code makes this hard to implement. - ret = insert_envs_from_manifest(&environments); + ret = build_envs(orig_environments, /*propagate=*/use_host_env || env_src_file, + &final_environments); if (ret < 0) - INIT_FAIL(-ret, "Inserting environment variables from the manifest failed"); + INIT_FAIL(-ret, "Building the final environment based on the original environment and the" + " manifest failed"); + + if (orig_environments != environments) { + free((char*)orig_environments[0]); + free(orig_environments); + } + free(env_src_file); load_libraries(); @@ -451,7 +593,7 @@ noreturn void pal_main(uint64_t instance_id, /* current instance id */ } /* Now we will start the execution */ - start_execution(arguments, environments); + start_execution(arguments, final_environments); out_fail: /* We wish we will never reached here */