From 628c56d7f0371202d047496ad23b5eacf1a96fe4 Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Sun, 6 Aug 2023 22:33:39 -0700 Subject: [PATCH 01/32] WiP on dynamic NR. Signed-off-by: Gerd Zellweger --- Cargo.lock | 1078 +++++++++++++++-- Cargo.toml | 1 + kernel/Cargo.toml | 3 +- kernel/run.py | 3 +- kernel/src/arch/unix/kcb.rs | 2 +- kernel/src/arch/unix/mod.rs | 2 +- kernel/src/arch/unix/process.rs | 2 +- kernel/src/arch/x86_64/coreboot.rs | 29 +- kernel/src/arch/x86_64/mod.rs | 41 +- .../x86_64/rackscale/get_shmem_structure.rs | 2 +- kernel/src/environment.rs | 2 +- kernel/src/integration_tests.rs | 2 +- kernel/src/nr.rs | 12 +- 13 files changed, 1048 insertions(+), 131 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1b11d08f0..050df3015 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -36,6 +36,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "ahash" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217" + [[package]] name = "ahash" version = "0.7.6" @@ -44,7 +50,7 @@ checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" dependencies = [ "getrandom 0.2.7", "once_cell", - "version_check", + "version_check 0.9.4", ] [[package]] @@ -65,6 +71,15 @@ dependencies = [ "as-slice", ] +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi 0.3.9", +] + [[package]] name = "apic" version = "0.0.1" @@ -104,7 +119,7 @@ checksum = "0609c78bd572f4edc74310dfb63a01f5609d53fa8b4dd7c4d98aef3b3e8d72d1" dependencies = [ "proc-macro-hack", "quote", - "syn", + "syn 1.0.98", ] [[package]] @@ -125,6 +140,17 @@ dependencies = [ "stable_deref_trait", 
] +[[package]] +name = "async-trait" +version = "0.1.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc6dde6e4ed435a4c1ee4e73592f5ba9da2151af10076cc04858746af9352d09" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.28", +] + [[package]] name = "atomic-polyfill" version = "0.1.8" @@ -212,18 +238,58 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8fe8f5a8a398345e52358e18ff07cc17a568fbca5c6f73873d3a62056309603" -[[package]] -name = "base64ct" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" - [[package]] name = "beef" version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" +[[package]] +name = "bench_utils" +version = "0.1.0" +dependencies = [ + "arr_macro", + "async-trait", + "core_affinity", + "crossbeam-utils 0.8.10", + "csv", + "hwloc2", + "lazy_static", + "log", + "nix", + "nr2", + "num_cpus", + "parking_lot 0.12.1", + "rand 0.8.5", + "serde", + "static_assertions", + "tokio", + "urcu-sys", +] + +[[package]] +name = "bindgen" +version = "0.51.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebd71393f1ec0509b553aa012b9b58e81dadbdff7130bd3b8cba576e69b32f75" +dependencies = [ + "bitflags", + "cexpr", + "cfg-if 0.1.10", + "clang-sys", + "clap", + "env_logger 0.6.2", + "lazy_static", + "log", + "peeking_take_while", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "which 3.1.1", +] + [[package]] name = "bit-set" version = "0.5.2" @@ -257,15 +323,6 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" -[[package]] -name = "block-buffer" -version = "0.10.4" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array 0.14.6", -] - [[package]] name = "bootloader" version = "0.0.1" @@ -306,12 +363,27 @@ version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +[[package]] +name = "bytes" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" + [[package]] name = "cc" version = "1.0.73" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" +[[package]] +name = "cexpr" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fce5b5fb86b0c57c20c834c1b412fd09c77c8a59b9473f86272709e78874cd1d" +dependencies = [ + "nom", +] + [[package]] name = "cfg-if" version = "0.1.10" @@ -331,7 +403,43 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff41a3c2c1e39921b9003de14bf0439c7b63a9039637c291e1a64925d8ddfa45" dependencies = [ "owning_ref", - "parking_lot", + "parking_lot 0.4.8", +] + +[[package]] +name = "clang-sys" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81de550971c976f176130da4b2978d3b524eaa0fd9ac31f3ceb5ae1231fb4853" +dependencies = [ + "glob", + "libc 0.2.126", + "libloading", +] + +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "ansi_term", + "atty", + "bitflags", + "strsim", + "textwrap", + "unicode-width", + "vec_map", + "yaml-rust", +] + +[[package]] +name = "cloudabi" +version = "0.0.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" +dependencies = [ + "bitflags", ] [[package]] @@ -341,7 +449,7 @@ dependencies = [ "arr_macro", "chashmap", "crossbeam-queue", - "crossbeam-utils", + "crossbeam-utils 0.8.10", "env_logger 0.8.4", "log", "static_assertions", @@ -356,6 +464,18 @@ dependencies = [ "memchr", ] +[[package]] +name = "core_affinity" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f8a03115cc34fb0d7c321dd154a3914b3ca082ccc5c11d91bf7117dbbe7171f" +dependencies = [ + "kernel32-sys", + "libc 0.2.126", + "num_cpus", + "winapi 0.2.8", +] + [[package]] name = "cortex-m" version = "0.6.7" @@ -393,6 +513,33 @@ dependencies = [ "riscv", ] +[[package]] +name = "crossbeam-epoch" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "058ed274caafc1f60c4997b5fc07bf7dc7cca454af7c6e81edffe5f33f70dace" +dependencies = [ + "autocfg", + "cfg-if 0.1.10", + "crossbeam-utils 0.7.2", + "lazy_static", + "maybe-uninit", + "memoffset 0.5.6", + "scopeguard", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.15" +source = "git+https://github.com/crossbeam-rs/crossbeam#cd412be043b247505f2d73f9e9e41fc89557d500" +dependencies = [ + "autocfg", + "cfg-if 1.0.0", + "crossbeam-utils 0.8.16", + "memoffset 0.9.0", + "scopeguard", +] + [[package]] name = "crossbeam-queue" version = "0.3.5" @@ -400,7 +547,29 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f25d8400f4a7a5778f0e4e52384a48cbd9b5c495d110786187fc750075277a2" dependencies = [ "cfg-if 1.0.0", - "crossbeam-utils", + "crossbeam-utils 0.8.10", +] + +[[package]] +name = "crossbeam-skiplist" +version = "0.1.1" +source = "git+https://github.com/crossbeam-rs/crossbeam#cd412be043b247505f2d73f9e9e41fc89557d500" +dependencies = [ + "cfg-if 1.0.0", + "crossbeam-epoch 0.9.15", + "crossbeam-utils 0.8.16", + 
"scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" +dependencies = [ + "autocfg", + "cfg-if 0.1.10", + "lazy_static", ] [[package]] @@ -414,13 +583,11 @@ dependencies = [ ] [[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +name = "crossbeam-utils" +version = "0.8.16" +source = "git+https://github.com/crossbeam-rs/crossbeam#cd412be043b247505f2d73f9e9e41fc89557d500" dependencies = [ - "generic-array 0.14.6", - "typenum", + "cfg-if 1.0.0", ] [[package]] @@ -462,7 +629,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f877be4f7c9f246b183111634f75baa039715e3f46ce860677d3b19a69fb229c" dependencies = [ "quote", - "syn", + "syn 1.0.98", ] [[package]] @@ -478,13 +645,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4f8a51dd197fa6ba5b4dc98a990a43cc13693c23eb0089ebb0fcc1f04152bca6" [[package]] -name = "digest" -version = "0.10.7" +name = "custom_error_core" +version = "1.8.0" +source = "git+https://github.com/gz/custom_error.git#be6af8aa1ad227b2686c3fdfd94a0a249767bf6f" + +[[package]] +name = "dashmap" +version = "4.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +checksum = "e77a43b28d0668df09411cb0bc9a8c2adc40f9a048afe863e05fd43251e8e39c" dependencies = [ - "block-buffer", - "crypto-common", + "cfg-if 1.0.0", + "num_cpus", ] [[package]] @@ -495,7 +667,7 @@ checksum = "3bf95dc3f046b9da4f2d51833c0d3547d8564ef6910f5c1ed130306a75b92886" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.98", ] [[package]] @@ -558,6 +730,19 @@ dependencies = [ "termcolor", ] +[[package]] +name = 
"env_logger" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aafcde04e90a5226a6443b7aabdb016ba2f8307c847d524724bd9b346dd1a2d3" +dependencies = [ + "atty", + "humantime 1.3.0", + "log", + "regex", + "termcolor", +] + [[package]] name = "env_logger" version = "0.8.4" @@ -605,6 +790,12 @@ dependencies = [ "libc 0.2.126", ] +[[package]] +name = "error-chain" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9435d864e017c3c6afeac1654189b06cdb491cf2ff73dbf0d73b0f292f42ff8" + [[package]] name = "error-chain" version = "0.12.4" @@ -612,7 +803,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc" dependencies = [ "backtrace", - "version_check", + "version_check 0.9.4", +] + +[[package]] +name = "evmap" +version = "8.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15ddfe939e67347afdefc312443bf3c2c61ac79153243669ae3a594e56ab2225" +dependencies = [ + "smallvec 1.11.0", ] [[package]] @@ -632,6 +832,18 @@ dependencies = [ "instant", ] +[[package]] +name = "flurry" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c0a35f7b50e99185a2825541946252f669f3c3ca77801357cd682a1b356bb3e" +dependencies = [ + "ahash 0.3.8", + "crossbeam-epoch 0.8.2", + "num_cpus", + "parking_lot 0.10.2", +] + [[package]] name = "fnv" version = "1.0.7" @@ -653,6 +865,95 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" +[[package]] +name = "futures" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", 
+ "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" + +[[package]] +name = "futures-executor" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" + +[[package]] +name = "futures-macro" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.28", +] + +[[package]] +name = "futures-sink" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" + +[[package]] +name = "futures-task" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" + +[[package]] +name = "futures-util" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + 
"memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + [[package]] name = "gdbstub" version = "0.5.0" @@ -675,6 +976,19 @@ dependencies = [ "num-traits", ] +[[package]] +name = "generator" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cc16584ff22b460a382b7feec54b23d2908d858152e5739a120b949293bd74e" +dependencies = [ + "cc", + "libc 0.2.126", + "log", + "rustversion", + "windows", +] + [[package]] name = "generic-array" version = "0.12.4" @@ -700,7 +1014,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" dependencies = [ "typenum", - "version_check", + "version_check 0.9.4", ] [[package]] @@ -743,6 +1057,12 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + [[package]] name = "half" version = "1.8.2" @@ -764,7 +1084,7 @@ version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" dependencies = [ - "ahash", + "ahash 0.7.6", ] [[package]] @@ -773,7 +1093,7 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" dependencies = [ - "ahash", + "ahash 0.7.6", ] [[package]] @@ -839,14 +1159,12 @@ name = "init" version = "0.1.0" dependencies = [ "arrayvec", - "base64ct", "cstr_core", "hashbrown 0.11.2", "kpi", "lazy_static", "lineup", "log", - "md-5", "num-traits", "proptest 1.0.0 (git+https://github.com/gz/proptest.git?branch=x86-asm)", "rawtime", @@ -911,7 +1229,7 @@ dependencies = [ "core2", "env_logger 0.9.0", "log", - "memoffset", + "memoffset 0.6.5", "serde", "serde_cbor", "static_assertions", @@ -945,6 +1263,16 @@ version 
= "0.2.126" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" +[[package]] +name = "libloading" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b111a074963af1d37a139918ac6d49ad1d0d5e47f72fd55388619691a7d753" +dependencies = [ + "cc", + "winapi 0.3.9", +] + [[package]] name = "libm" version = "0.2.2" @@ -958,7 +1286,7 @@ dependencies = [ "arr_macro", "backtracer_core", "crossbeam-queue", - "crossbeam-utils", + "crossbeam-utils 0.8.10", "either", "env_logger 0.9.0", "fringe", @@ -976,6 +1304,15 @@ version = "0.0.46" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4d2456c373231a208ad294c33dc5bff30051eafd954cd4caae83a712b12854d" +[[package]] +name = "lock_api" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4da24a77a3d8a6d4862d95f72e6fdb9c09a643ecdb402d754004a557f2bec75" +dependencies = [ + "scopeguard", +] + [[package]] name = "lock_api" version = "0.4.7" @@ -1015,7 +1352,20 @@ dependencies = [ "proc-macro2", "quote", "regex-syntax", - "syn", + "syn 1.0.98", +] + +[[package]] +name = "loom" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff50ecb28bb86013e935fb6683ab1f6d3a20016f123c76fd4c27470076ac30f5" +dependencies = [ + "cfg-if 1.0.0", + "generator", + "scoped-tls", + "tracing", + "tracing-subscriber", ] [[package]] @@ -1030,6 +1380,15 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ca88d725a0a943b096803bd34e73a4437208b6077654cc4ecb2947a5f91618d" +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata", +] + [[package]] name = "matches" version = "0.1.9" @@ -1042,16 
+1401,6 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" -[[package]] -name = "md-5" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" -dependencies = [ - "cfg-if 1.0.0", - "digest", -] - [[package]] name = "memchr" version = "2.5.0" @@ -1069,22 +1418,63 @@ dependencies = [ [[package]] name = "memoffset" -version = "0.6.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +checksum = "043175f069eda7b85febe4a74abbaeff828d9f8b448515d3151a14a3542811aa" dependencies = [ "autocfg", ] [[package]] -name = "miniz_oxide" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + +[[package]] +name = "memoffset" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +dependencies = [ + "autocfg", +] + +[[package]] +name = "metadeps" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73b122901b3a675fac8cecf68dcb2f0d3036193bc861d1ac0e1c337f7d5254c2" +dependencies = [ + "error-chain 0.10.0", + "pkg-config", + "toml", +] + +[[package]] +name = "miniz_oxide" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f5c75688da582b8ffc1f1799e9db273f32133c49e048f614d22ec3256773ccc" dependencies = [ "adler", ] +[[package]] +name = "mio" +version = "0.8.8" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2" +dependencies = [ + "libc 0.2.126", + "log", + "wasi 0.11.0+wasi-snapshot-preview1", + "windows-sys 0.48.0", +] + [[package]] name = "mmap" version = "0.1.1" @@ -1119,7 +1509,7 @@ dependencies = [ "bitflags", "cfg-if 1.0.0", "libc 0.2.126", - "memoffset", + "memoffset 0.6.5", ] [[package]] @@ -1128,12 +1518,69 @@ version = "0.1.0" dependencies = [ "arr_macro", "chashmap", - "crossbeam-utils", + "crossbeam-utils 0.8.10", "log", "rand 0.7.3", "static_assertions", ] +[[package]] +name = "nom" +version = "4.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6" +dependencies = [ + "memchr", + "version_check 0.1.5", +] + +[[package]] +name = "nr2" +version = "0.2.0" +dependencies = [ + "arr_macro", + "arrayvec", + "bench_utils", + "chashmap", + "clap", + "crossbeam-queue", + "crossbeam-skiplist", + "crossbeam-utils 0.8.10", + "dashmap", + "env_logger 0.9.0", + "evmap", + "flurry", + "futures", + "hashbrown 0.11.2", + "log", + "loom", + "nrfs", + "parking_lot 0.12.1", + "rand 0.8.5", + "static_assertions", + "tokio", + "urcu-sys", + "x86 0.51.0", + "zipf", +] + +[[package]] +name = "nrfs" +version = "0.0.1" +source = "git+https://github.com/ankit-iitb/nrfs.git?branch=master#2b688ce16e762c79f1ba6809954de79ad5ba10a1" +dependencies = [ + "arr_macro", + "bitflags", + "crossbeam-utils 0.8.10", + "custom_error_core", + "hashbrown 0.12.3", + "hwloc2", + "log", + "spin 0.9.8", + "static_assertions", + "x86 0.49.0", +] + [[package]] name = "nrk" version = "0.0.4" @@ -1150,7 +1597,7 @@ dependencies = [ "cnr", "core2", "crossbeam-queue", - "crossbeam-utils", + "crossbeam-utils 0.8.10", "cstr_core", "csv", "ctor", @@ -1172,8 +1619,9 @@ dependencies = [ "log", "logos", "memfile", - "memoffset", + "memoffset 0.6.5", "node-replication", + "nr2", "once_cell", 
"proptest 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.8.5", @@ -1189,7 +1637,7 @@ dependencies = [ "testutils", "uefi", "vmxnet3", - "which", + "which 4.2.5", "x86 0.52.0", ] @@ -1276,7 +1724,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "149d8f5b97f3c1133e3cfcd8886449959e856b557ff281e292b733d7c69e005e" dependencies = [ "owning_ref", - "parking_lot_core", + "parking_lot_core 0.2.14", +] + +[[package]] +name = "parking_lot" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3a704eb390aafdc107b0e392f56a82b668e3a71366993b5340f5833fd62505e" +dependencies = [ + "lock_api 0.3.4", + "parking_lot_core 0.7.3", +] + +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api 0.4.7", + "parking_lot_core 0.9.8", ] [[package]] @@ -1287,10 +1755,37 @@ checksum = "4db1a8ccf734a7bce794cc19b3df06ed87ab2f3907036b693c68f56b4d4537fa" dependencies = [ "libc 0.2.126", "rand 0.4.6", - "smallvec", + "smallvec 0.6.14", "winapi 0.3.9", ] +[[package]] +name = "parking_lot_core" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b93f386bb233083c799e6e642a9d73db98c24a5deeb95ffc85bf281255dffc98" +dependencies = [ + "cfg-if 0.1.10", + "cloudabi", + "libc 0.2.126", + "redox_syscall 0.1.57", + "smallvec 1.11.0", + "winapi 0.3.9", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" +dependencies = [ + "cfg-if 1.0.0", + "libc 0.2.126", + "redox_syscall 0.3.5", + "smallvec 1.11.0", + "windows-targets 0.48.1", +] + [[package]] name = "paste" version = "1.0.7" @@ -1306,6 +1801,12 @@ dependencies = [ "tracing", ] +[[package]] 
+name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + [[package]] name = "phf" version = "0.10.1" @@ -1350,6 +1851,12 @@ version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "pkg-config" version = "0.3.25" @@ -1382,9 +1889,9 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" [[package]] name = "proc-macro2" -version = "1.0.40" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" dependencies = [ "unicode-ident", ] @@ -1450,9 +1957,9 @@ checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" [[package]] name = "quote" -version = "1.0.20" +version = "1.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804" +checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965" dependencies = [ "proc-macro2", ] @@ -1605,6 +2112,12 @@ dependencies = [ "rand_core 0.3.1", ] +[[package]] +name = "redox_syscall" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" + [[package]] name = "redox_syscall" version = "0.2.13" @@ -1614,6 +2127,15 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_syscall" +version = "0.3.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags", +] + [[package]] name = "regex" version = "1.5.6" @@ -1630,6 +2152,9 @@ name = "regex-automata" version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax", +] [[package]] name = "regex-syntax" @@ -1651,7 +2176,7 @@ name = "rexpect" version = "0.4.0" source = "git+https://github.com/gz/rexpect.git?branch=eof-nix#de1352b82c52a9466a5c3b417abe02356c48744c" dependencies = [ - "error-chain", + "error-chain 0.12.4", "nix", "regex", "tempfile", @@ -1717,6 +2242,12 @@ version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc_version" version = "0.2.3" @@ -1746,9 +2277,15 @@ dependencies = [ "io-lifetimes", "libc 0.2.126", "linux-raw-sys", - "windows-sys", + "windows-sys 0.36.1", ] +[[package]] +name = "rustversion" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" + [[package]] name = "rusty-fork" version = "0.3.0" @@ -1767,6 +2304,12 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "scopeguard" version = 
"1.1.0" @@ -1821,7 +2364,31 @@ checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.98", +] + +[[package]] +name = "sharded-slab" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2" + +[[package]] +name = "signal-hook-registry" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" +dependencies = [ + "libc 0.2.126", ] [[package]] @@ -1830,6 +2397,15 @@ version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" +[[package]] +name = "slab" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d" +dependencies = [ + "autocfg", +] + [[package]] name = "slabmalloc" version = "0.10.0" @@ -1848,6 +2424,12 @@ dependencies = [ "maybe-uninit", ] +[[package]] +name = "smallvec" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" + [[package]] name = "smoltcp" version = "0.7.5" @@ -1872,6 +2454,16 @@ dependencies = [ "managed 0.8.0", ] +[[package]] +name = "socket2" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd" +dependencies = [ + "libc 0.2.126", + "winapi 0.3.9", +] + [[package]] name = "spin" version = "0.5.2" 
@@ -1884,7 +2476,7 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" dependencies = [ - "lock_api", + "lock_api 0.4.7", ] [[package]] @@ -1899,6 +2491,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + [[package]] name = "syn" version = "1.0.98" @@ -1910,6 +2508,17 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "syn" +version = "2.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "tempdir" version = "0.3.7" @@ -1929,7 +2538,7 @@ dependencies = [ "cfg-if 1.0.0", "fastrand", "libc 0.2.126", - "redox_syscall", + "redox_syscall 0.2.13", "remove_dir_all", "winapi 0.3.9", ] @@ -1963,6 +2572,62 @@ dependencies = [ "serde", ] +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "thread_local" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +dependencies = [ + "cfg-if 1.0.0", + "once_cell", +] + +[[package]] +name = "tokio" +version = "1.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03201d01c3c27a29c8a5cee5b55a93ddae1ccf6f08f65365c2c918f8c1b76f64" +dependencies = [ + "autocfg", + "bytes", + "libc 
0.2.126", + "memchr", + "mio", + "num_cpus", + "parking_lot 0.12.1", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.45.0", +] + +[[package]] +name = "tokio-macros" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d266c00fde287f55d3f1c3e96c500c362a2b8c695076ec180f27918820bc6df8" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.98", +] + +[[package]] +name = "toml" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736b60249cb25337bc196faa43ee12c705e426f3d55c214d73a4e7be06f92cb4" + [[package]] name = "tracing" version = "0.1.35" @@ -1983,7 +2648,7 @@ checksum = "cc6b8ad3567499f98a1db7a752b07a7c8c7c7c34c332ec00effb2b0027974b7c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.98", ] [[package]] @@ -1993,6 +2658,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b7358be39f2f274f322d2aaed611acc57f382e8eb1e5b48cb9ae30933495ce7" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ddad33d2d10b1ed7eb9d1f518a5674713876e97e5bb9b7345a7984fbb4f922" +dependencies = [ + "lazy_static", + "log", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60db860322da191b40952ad9affe65ea23e7dd6a5c442c2c42865810c6ab8e6b" +dependencies = [ + "ansi_term", + "matchers", + "once_cell", + "regex", + "sharded-slab", + "smallvec 1.11.0", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", ] [[package]] @@ -2030,7 +2725,7 @@ checksum = "a9271b66bf83671563773e54b178f1022ac2dab87dc197f80be51885a5e1a2f4" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.98", ] [[package]] @@ -2050,12 +2745,46 @@ version = "1.0.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c" +[[package]] +name = "unicode-width" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" + +[[package]] +name = "urcu-sys" +version = "0.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fff1711e4a29f9bad323402d0cde3f5977f9fd131bb937625665a480547871b5" +dependencies = [ + "bindgen", + "metadeps", +] + +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + [[package]] name = "vcell" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77439c1b53d2303b20d9459b1ade71a83c716e3f9c34f3228c00e6f185d6c002" +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + +[[package]] +name = "version_check" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" + [[package]] name = "version_check" version = "0.9.4" @@ -2068,7 +2797,7 @@ version = "0.1.0" dependencies = [ "arrayvec", "bitflags", - "crossbeam-utils", + "crossbeam-utils 0.8.10", "cstr_core", "hashbrown 0.11.2", "kpi", @@ -2134,6 +2863,15 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "which" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d011071ae14a2f6671d0b74080ae0cd8ebf3a6f8c9589a2cd45f23126fe29724" 
+dependencies = [ + "libc 0.2.126", +] + [[package]] name = "which" version = "4.2.5" @@ -2188,49 +2926,190 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets 0.48.1", +] + [[package]] name = "windows-sys" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" dependencies = [ - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_msvc", + "windows_aarch64_msvc 0.36.1", + "windows_i686_gnu 0.36.1", + "windows_i686_msvc 0.36.1", + "windows_x86_64_gnu 0.36.1", + "windows_x86_64_msvc 0.36.1", +] + +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.1", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = 
"windows-targets" +version = "0.48.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" +dependencies = [ + "windows_aarch64_gnullvm 0.48.0", + "windows_aarch64_msvc 0.48.0", + "windows_i686_gnu 0.48.0", + "windows_i686_msvc 0.48.0", + "windows_x86_64_gnu 0.48.0", + "windows_x86_64_gnullvm 0.48.0", + "windows_x86_64_msvc 0.48.0", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" + [[package]] name = "windows_aarch64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" + [[package]] name = "windows_i686_gnu" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" + [[package]] name = "windows_i686_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" + [[package]] name = "windows_x86_64_gnu" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" + [[package]] name = "windows_x86_64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" + [[package]] name = "x86" version = "0.47.0" @@ -2242,6 +3121,28 @@ dependencies = [ "raw-cpuid", ] +[[package]] +name = "x86" +version = "0.49.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae0df8874628b15fdefccbd5c6b06766ec9ec2d6cd30d6d2f82245343358c50a" +dependencies = [ + "bit_field", + "bitflags", + "raw-cpuid", +] + +[[package]] +name = "x86" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7477e4af519a58818c1f0ab4b149d0ce3304e625be8fe56c8484cac50b0532" +dependencies = [ + "bit_field", + "bitflags", + "raw-cpuid", +] + [[package]] name = "x86" version = "0.52.0" @@ -2262,8 +3163,23 @@ dependencies = [ "zero", ] +[[package]] +name = "yaml-rust" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e66366e18dc58b46801afbf2ca7661a9f59cc8c5962c29892b6039b4f86fa992" + [[package]] name = "zero" version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f1bc8a6b2005884962297587045002d8cfb8dcec9db332f4ca216ddc5de82c5" + +[[package]] +name = "zipf" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "835688a7a1b5d2dfaeb5b7e1b4cfb979e7095a70cd1c72fe083f4904ef3e995e" +dependencies = [ + "rand 0.8.5", +] diff --git a/Cargo.toml b/Cargo.toml index b2974a5f9..2c6c0ece2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,7 @@ members = [ "lib/lineup", "lib/node-replication/cnr", 
"lib/node-replication/nr", + "lib/node-replication2/node-replication", "lib/rpc", "lib/vibrio", "lib/vmxnet3", diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml index d91aeae3c..7656474c0 100644 --- a/kernel/Cargo.toml +++ b/kernel/Cargo.toml @@ -15,7 +15,8 @@ resolver = "2" [dependencies] # Our own dependencies: apic = { path = "../lib/apic/" } -node-replication = { path = "../lib/node-replication/nr" } #"0.1.1" +node-replication = { path = "../lib/node-replication/nr" } +nr2 = { path = "../lib/node-replication2/node-replication" } cnr = { path = "../lib/node-replication/cnr" } kpi = { path = "../lib/kpi" } rpc = { path = "../lib/rpc", optional = true } diff --git a/kernel/run.py b/kernel/run.py index a1fa5d4b5..cde5a351c 100644 --- a/kernel/run.py +++ b/kernel/run.py @@ -769,8 +769,7 @@ def configure_dcm_scheduler(args): # Download jar if necessary if not os.path.exists(dcm_path): subprocess.run("wget https://github.com/hunhoffe/nrk-dcm-scheduler/releases/download/release-{}/{} -P {}".format( - DCM_SCHEDULER_VERSION, dcm_jar, jar_dir), shell=True, check=True, timeout=10) - + DCM_SCHEDULER_VERSION, dcm_jar, jar_dir), shell=True, check=True, timeout=120) # Create consistent symlink location for the DCM scheduler jar if os.path.exists(symlink_jar_path) or os.path.islink(symlink_jar_path): os.unlink(symlink_jar_path) diff --git a/kernel/src/arch/unix/kcb.rs b/kernel/src/arch/unix/kcb.rs index d9ef577e3..bc2f0cd30 100644 --- a/kernel/src/arch/unix/kcb.rs +++ b/kernel/src/arch/unix/kcb.rs @@ -7,7 +7,7 @@ use alloc::sync::Arc; use core::any::Any; use arrayvec::ArrayVec; -use node_replication::{Replica, ReplicaToken}; +use nr2::nr::{Replica, ReplicaToken}; use crate::error::KError; use crate::memory::mcache::FrameCacheEarly; diff --git a/kernel/src/arch/unix/mod.rs b/kernel/src/arch/unix/mod.rs index db26933ba..50b34401b 100644 --- a/kernel/src/arch/unix/mod.rs +++ b/kernel/src/arch/unix/mod.rs @@ -10,7 +10,7 @@ use cnr::Replica as MlnrReplica; use ctor::ctor; use 
fallible_collections::TryClone; use log::{debug, info}; -use node_replication::{Log, Replica}; +use nr2::nr::{Log, Replica}; use x86::current::paging::HUGE_PAGE_SIZE; use crate::fs::cnrfs::MlnrKernelNode; diff --git a/kernel/src/arch/unix/process.rs b/kernel/src/arch/unix/process.rs index 089c80c4f..7ab2829f7 100644 --- a/kernel/src/arch/unix/process.rs +++ b/kernel/src/arch/unix/process.rs @@ -15,7 +15,7 @@ use arrayvec::ArrayVec; use kpi::process::FrameId; use lazy_static::lazy_static; -use node_replication::{Dispatch, Log, Replica}; +use nr2::nr::{Dispatch, Log, Replica}; use crate::arch::kcb::get_kcb; use crate::error::{KError, KResult}; diff --git a/kernel/src/arch/x86_64/coreboot.rs b/kernel/src/arch/x86_64/coreboot.rs index f7e4b2a5e..e41245618 100644 --- a/kernel/src/arch/x86_64/coreboot.rs +++ b/kernel/src/arch/x86_64/coreboot.rs @@ -8,6 +8,7 @@ use alloc::sync::Arc; use alloc::vec::Vec; +use core::num::NonZeroUsize; use core::sync::atomic::{AtomicBool, Ordering}; use apic::ApicDriver; @@ -17,7 +18,7 @@ use fallible_collections::FallibleVecGlobal; use fallible_collections::TryClone; use log::debug; use log::trace; -use node_replication::{Log, Replica}; +use nr2::nr::{NodeReplicated}; use x86::apic::ApicId; use x86::current::paging::PAddr; @@ -29,7 +30,6 @@ use crate::memory::global::GlobalMemory; use crate::memory::vspace::MapAction; use crate::memory::Frame; use crate::nr::KernelNode; -use crate::nr::Op; use crate::round_up; use crate::stack::OwnedStack; use crate::stack::Stack; @@ -57,8 +57,7 @@ pub(crate) struct AppCoreArgs { pub(super) global_pmem: &'static GlobalMemory, pub(super) thread: atopology::ThreadId, pub(super) node: atopology::NodeId, - pub(super) _log: Arc>, - pub(super) replica: Arc>, + pub(super) replica: Arc>, pub(super) fs_replica: Option>>, } @@ -333,8 +332,7 @@ pub(crate) unsafe fn initialize( /// - Initialized topology /// - Local APIC driver pub(super) fn boot_app_cores( - log: Arc>, - bsp_replica: Arc>, + kernel_node: Arc>, 
fs_logs: Vec>>, fs_replica: Option>>, ) { @@ -347,28 +345,23 @@ pub(super) fn boot_app_cores( // Let's go with one replica per NUMA node for now: let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes()); - - let mut replicas: Vec>> = - Vec::try_with_capacity(numa_nodes).expect("Not enough memory to initialize system"); + let numa_nodes = NonZeroUsize::new(numa_nodes).expect("At least one NUMA node"); + let mut fs_replicas: Vec>> = - Vec::try_with_capacity(numa_nodes).expect("Not enough memory to initialize system"); + Vec::try_with_capacity(numa_nodes.get()).expect("Not enough memory to initialize system"); // Push the replica for node 0 - debug_assert!(replicas.capacity() >= 1, "No re-allocation."); - replicas.push(bsp_replica); if let Some(node_0_fs_replica) = fs_replica { debug_assert!(fs_replicas.capacity() >= 1, "No re-allocation."); fs_replicas.push(node_0_fs_replica); } let pcm = kcb::per_core_mem(); + let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes()); + for node in 1..numa_nodes { pcm.set_mem_affinity(node as atopology::NodeId) .expect("Can't set affinity"); - - debug_assert!(replicas.capacity() > node, "No re-allocation."); - replicas.push(Replica::<'static, KernelNode>::new(&log)); - if fs_replicas.len() > 0 { debug_assert!(fs_replicas.capacity() > node, "No re-allocation."); fs_replicas.push(MlnrReplica::new( @@ -434,8 +427,7 @@ pub(super) fn boot_app_cores( global_memory, global_pmem, thread: thread.id, - _log: log.clone(), - replica: replicas[node as usize] + replica: kernel_node .try_clone() .expect("Not enough memory to initialize system"), fs_replica: thread_fs_replica, @@ -474,5 +466,4 @@ pub(super) fn boot_app_cores( pcm.set_mem_affinity(0).expect("Can't set affinity"); } - core::mem::forget(replicas); } diff --git a/kernel/src/arch/x86_64/mod.rs b/kernel/src/arch/x86_64/mod.rs index 8d8ff2290..e557331b0 100644 --- a/kernel/src/arch/x86_64/mod.rs +++ b/kernel/src/arch/x86_64/mod.rs @@ -23,6 +23,7 @@ use 
alloc::sync::Arc; use core::mem::transmute; use core::sync::atomic::AtomicBool; use core::sync::atomic::Ordering; +use core::num::NonZeroUsize; #[cfg(feature = "rackscale")] use crate::nr::NR_LOG; @@ -31,10 +32,8 @@ use cnr::Replica as MlnrReplica; use fallible_collections::TryClone; use klogger::sprint; use log::{debug, error, info}; -use node_replication::Replica; +use nr2::nr::{AffinityChange, NodeReplicated}; use x86::{controlregs, cpuid}; -#[cfg(not(feature = "rackscale"))] -use {crate::nr::Op, node_replication::Log}; use crate::cmdline::CommandLineArguments; use crate::fs::cnrfs::MlnrKernelNode; @@ -194,7 +193,7 @@ pub(crate) fn start_app_core(args: Arc, initialized: &AtomicBool) { serial::init(); { - let local_ridx = args.replica.register().unwrap(); + let local_ridx = args.replica.register(args.node).unwrap(); crate::nr::NR_REPLICA.call_once(|| (args.replica.clone(), local_ridx)); #[cfg(feature = "rackscale")] @@ -244,9 +243,6 @@ pub(crate) fn start_app_core(args: Arc, initialized: &AtomicBool) { #[start] #[no_mangle] fn _start(argc: isize, _argv: *const *const u8) -> isize { - #[cfg(not(feature = "rackscale"))] - use crate::memory::LARGE_PAGE_SIZE; - // Very early init: sprint!("\r\n"); sprint!("NRK booting on x86_64...\r\n"); @@ -453,16 +449,27 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { // Set-up interrupt routing drivers (I/O APIC controllers) irq::ioapic_initialize(); - // Create the global operation log and first replica and store it (needs - // TLS) + + // Let's go with one replica per NUMA node for now: + let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes()); + let numa_nodes = NonZeroUsize::new(numa_nodes).expect("At least one NUMA node"); + #[cfg(not(feature = "rackscale"))] - let (log, bsp_replica) = { - let log: Arc> = Arc::try_new(Log::::new(LARGE_PAGE_SIZE)) - .expect("Not enough memory to initialize system"); - let bsp_replica = Replica::::new(&log); - let local_ridx = bsp_replica.register().unwrap(); - 
crate::nr::NR_REPLICA.call_once(|| (bsp_replica.clone(), local_ridx)); - (log, bsp_replica) + let kernel_node = { + // Create the global operation log and first replica and store it (needs + // TLS) + let kernel_node: Arc> = Arc::try_new(NodeReplicated::new(numa_nodes, |afc: AffinityChange| { + let pcm = kcb::per_core_mem(); + match afc { + AffinityChange::Replica(r) => pcm.set_mem_affinity(r).expect("Can't set affinity"), + AffinityChange::Revert(orig) => pcm.set_mem_affinity(orig).expect("Can't set affinity"), + } + return 0; // xxx + }).expect("Not enough memory to initialize system")).expect("Not enough memory to initialize system"); + + let local_ridx = kernel_node.register(0).unwrap(); + crate::nr::NR_REPLICA.call_once(|| (kernel_node.clone(), local_ridx)); + kernel_node }; // Starting to initialize file-system @@ -540,7 +547,7 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { } // Bring up the rest of the system (needs topology, APIC, and global memory) - coreboot::boot_app_cores(log.clone(), bsp_replica, fs_logs, fs_replica); + coreboot::boot_app_cores(kernel_node, fs_logs, fs_replica); // Done with initialization, now we go in // the arch-independent part: diff --git a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs index 3c2a5f997..42aefaed5 100644 --- a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs +++ b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs @@ -9,7 +9,7 @@ use core2::io::Write; use atopology::NodeId; use crossbeam_queue::ArrayQueue; -use node_replication::{Dispatch, Log}; +use nr2::nr::{Dispatch, Log}; use rpc::rpc::*; use super::client_state::CLIENT_STATE; diff --git a/kernel/src/environment.rs b/kernel/src/environment.rs index d13bc50a7..449be8ed4 100644 --- a/kernel/src/environment.rs +++ b/kernel/src/environment.rs @@ -58,7 +58,7 @@ pub fn init_topology() { "We don't support as many replicas as we have NUMA nodes." 
); assert!( - node_replication::MAX_REPLICAS_PER_LOG >= nodes, + nr2::nr::MAX_REPLICAS_PER_LOG >= nodes, "We don't support as many replicas as we have NUMA nodes." ); assert!( diff --git a/kernel/src/integration_tests.rs b/kernel/src/integration_tests.rs index 7b93fe64a..4b132d461 100644 --- a/kernel/src/integration_tests.rs +++ b/kernel/src/integration_tests.rs @@ -422,7 +422,7 @@ fn coreboot_nrlog() { use core::sync::atomic::{AtomicBool, Ordering}; use klogger::sprintln; use log::info; - use node_replication::Log; + use nr2::Log; let log: Arc> = Arc::try_new(Log::::new(1024 * 1024 * 1)).expect("Can't Arc this"); diff --git a/kernel/src/nr.rs b/kernel/src/nr.rs index 7ba2929fa..b5554cc79 100644 --- a/kernel/src/nr.rs +++ b/kernel/src/nr.rs @@ -7,7 +7,7 @@ use core::fmt::Debug; use alloc::sync::Arc; use hashbrown::HashMap; use log::{error, trace}; -use node_replication::{Dispatch, Replica, ReplicaToken}; +use nr2::nr::{NodeReplicated, ThreadToken, Dispatch}; use spin::Once; #[cfg(feature = "rackscale")] @@ -19,18 +19,18 @@ use crate::process::{Pid, MAX_PROCESSES}; /// Kernel scheduler / process mgmt. replica #[thread_local] -pub(crate) static NR_REPLICA: Once<(Arc>, ReplicaToken)> = Once::new(); +pub(crate) static NR_REPLICA: Once<(Arc>, ThreadToken)> = Once::new(); // Base nr log. The rackscale controller needs to save a reference to this, so it can give // clones to client so they can create replicas of their own. #[cfg(feature = "rackscale")] lazy_static! { - pub(crate) static ref NR_LOG: Arc> = { + pub(crate) static ref NR_LOG: Arc> = { if crate::CMDLINE .get() .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller) { - use node_replication::Log; + use nr2::nr::Log; use crate::arch::kcb::per_core_mem; use crate::memory::{LARGE_PAGE_SIZE, shmem_affinity::local_shmem_affinity}; @@ -47,7 +47,7 @@ lazy_static! 
{ log } else { - use node_replication::Log; + use nr2::nr::Log; use crate::memory::{paddr_to_kernel_vaddr, PAddr}; use crate::arch::rackscale::get_shmem_structure::{rpc_get_shmem_structure, ShmemStructure}; @@ -99,6 +99,8 @@ pub(crate) struct CoreInfo { pub entry_point: VAddr, } + +#[derive(Debug, Clone)] pub(crate) struct KernelNode { process_map: HashMap, scheduler_map: HashMap, From 9eeb0f6f95e3d31ef381874b97fa5e906e0d1c92 Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Thu, 10 Aug 2023 16:14:33 -0700 Subject: [PATCH 02/32] Add submodule. Signed-off-by: Gerd Zellweger --- lib/node-replication2 | 1 + 1 file changed, 1 insertion(+) create mode 160000 lib/node-replication2 diff --git a/lib/node-replication2 b/lib/node-replication2 new file mode 160000 index 000000000..f0a91fdca --- /dev/null +++ b/lib/node-replication2 @@ -0,0 +1 @@ +Subproject commit f0a91fdca08adfdd5ad8d11c2e78d7b2d852e512 From eb5c56c637ead37b7c29ad521a360c2e14bbc79e Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Mon, 11 Sep 2023 00:20:05 -0700 Subject: [PATCH 03/32] Migrate process code to new nr library. 
Signed-off-by: Gerd Zellweger --- kernel/src/arch/unix/mod.rs | 34 ++++-- kernel/src/arch/unix/process.rs | 80 ++++++------ kernel/src/arch/x86_64/coreboot.rs | 5 +- kernel/src/arch/x86_64/mod.rs | 28 +++-- kernel/src/arch/x86_64/process.rs | 129 +++++++++++--------- kernel/src/arch/x86_64/vspace/mod.rs | 8 +- kernel/src/arch/x86_64/vspace/page_table.rs | 23 ++-- kernel/src/integration_tests.rs | 2 +- kernel/src/nr.rs | 3 +- kernel/src/nrproc.rs | 112 +++++------------ kernel/src/process.rs | 2 +- 11 files changed, 209 insertions(+), 217 deletions(-) diff --git a/kernel/src/arch/unix/mod.rs b/kernel/src/arch/unix/mod.rs index 50b34401b..a0d2fedad 100644 --- a/kernel/src/arch/unix/mod.rs +++ b/kernel/src/arch/unix/mod.rs @@ -3,6 +3,7 @@ use alloc::boxed::Box; use alloc::sync::Arc; +use core::num::NonZeroUsize; use core::sync::atomic::{AtomicBool, Ordering}; use arrayvec::ArrayVec; @@ -10,7 +11,7 @@ use cnr::Replica as MlnrReplica; use ctor::ctor; use fallible_collections::TryClone; use log::{debug, info}; -use nr2::nr::{Log, Replica}; +use nr2::nr::{AffinityChange, Log, NodeReplicated, Replica}; use x86::current::paging::HUGE_PAGE_SIZE; use crate::fs::cnrfs::MlnrKernelNode; @@ -102,14 +103,33 @@ fn init_setup() { unsafe { kcb::PER_CORE_MEMORY.set_global_mem(global_memory_static) }; debug!("Memory allocation should work at this point..."); - let log: Arc> = Arc::try_new(Log::::new(LARGE_PAGE_SIZE)) + let kernel_node = { + // Create the global operation log and first replica and store it (needs + // TLS) + let kernel_node: Arc> = Arc::try_new( + NodeReplicated::new(NonZeroUsize::new(1).unwrap(), |afc: AffinityChange| { + return 0; // xxx + }) + .expect("Not enough memory to initialize system"), + ) .expect("Not enough memory to initialize system"); - let bsp_replica = Replica::::new(&log); - let local_ridx = bsp_replica - .register() - .expect("Failed to register with Replica."); - crate::nr::NR_REPLICA.call_once(|| (bsp_replica.clone(), local_ridx)); + let local_ridx 
= kernel_node.register(0).unwrap(); + crate::nr::NR_REPLICA.call_once(|| (kernel_node.clone(), local_ridx)); + kernel_node + }; + + /* + + let log: Arc> = Arc::try_new(Log::::new(LARGE_PAGE_SIZE)) + .expect("Not enough memory to initialize system"); + let bsp_replica = Replica::::new(&log); + let local_ridx = bsp_replica + .register() + .expect("Failed to register with Replica."); + crate::nr::NR_REPLICA.call_once(|| (bsp_replica.clone(), local_ridx)); + + */ // Starting to initialize file-system let fs_logs = crate::fs::cnrfs::allocate_logs(); // Construct the first replica diff --git a/kernel/src/arch/unix/process.rs b/kernel/src/arch/unix/process.rs index 7ab2829f7..2117d844f 100644 --- a/kernel/src/arch/unix/process.rs +++ b/kernel/src/arch/unix/process.rs @@ -8,14 +8,16 @@ use alloc::vec::Vec; use bootloader_shared::Module; use core::alloc::Allocator; use core::cell::RefCell; +use core::num::NonZeroUsize; use core::ops::{Deref, DerefMut}; +use core::sync::atomic::{AtomicUsize, Ordering}; use x86::current::paging::PAddr; use arrayvec::ArrayVec; use kpi::process::FrameId; use lazy_static::lazy_static; -use nr2::nr::{Dispatch, Log, Replica}; +use nr2::nr::{AffinityChange, Dispatch, NodeReplicated, ThreadToken}; use crate::arch::kcb::get_kcb; use crate::error::{KError, KResult}; @@ -63,41 +65,19 @@ pub(crate) fn swap_current_executor(_current_executor: Box) -> Optio } lazy_static! { - pub(crate) static ref PROCESS_TABLE: ArrayVec>>, MAX_PROCESSES>, MAX_NUMA_NODES> = { + pub(crate) static ref PROCESS_TABLE: ArrayVec>>, MAX_PROCESSES> = { + debug_assert_eq!(*crate::environment::NODE_ID, 0, "Expect initialization to happen on node 0."); // Want at least one replica... 
- let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes()); - - let mut numa_cache = ArrayVec::new(); - for _n in 0..numa_nodes { - let process_replicas = ArrayVec::new(); - debug_assert!(!numa_cache.is_full(), "Ensured by loop range"); - numa_cache.push(process_replicas) - } + let num_replicas = NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).expect("At least one numa node"); + let mut processes = ArrayVec::new(); for pid in 0..MAX_PROCESSES { - let log = Arc::try_new(Log::< as Dispatch>::WriteOperation>::new( - LARGE_PAGE_SIZE, - )).expect("Can't initialize processes, out of memory."); - - let da = DA::new().expect("Can't initialize process deterministic memory allocator"); - for node in 0..numa_nodes { - let pcm = super::kcb::per_core_mem(); - assert!(pcm.set_mem_affinity(node as atopology::NodeId).is_ok()); - - debug_assert!(!numa_cache[node].is_full(), "Ensured by loop range"); - - - let p = Box::try_new(UnixProcess::new(pid, Box::new(da.clone())).expect("Can't create process during init")).expect("Not enough memory to initialize processes"); - let nrp = NrProcess::new(p, Box::new(da.clone())); - - numa_cache[node].push(Replica::>::with_data(&log, nrp)); - - debug_assert_eq!(*crate::environment::NODE_ID, 0, "Expect initialization to happen on node 0."); - assert!(pcm.set_mem_affinity(0).is_ok()); - } + processes.push( + Arc::try_new(NodeReplicated::>::new(num_replicas, |afc: AffinityChange| { + return 0; // TODO(dynrep): Return error code + }).expect("Not enough memory to initialize system")).expect("Not enough memory to initialize system")); } - - numa_cache + processes }; } @@ -108,10 +88,7 @@ impl crate::nrproc::ProcessManager for ArchProcessManagement { fn process_table( &self, - ) -> &'static ArrayVec< - ArrayVec>>, MAX_PROCESSES>, - MAX_NUMA_NODES, - > { + ) -> &'static ArrayVec>>, MAX_PROCESSES> { &super::process::PROCESS_TABLE } } @@ -127,8 +104,37 @@ pub(crate) struct UnixProcess { pub frames: ArrayVec, 
MAX_FRAMES_PER_PROCESS>, } +static NEXT_PID: AtomicUsize = AtomicUsize::new(0); + +impl Default for NrProcess { + fn default() -> Self { + let next_pid = NEXT_PID.fetch_add(1, Ordering::Relaxed); + NrProcess::new( + Box::try_new( + UnixProcess::new(next_pid as Pid).expect("Failed to set-up process during init"), + ) + .expect("Failed to initialize process during init"), + ) + } +} + +impl Clone for UnixProcess { + fn clone(&self) -> Self { + unimplemented!("Clone not yet implemented for UnixProcess") + /* + UnixProcess { + pid: self.pid, + vspace: self.vspace.clone(), + fds: self.fds.clone(), + pinfo: self.pinfo.clone(), + frames: self.frames.clone(), + } + */ + } +} + impl UnixProcess { - fn new(pid: Pid, _allocator: Box) -> Result { + fn new(pid: Pid) -> Result { Ok(UnixProcess { pid, vspace: VSpace::new(), diff --git a/kernel/src/arch/x86_64/coreboot.rs b/kernel/src/arch/x86_64/coreboot.rs index e41245618..e3cdfa9a4 100644 --- a/kernel/src/arch/x86_64/coreboot.rs +++ b/kernel/src/arch/x86_64/coreboot.rs @@ -18,7 +18,7 @@ use fallible_collections::FallibleVecGlobal; use fallible_collections::TryClone; use log::debug; use log::trace; -use nr2::nr::{NodeReplicated}; +use nr2::nr::NodeReplicated; use x86::apic::ApicId; use x86::current::paging::PAddr; @@ -346,7 +346,7 @@ pub(super) fn boot_app_cores( // Let's go with one replica per NUMA node for now: let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes()); let numa_nodes = NonZeroUsize::new(numa_nodes).expect("At least one NUMA node"); - + let mut fs_replicas: Vec>> = Vec::try_with_capacity(numa_nodes.get()).expect("Not enough memory to initialize system"); @@ -465,5 +465,4 @@ pub(super) fn boot_app_cores( debug!("Core {:?} has started", thread.apic_id()); pcm.set_mem_affinity(0).expect("Can't set affinity"); } - } diff --git a/kernel/src/arch/x86_64/mod.rs b/kernel/src/arch/x86_64/mod.rs index e557331b0..7e4a7e4ae 100644 --- a/kernel/src/arch/x86_64/mod.rs +++ b/kernel/src/arch/x86_64/mod.rs @@ 
-21,9 +21,9 @@ use alloc::sync::Arc; use core::mem::transmute; +use core::num::NonZeroUsize; use core::sync::atomic::AtomicBool; use core::sync::atomic::Ordering; -use core::num::NonZeroUsize; #[cfg(feature = "rackscale")] use crate::nr::NR_LOG; @@ -449,7 +449,6 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { // Set-up interrupt routing drivers (I/O APIC controllers) irq::ioapic_initialize(); - // Let's go with one replica per NUMA node for now: let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes()); let numa_nodes = NonZeroUsize::new(numa_nodes).expect("At least one NUMA node"); @@ -458,14 +457,22 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { let kernel_node = { // Create the global operation log and first replica and store it (needs // TLS) - let kernel_node: Arc> = Arc::try_new(NodeReplicated::new(numa_nodes, |afc: AffinityChange| { - let pcm = kcb::per_core_mem(); - match afc { - AffinityChange::Replica(r) => pcm.set_mem_affinity(r).expect("Can't set affinity"), - AffinityChange::Revert(orig) => pcm.set_mem_affinity(orig).expect("Can't set affinity"), - } - return 0; // xxx - }).expect("Not enough memory to initialize system")).expect("Not enough memory to initialize system"); + let kernel_node: Arc> = Arc::try_new( + NodeReplicated::new(numa_nodes, |afc: AffinityChange| { + let pcm = kcb::per_core_mem(); + match afc { + AffinityChange::Replica(r) => { + pcm.set_mem_affinity(r).expect("Can't set affinity") + } + AffinityChange::Revert(orig) => { + pcm.set_mem_affinity(orig).expect("Can't set affinity") + } + } + return 0; // xxx + }) + .expect("Not enough memory to initialize system"), + ) + .expect("Not enough memory to initialize system"); let local_ridx = kernel_node.register(0).unwrap(); crate::nr::NR_REPLICA.call_once(|| (kernel_node.clone(), local_ridx)); @@ -508,6 +515,7 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { crate::pci::init(); // Initialize processes + #[cfg(feature = 
"rackscale")] lazy_static::initialize(&process::PROCESS_LOGS); #[cfg(not(feature = "rackscale"))] diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs index 54c76eb01..b830712ea 100644 --- a/kernel/src/arch/x86_64/process.rs +++ b/kernel/src/arch/x86_64/process.rs @@ -6,11 +6,11 @@ use alloc::collections::TryReserveError; use alloc::string::String; use alloc::sync::Arc; use alloc::vec::Vec; -use core::alloc::Allocator; use core::arch::asm; use core::cell::RefCell; use core::cmp::PartialEq; use core::iter::Iterator; +use core::sync::atomic::{AtomicUsize, Ordering}; use core::{fmt, ptr}; use arrayvec::ArrayVec; @@ -20,12 +20,13 @@ use kpi::arch::SaveArea; use kpi::process::{FrameId, ELF_OFFSET, EXECUTOR_OFFSET}; use lazy_static::lazy_static; use log::{debug, info, trace, warn}; +#[cfg(feature = "rackscale")] use node_replication::{Dispatch, Log, Replica}; +use nr2::nr::NodeReplicated; use x86::bits64::paging::*; use x86::bits64::rflags; use x86::{controlregs, Ring}; -use crate::arch::kcb::per_core_mem; use crate::error::{KError, KResult}; use crate::fs::{fd::FileDescriptorEntry, MAX_FILES_PER_PROCESS}; use crate::memory::vspace::{AddressSpace, MapAction}; @@ -69,6 +70,7 @@ pub(crate) fn current_pid() -> KResult { .pid) } +#[cfg(feature = "rackscale")] lazy_static! { pub(crate) static ref PROCESS_LOGS: Box< ArrayVec< @@ -78,7 +80,6 @@ lazy_static! { > = { - #[cfg(feature = "rackscale")] if crate::CMDLINE .get() .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller) @@ -117,7 +118,6 @@ lazy_static! { process_logs }; - #[cfg(feature = "rackscale")] if crate::CMDLINE .get() .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller) @@ -132,61 +132,49 @@ lazy_static! { } lazy_static! 
{ - pub(crate) static ref PROCESS_TABLE: ArrayVec< - ArrayVec>>, MAX_PROCESSES>, - MAX_NUMA_NODES, - > = create_process_table(); + pub(crate) static ref PROCESS_TABLE: ArrayVec>>, MAX_PROCESSES> = + create_process_table(); } #[cfg(not(feature = "rackscale"))] -fn create_process_table( -) -> ArrayVec>>, MAX_PROCESSES>, MAX_NUMA_NODES> -{ - use crate::memory::detmem::DA; +fn create_process_table() -> ArrayVec>>, MAX_PROCESSES> { + use crate::arch::kcb; + use core::num::NonZeroUsize; + use nr2::nr::AffinityChange; // Want at least one replica... - let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes()); - - let mut numa_cache = ArrayVec::new(); - for _n in 0..numa_nodes { - let process_replicas = ArrayVec::new(); - debug_assert!(!numa_cache.is_full()); - numa_cache.push(process_replicas) - } - - for pid in 0..MAX_PROCESSES { - let allocator = DA::new().expect("Can't initialize process deterministic memory allocator"); + let num_replicas = + NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).unwrap(); + let mut processes = ArrayVec::new(); - for node in 0..numa_nodes { - debug_assert!(!numa_cache[node].is_full()); + for _pid in 0..MAX_PROCESSES { + debug_assert_eq!( + *crate::environment::NODE_ID, + 0, + "Expect initialization to happen on node 0." 
+ ); - let pcm = per_core_mem(); - pcm.set_mem_affinity(node as atopology::NodeId) - .expect("Can't change affinity"); + let process: Arc>> = Arc::try_new( + NodeReplicated::new(num_replicas, |afc: AffinityChange| { + let pcm = kcb::per_core_mem(); + match afc { + AffinityChange::Replica(r) => { + pcm.set_mem_affinity(r).expect("Can't set affinity") + } + AffinityChange::Revert(orig) => { + pcm.set_mem_affinity(orig).expect("Can't set affinity") + } + } + return 0; // TODO(dynrep): Return error code + }) + .expect("Not enough memory to initialize system"), + ) + .expect("Not enough memory to initialize system"); - let p = Box::try_new( - Ring3Process::new(pid, Box::new(allocator.clone())) - .expect("Can't create process during init"), - ) - .expect("Not enough memory to initialize processes"); - let nrp = NrProcess::new(p, Box::new(allocator.clone())); - - numa_cache[node].push(Replica::>::with_data( - &PROCESS_LOGS[pid], - nrp, - )); - - pcm.set_mem_affinity(0 as atopology::NodeId) - .expect("Can't change affinity"); - debug_assert_eq!( - *crate::environment::NODE_ID, - 0, - "Expect initialization to happen on node 0." 
- ); - } + processes.push(process) } - numa_cache + processes } #[cfg(feature = "rackscale")] @@ -266,10 +254,7 @@ impl crate::nrproc::ProcessManager for ArchProcessManagement { fn process_table( &self, - ) -> &'static ArrayVec< - ArrayVec>>, MAX_PROCESSES>, - MAX_NUMA_NODES, - > { + ) -> &'static ArrayVec>>, MAX_PROCESSES> { &*super::process::PROCESS_TABLE } } @@ -1026,8 +1011,42 @@ pub(crate) struct Ring3Process { pub read_only_offset: VAddr, } +static NEXT_PID: AtomicUsize = AtomicUsize::new(0); + +impl Default for NrProcess { + fn default() -> Self { + let next_pid = NEXT_PID.fetch_add(1, Ordering::Relaxed); + NrProcess::new( + Box::try_new( + Ring3Process::new(next_pid as Pid).expect("Failed to set-up process during init"), + ) + .expect("Failed to initialize process during init"), + ) + } +} + +impl Clone for Ring3Process { + fn clone(&self) -> Self { + unimplemented!("Clone not implemented for Ring3Process") + /*Ring3Process { + pid: self.pid, + current_eid: self.current_eid, + vspace: self.vspace.clone(), + offset: self.offset, + pinfo: self.pinfo.clone(), + entry_point: self.entry_point, + executor_cache: self.executor_cache.clone(), + executor_offset: self.executor_offset, + fds: self.fds.clone(), + pfm: self.pfm.clone(), + writeable_sections: self.writeable_sections.clone(), + read_only_offset: self.read_only_offset, + }*/ + } +} + impl Ring3Process { - fn new(pid: Pid, allocator: Box) -> Result { + fn new(pid: Pid) -> Result { const NONE_EXECUTOR: Option>> = None; #[cfg(not(feature = "rackscale"))] let executor_cache: ArrayVec>>, MAX_NUMA_NODES> = @@ -1051,7 +1070,7 @@ impl Ring3Process { pid: pid, current_eid: 0, offset: VAddr::from(ELF_OFFSET), - vspace: VSpace::new(allocator)?, + vspace: VSpace::new()?, entry_point: VAddr::from(0usize), executor_cache, executor_offset: VAddr::from(EXECUTOR_OFFSET), diff --git a/kernel/src/arch/x86_64/vspace/mod.rs b/kernel/src/arch/x86_64/vspace/mod.rs index 08c504004..42ea4effb 100644 --- 
a/kernel/src/arch/x86_64/vspace/mod.rs +++ b/kernel/src/arch/x86_64/vspace/mod.rs @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT use alloc::boxed::Box; -use core::alloc::Allocator; use core::ops::Bound::*; use fallible_collections::btree::BTreeMap; @@ -70,8 +69,7 @@ lazy_static! { // allocated with slabmalloc (maybe we can have a no_drop variant // of PageTable?) PageTable { - pml4: Box::into_pin(Box::from_raw(pml4_table)), - allocator: None, + pml4: Box::into_pin(Box::from_raw(pml4_table)) } } @@ -201,10 +199,10 @@ impl Drop for VSpace { } impl VSpace { - pub(crate) fn new(allocator: Box) -> Result { + pub(crate) fn new() -> Result { Ok(VSpace { mappings: BTreeMap::new(), - page_table: PageTable::new(allocator)?, + page_table: PageTable::new()?, }) } diff --git a/kernel/src/arch/x86_64/vspace/page_table.rs b/kernel/src/arch/x86_64/vspace/page_table.rs index 20ec9ac09..a21e8db17 100644 --- a/kernel/src/arch/x86_64/vspace/page_table.rs +++ b/kernel/src/arch/x86_64/vspace/page_table.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT use alloc::boxed::Box; -use core::alloc::{Allocator, Layout}; +use core::alloc::Layout; use core::mem::transmute; use core::pin::Pin; use core::ptr::NonNull; @@ -33,7 +33,6 @@ enum Modify { /// The actual page-table. We allocate the PML4 upfront. pub(crate) struct PageTable { pub pml4: Pin>, - pub allocator: Option>, } impl Drop for PageTable { @@ -171,14 +170,13 @@ impl PageTable { /// Create a new address-space. /// /// Allocate an initial PML4 table for it. 
- pub(crate) fn new(allocator: Box) -> Result { + pub(crate) fn new() -> Result { let pml4 = Box::try_new( [PML4Entry::new(PAddr::from(0x0u64), PML4Flags::empty()); PAGE_SIZE_ENTRIES], )?; Ok(PageTable { pml4: Box::into_pin(pml4), - allocator: Some(allocator), }) } @@ -820,16 +818,13 @@ impl PageTable { } fn alloc_frame(&self) -> Frame { - let frame_ptr = self.allocator.as_ref().map_or_else( - || unsafe { - let ptr = alloc::alloc::alloc(PT_LAYOUT); - debug_assert!(!ptr.is_null()); - - let nptr = NonNull::new_unchecked(ptr); - NonNull::slice_from_raw_parts(nptr, PT_LAYOUT.size()) - }, - |allocator| allocator.allocate(PT_LAYOUT).unwrap(), - ); + let frame_ptr = unsafe { + let ptr = alloc::alloc::alloc(PT_LAYOUT); + debug_assert!(!ptr.is_null()); + + let nptr = NonNull::new_unchecked(ptr); + NonNull::slice_from_raw_parts(nptr, PT_LAYOUT.size()) + }; let vaddr = VAddr::from(frame_ptr.as_ptr() as *const u8 as u64); let paddr = crate::arch::memory::kernel_vaddr_to_paddr(vaddr); let mut frame = Frame::new(paddr, PT_LAYOUT.size(), 0); diff --git a/kernel/src/integration_tests.rs b/kernel/src/integration_tests.rs index 4b132d461..7b93fe64a 100644 --- a/kernel/src/integration_tests.rs +++ b/kernel/src/integration_tests.rs @@ -422,7 +422,7 @@ fn coreboot_nrlog() { use core::sync::atomic::{AtomicBool, Ordering}; use klogger::sprintln; use log::info; - use nr2::Log; + use node_replication::Log; let log: Arc> = Arc::try_new(Log::::new(1024 * 1024 * 1)).expect("Can't Arc this"); diff --git a/kernel/src/nr.rs b/kernel/src/nr.rs index b5554cc79..09de748c5 100644 --- a/kernel/src/nr.rs +++ b/kernel/src/nr.rs @@ -7,7 +7,7 @@ use core::fmt::Debug; use alloc::sync::Arc; use hashbrown::HashMap; use log::{error, trace}; -use nr2::nr::{NodeReplicated, ThreadToken, Dispatch}; +use nr2::nr::{Dispatch, NodeReplicated, ThreadToken}; use spin::Once; #[cfg(feature = "rackscale")] @@ -99,7 +99,6 @@ pub(crate) struct CoreInfo { pub entry_point: VAddr, } - #[derive(Debug, Clone)] pub(crate) struct 
KernelNode { process_map: HashMap, diff --git a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs index a021fc002..61e2502de 100644 --- a/kernel/src/nrproc.rs +++ b/kernel/src/nrproc.rs @@ -4,7 +4,6 @@ use crate::prelude::*; use alloc::sync::Arc; use alloc::vec::Vec; -use core::alloc::Allocator; use core::mem::MaybeUninit; use fallible_collections::FallibleVecGlobal; @@ -12,11 +11,12 @@ use arrayvec::ArrayVec; use fallible_collections::vec::FallibleVec; use kpi::process::{FrameId, ProcessInfo}; use kpi::MemType; -use node_replication::{Dispatch, Replica, ReplicaToken}; +use nr2::nr::{Dispatch, NodeReplicated, ThreadToken}; use spin::Once; use crate::arch::process::PROCESS_TABLE; use crate::arch::MAX_NUMA_NODES; +use crate::arch::Module; use crate::error::{KError, KResult}; use crate::memory::vspace::{AddressSpace, MapAction, TlbFlushHandle}; use crate::memory::{Frame, PAddr, VAddr}; @@ -26,7 +26,7 @@ use crate::process::{ /// The tokens per core to access the process replicas. #[thread_local] -pub(crate) static PROCESS_TOKEN: Once> = Once::new(); +pub(crate) static PROCESS_TOKEN: Once> = Once::new(); /// Initializes `PROCESS_TOKEN`. /// @@ -38,9 +38,9 @@ pub(crate) fn register_thread_with_process_replicas() { PROCESS_TOKEN.call_once(|| { let mut tokens = ArrayVec::new(); for pid in 0..MAX_PROCESSES { - debug_assert!(PROCESS_TABLE[node].len() > pid, "Invalid PID"); + debug_assert!(PROCESS_TABLE.len() > pid, "Invalid PID"); - let token = PROCESS_TABLE[node][pid].register(); + let token = PROCESS_TABLE[pid].register(node); tokens.push(token.expect("Need to be able to register")); } @@ -115,11 +115,7 @@ pub(crate) enum ProcessResult { /// Advances the replica of all the processes on the current NUMA node. 
pub(crate) fn advance_all() { - let node = *crate::environment::NODE_ID; - - for pid in 0..MAX_PROCESSES { - PROCESS_TABLE[node][pid].sync(PROCESS_TOKEN.get().unwrap()[pid]); - } + unreachable!("shouldn't be called anymore dynrep"); } pub(crate) trait ProcessManager { @@ -128,25 +124,20 @@ pub(crate) trait ProcessManager { #[allow(clippy::type_complexity)] // fix this once `associated_type_defaults` works fn process_table( &self, - ) -> &'static ArrayVec< - ArrayVec>>, MAX_PROCESSES>, - MAX_NUMA_NODES, - >; + ) -> &'static ArrayVec>>, MAX_PROCESSES>; } /// A node-replicated process. -pub(crate) struct NrProcess { +#[derive(Clone)] +pub(crate) struct NrProcess { /// A list of all cores where the current process is running. - active_cores: Vec<(kpi::system::GlobalThreadId, Eid), M>, + active_cores: Vec<(kpi::system::GlobalThreadId, Eid)>, /// The process struct itself. process: Box

, } impl NrProcess

{ - pub(crate) fn new( - process: Box

, - _allocator: Box, - ) -> NrProcess

{ + pub(crate) fn new(process: Box

) -> NrProcess

{ NrProcess { active_cores: Vec::new(), process, @@ -161,10 +152,7 @@ impl NrProcess

{ writeable_sections: Vec, ) -> Result<(), KError> { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - - let node = *crate::environment::NODE_ID; - - let response = PROCESS_TABLE[node][pid].execute_mut( + let response = PROCESS_TABLE[pid].execute_mut( ProcessOpMut::Load(pid, module_name, writeable_sections), PROCESS_TOKEN.get().unwrap()[pid], ); @@ -178,10 +166,7 @@ impl NrProcess

{ pub(crate) fn resolve(pid: Pid, base: VAddr) -> Result<(u64, u64), KError> { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); debug_assert!(base.as_u64() < kpi::KERNEL_BASE, "Invalid base"); - - let node = *crate::environment::NODE_ID; - - let response = PROCESS_TABLE[node][pid].execute( + let response = PROCESS_TABLE[pid].execute( ProcessOp::MemResolve(base), PROCESS_TOKEN.get().unwrap()[pid], ); @@ -194,10 +179,7 @@ impl NrProcess

{ pub(crate) fn synchronize(pid: Pid) { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - - let node = *crate::environment::NODE_ID; - - PROCESS_TABLE[node][pid].sync(PROCESS_TOKEN.get().unwrap()[pid]); + PROCESS_TABLE[pid].sync(PROCESS_TOKEN.get().unwrap()[pid]); } pub(crate) fn map_device_frame( @@ -206,10 +188,7 @@ impl NrProcess

{ action: MapAction, ) -> Result<(u64, u64), KError> { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - - let node = *crate::environment::NODE_ID; - - let response = PROCESS_TABLE[node][pid].execute_mut( + let response = PROCESS_TABLE[pid].execute_mut( ProcessOpMut::MemMapDevice(frame, action), PROCESS_TOKEN.get().unwrap()[pid], ); @@ -222,10 +201,7 @@ impl NrProcess

{ pub(crate) fn unmap(pid: Pid, base: VAddr) -> Result, KError> { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - - let node = *crate::environment::NODE_ID; - - let response = PROCESS_TABLE[node][pid].execute_mut( + let response = PROCESS_TABLE[pid].execute_mut( ProcessOpMut::MemUnmap(base), PROCESS_TOKEN.get().unwrap()[pid], ); @@ -243,11 +219,8 @@ impl NrProcess

{ action: MapAction, ) -> Result<(PAddr, usize), KError> { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - - let node = *crate::environment::NODE_ID; //action.multiple_mappings(true); - - let response = PROCESS_TABLE[node][pid].execute_mut( + let response = PROCESS_TABLE[pid].execute_mut( ProcessOpMut::MemMapFrameId(base, frame_id, action), PROCESS_TOKEN.get().unwrap()[pid], ); @@ -265,12 +238,9 @@ impl NrProcess

{ action: MapAction, ) -> Result<(u64, u64), KError> { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - - let node = *crate::environment::NODE_ID; - let mut virtual_offset = 0; for frame in frames { - let response = PROCESS_TABLE[node][pid].execute_mut( + let response = PROCESS_TABLE[pid].execute_mut( ProcessOpMut::MemMapFrame(base + virtual_offset, frame, action), PROCESS_TOKEN.get().unwrap()[pid], ); @@ -293,11 +263,8 @@ impl NrProcess

{ pub(crate) fn pinfo(pid: Pid) -> Result { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - - let node = *crate::environment::NODE_ID; - - let response = PROCESS_TABLE[node][pid] - .execute(ProcessOp::ProcessInfo, PROCESS_TOKEN.get().unwrap()[pid]); + let response = + PROCESS_TABLE[pid].execute(ProcessOp::ProcessInfo, PROCESS_TOKEN.get().unwrap()[pid]); match response { Ok(ProcessResult::ProcessInfo(pinfo)) => Ok(pinfo), Err(e) => Err(e), @@ -313,7 +280,7 @@ impl NrProcess

{ let gtid = *crate::environment::CORE_ID; let node = *crate::environment::NODE_ID; - let response = pm.process_table()[node][pid].execute_mut( + let response = pm.process_table()[pid].execute_mut( ProcessOpMut::AssignExecutor(gtid, node), PROCESS_TOKEN.get().unwrap()[pid], ); @@ -344,10 +311,7 @@ impl NrProcess

{ pub(crate) fn allocate_frame_to_process(pid: Pid, frame: Frame) -> Result { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - - let node = *crate::environment::NODE_ID; - - let response = PROCESS_TABLE[node][pid].execute_mut( + let response = PROCESS_TABLE[pid].execute_mut( ProcessOpMut::AllocateFrameToProcess(frame), PROCESS_TOKEN.get().unwrap()[pid], ); @@ -361,10 +325,7 @@ impl NrProcess

{ pub(crate) fn release_frame_from_process(pid: Pid, fid: FrameId) -> Result { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); debug_assert!(fid < MAX_FRAMES_PER_PROCESS, "Invalid FID"); - - let node = *crate::environment::NODE_ID; - - let response = PROCESS_TABLE[node][pid].execute_mut( + let response = PROCESS_TABLE[pid].execute_mut( ProcessOpMut::ReleaseFrameFromProcess(fid), PROCESS_TOKEN.get().unwrap()[pid], ); @@ -377,13 +338,10 @@ impl NrProcess

{ pub(crate) fn allocate_dispatchers(pid: Pid, frame: Frame) -> Result { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - - let node = *crate::environment::NODE_ID; - #[cfg(feature = "rackscale")] let mid = *crate::environment::MACHINE_ID; - let response = PROCESS_TABLE[node][pid].execute_mut( + let response = PROCESS_TABLE[pid].execute_mut( #[cfg(not(feature = "rackscale"))] ProcessOpMut::DispatcherAllocation(frame), #[cfg(feature = "rackscale")] @@ -399,9 +357,7 @@ impl NrProcess

{ } pub(crate) fn userslice_to_arc_slice(from: UserSlice) -> Result, KError> { - let node = *crate::environment::NODE_ID; - - let response = PROCESS_TABLE[node][from.pid].execute( + let response = PROCESS_TABLE[from.pid].execute( ProcessOp::ReadSlice(from), PROCESS_TOKEN.get().unwrap()[from.pid], ); @@ -413,9 +369,7 @@ impl NrProcess

{ } pub(crate) fn read_string_from_userspace(from: UserSlice) -> Result { - let node = *crate::environment::NODE_ID; - - let response = PROCESS_TABLE[node][from.pid].execute( + let response = PROCESS_TABLE[from.pid].execute( ProcessOp::ReadString(from), PROCESS_TOKEN.get().unwrap()[from.pid], ); @@ -427,10 +381,9 @@ impl NrProcess

{ } pub(crate) fn write_to_userspace(to: &mut UserSlice, kbuf: &[u8]) -> Result<(), KError> { - let node = *crate::environment::NODE_ID; let pid = to.pid; - let response = PROCESS_TABLE[node][pid].execute( + let response = PROCESS_TABLE[pid].execute( ProcessOp::WriteSlice(to, kbuf), PROCESS_TOKEN.get().unwrap()[pid], ); @@ -446,9 +399,7 @@ impl NrProcess

{ on: UserSlice, f: Box KResult<(u64, u64)>>, ) -> Result<(u64, u64), KError> { - let node = *crate::environment::NODE_ID; - - let response = PROCESS_TABLE[node][on.pid].execute( + let response = PROCESS_TABLE[on.pid].execute( ProcessOp::ExecSliceMut(on, f), PROCESS_TOKEN.get().unwrap()[on.pid], ); @@ -463,9 +414,7 @@ impl NrProcess

{ on: &'a UserSlice, f: Box KResult<()>>, ) -> Result<(), KError> { - let node = *crate::environment::NODE_ID; - - let response = PROCESS_TABLE[node][on.pid].execute( + let response = PROCESS_TABLE[on.pid].execute( ProcessOp::ExecSlice(on, f), PROCESS_TOKEN.get().unwrap()[on.pid], ); @@ -477,11 +426,10 @@ impl NrProcess

{ } } -impl Dispatch for NrProcess +impl

Dispatch for NrProcess

where P: Process, P::E: Copy, - M: Allocator + Clone, { type ReadOperation<'buf> = ProcessOp<'buf>; type WriteOperation = ProcessOpMut; diff --git a/kernel/src/process.rs b/kernel/src/process.rs index 5d19bd798..77ad93dd9 100644 --- a/kernel/src/process.rs +++ b/kernel/src/process.rs @@ -58,7 +58,7 @@ pub(crate) const MAX_FRAMES_PER_PROCESS: usize = MAX_CORES; pub(crate) const MAX_WRITEABLE_SECTIONS_PER_PROCESS: usize = 4; /// Abstract definition of a process. -pub(crate) trait Process: FrameManagement { +pub(crate) trait Process: FrameManagement + Clone { type E: Executor + Copy + Sync + Send + Debug + PartialEq; type A: AddressSpace; From 04baf3105b4f571bb9e1783a886484a0b6de22be Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Sun, 24 Sep 2023 21:11:02 -0700 Subject: [PATCH 04/32] Make tests work with new node-replication code. Signed-off-by: Gerd Zellweger --- kernel/src/arch/x86_64/process.rs | 37 +++++++++++++++++++ .../src/arch/x86_64/rackscale/controller.rs | 3 ++ .../x86_64/rackscale/get_shmem_structure.rs | 2 + kernel/src/arch/x86_64/vspace/page_table.rs | 2 +- kernel/src/arch/x86_64/vspace/test.rs | 2 +- kernel/src/nrproc.rs | 5 --- kernel/src/scheduler/mod.rs | 1 - kernel/tests/s04_user_runtime_tests.rs | 1 + kernel/tests/s10_benchmarks.rs | 8 ++-- lib/node-replication2 | 2 +- usr/init/src/vmops/mod.rs | 2 +- 11 files changed, 51 insertions(+), 14 deletions(-) diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs index b830712ea..4c0074e7d 100644 --- a/kernel/src/arch/x86_64/process.rs +++ b/kernel/src/arch/x86_64/process.rs @@ -106,6 +106,43 @@ lazy_static! 
{ return process_logs; } + // TODO(dynrep): here we create the Log on the controller for sending it + // to the data-kernels this would probably need to create a + // NodeReplicated NodeReplicated instance + + // NodeReplicated::new(#data-kernels) -> + // - for data_kernel in 0..#data-kernels { + // - change affinity to data_kernel + // - Box::new(bla) [allocator will go go to DCM if necessary] + // - change affinity back to controller + // } + + /* + == Controller: + |afc: AffinityChange| { + let pcm = kcb::per_core_mem(); + match afc { + AffinityChange::Replica(r: MachineId) => { + // We want to allocate the logs in controller shared memory + use crate::memory::shmem_affinity::local_shmem_affinity; + let pcm = per_core_mem(); + pcm.set_mem_affinity(mid_to_shmem_affinity(r)).expect("Can't change affinity"); + } + AffinityChange::Revert(orig) => { + // We want to allocate the logs in controller shared memory + use crate::memory::shmem_affinity::local_shmem_affinity; + let pcm = per_core_mem(); + - pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't change affinity"); + OR + - pcm.set_mem_affinity(orig).expect("Can't change affinity"); + } + } + return 0; // TODO(dynrep): Return error code + + == Data kernel + - The closure when set on controller probably won't work in data-kernel (diff symbol addresses?) + - The binary might be fine because it's identical! + */ let process_logs = { let mut process_logs = Box::try_new(ArrayVec::new()).expect("Can't initialize process log vector."); for _pid in 0..MAX_PROCESSES { diff --git a/kernel/src/arch/x86_64/rackscale/controller.rs b/kernel/src/arch/x86_64/rackscale/controller.rs index 23339f0ef..20def97b5 100644 --- a/kernel/src/arch/x86_64/rackscale/controller.rs +++ b/kernel/src/arch/x86_64/rackscale/controller.rs @@ -79,6 +79,9 @@ pub(crate) fn run() { { } + // TODO(dynrep): here is the point where we have all the memory regions and + // could create NodeReplicated instances for each client. 
+ #[cfg(feature = "test-controller-shmem-alloc")] { if mid == 1 { diff --git a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs index 42aefaed5..f2d8e4a77 100644 --- a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs +++ b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs @@ -29,6 +29,8 @@ use crate::process::MAX_PROCESSES; #[derive(Debug, Eq, PartialEq, PartialOrd, Clone, Copy)] #[repr(u8)] pub enum ShmemStructure { + // TODO(dynrep): remove NrProcLogs/NrLog add NodeReplicated and + // NodeReplicated instead that gets sent from controller NrProcLogs = 0, NrLog = 1, WorkQueues = 2, diff --git a/kernel/src/arch/x86_64/vspace/page_table.rs b/kernel/src/arch/x86_64/vspace/page_table.rs index a21e8db17..5ed05cc36 100644 --- a/kernel/src/arch/x86_64/vspace/page_table.rs +++ b/kernel/src/arch/x86_64/vspace/page_table.rs @@ -719,7 +719,7 @@ impl PageTable { return Err(KError::AlreadyMapped { base: vbase }); } else { panic!( - "An existing mapping already covers the 2 MiB range we're trying to map in?" + "An existing mapping already covers the 2 MiB range we're trying to map in? {vbase}" ); } } diff --git a/kernel/src/arch/x86_64/vspace/test.rs b/kernel/src/arch/x86_64/vspace/test.rs index ae357dfc0..242117a8a 100644 --- a/kernel/src/arch/x86_64/vspace/test.rs +++ b/kernel/src/arch/x86_64/vspace/test.rs @@ -95,7 +95,7 @@ proptest! 
{ use TestAction::*; use crate::memory::detmem::DA; - let mut totest = VSpace::new(Box::new(DA::new().expect("Unable to create DA"))).expect("Unable to create vspace");; + let mut totest = VSpace::new().expect("Unable to create vspace");; let mut model: ModelAddressSpace = Default::default(); for action in ops { diff --git a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs index 61e2502de..d573dc2cf 100644 --- a/kernel/src/nrproc.rs +++ b/kernel/src/nrproc.rs @@ -113,11 +113,6 @@ pub(crate) enum ProcessResult { ReadString(String), } -/// Advances the replica of all the processes on the current NUMA node. -pub(crate) fn advance_all() { - unreachable!("shouldn't be called anymore dynrep"); -} - pub(crate) trait ProcessManager { type Process: Process + Sync; diff --git a/kernel/src/scheduler/mod.rs b/kernel/src/scheduler/mod.rs index cd40ec90a..e55a57893 100644 --- a/kernel/src/scheduler/mod.rs +++ b/kernel/src/scheduler/mod.rs @@ -67,7 +67,6 @@ pub(crate) fn schedule() -> ! { // There is no process but we're the "main" thread, // aggressively try and advance the replica let start = rawtime::Instant::now(); - crate::nrproc::advance_all(); crate::arch::advance_fs_replica(); if start.elapsed().as_millis() < 1 { diff --git a/kernel/tests/s04_user_runtime_tests.rs b/kernel/tests/s04_user_runtime_tests.rs index ba0dc4bdd..e8d749895 100644 --- a/kernel/tests/s04_user_runtime_tests.rs +++ b/kernel/tests/s04_user_runtime_tests.rs @@ -31,6 +31,7 @@ fn s04_userspace_multicore() { .user_feature("test-scheduler-smp") .build(); let cmdline = RunnerArgs::new_with_build("userspace-smp", &build) + .nodes(num_cores / 16) .cores(num_cores) .memory(4096) .timeout(120_000); diff --git a/kernel/tests/s10_benchmarks.rs b/kernel/tests/s10_benchmarks.rs index f590ace52..4bff3512d 100644 --- a/kernel/tests/s10_benchmarks.rs +++ b/kernel/tests/s10_benchmarks.rs @@ -487,13 +487,13 @@ fn s10_fxmark_benchmark() { if cfg!(feature = "smoke") { cmdline = cmdline.memory(8192); } else { - cmdline = 
cmdline.memory(core::cmp::max(73728, cores * 2048)); + cmdline = cmdline.memory(core::cmp::max(87728, cores * 2048)); } if cfg!(feature = "smoke") && cores > 2 { - cmdline = cmdline.nodes(2); + cmdline = cmdline.nodes(std::cmp::max(machine.max_cores() / 16, 1)); } else { - cmdline = cmdline.nodes(machine.max_numa_nodes()); + cmdline = cmdline.nodes(std::cmp::max(machine.max_cores() / 16, machine.max_numa_nodes())); } let mut output = String::new(); @@ -834,7 +834,7 @@ fn s10_leveldb_benchmark() { } #[test] -fn s10_memcached_benchmark_internal() { +fn s10_xmemcached_benchmark_internal() { setup_network(1); let machine = Machine::determine(); diff --git a/lib/node-replication2 b/lib/node-replication2 index f0a91fdca..23822c47c 160000 --- a/lib/node-replication2 +++ b/lib/node-replication2 @@ -1 +1 @@ -Subproject commit f0a91fdca08adfdd5ad8d11c2e78d7b2d852e512 +Subproject commit 23822c47c8be92eee9f440c93e6a836061793967 diff --git a/usr/init/src/vmops/mod.rs b/usr/init/src/vmops/mod.rs index 4ad3f4fb6..c802a23ad 100644 --- a/usr/init/src/vmops/mod.rs +++ b/usr/init/src/vmops/mod.rs @@ -35,7 +35,7 @@ fn maponly_bencher(cores: usize) { // see process.rs the heap split up by core from slots 1..128, so we start from there let vspace_offset = lineup::tls2::Environment::tid().0 + 1; - let mut base: u64 = (128 * PML4_SLOT_SIZE + (PML4_SLOT_SIZE * vspace_offset)) as u64; + let mut base: u64 = (220 * PML4_SLOT_SIZE + (PML4_SLOT_SIZE * vspace_offset)) as u64; info!("start mapping at {:#x}", base); #[cfg(feature = "latency")] From 6353729d9856060a55ae03c06754453b3cd6d4b2 Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Sun, 24 Sep 2023 21:16:46 -0700 Subject: [PATCH 05/32] Update submodule. 
Signed-off-by: Gerd Zellweger --- .gitmodules | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitmodules b/.gitmodules index f18158aec..c550518e3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,3 +2,7 @@ path = lib/node-replication branch = mut_scan_vec url = git@github.com:gz/node-replication.git +[submodule "lib/node-replication2"] + path = lib/node-replication2 + url = git@github.com:gz/node-replication.git + branch = nr-dymanic-replication From d5664111c80d84eb17f719f15ff9f843be6c0960 Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Mon, 25 Sep 2023 01:48:54 -0700 Subject: [PATCH 06/32] Compilable rack-scale code. Signed-off-by: Gerd Zellweger --- kernel/src/arch/x86_64/mod.rs | 39 +++---- kernel/src/arch/x86_64/process.rs | 105 ++++++++++-------- .../x86_64/rackscale/get_shmem_structure.rs | 17 ++- kernel/src/memory/shmemalloc.rs | 1 + kernel/src/nr.rs | 56 +++++----- 5 files changed, 116 insertions(+), 102 deletions(-) diff --git a/kernel/src/arch/x86_64/mod.rs b/kernel/src/arch/x86_64/mod.rs index 7e4a7e4ae..2839519a5 100644 --- a/kernel/src/arch/x86_64/mod.rs +++ b/kernel/src/arch/x86_64/mod.rs @@ -21,18 +21,14 @@ use alloc::sync::Arc; use core::mem::transmute; -use core::num::NonZeroUsize; use core::sync::atomic::AtomicBool; use core::sync::atomic::Ordering; -#[cfg(feature = "rackscale")] -use crate::nr::NR_LOG; pub use bootloader_shared::*; use cnr::Replica as MlnrReplica; use fallible_collections::TryClone; use klogger::sprint; use log::{debug, error, info}; -use nr2::nr::{AffinityChange, NodeReplicated}; use x86::{controlregs, cpuid}; use crate::cmdline::CommandLineArguments; @@ -40,7 +36,6 @@ use crate::fs::cnrfs::MlnrKernelNode; use crate::memory::global::GlobalMemory; use crate::memory::mcache; use crate::memory::per_core::PerCoreMemory; -use crate::nr::KernelNode; use crate::ExitReason; use coreboot::AppCoreArgs; @@ -449,12 +444,16 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { // Set-up interrupt routing drivers (I/O APIC 
controllers) irq::ioapic_initialize(); - // Let's go with one replica per NUMA node for now: - let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes()); - let numa_nodes = NonZeroUsize::new(numa_nodes).expect("At least one NUMA node"); - #[cfg(not(feature = "rackscale"))] let kernel_node = { + use core::num::NonZeroUsize; + use crate::nr::KernelNode; + use nr2::nr::{AffinityChange, NodeReplicated}; + + // Let's go with one replica per NUMA node for now: + let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes()); + let numa_nodes = NonZeroUsize::new(numa_nodes).expect("At least one NUMA node"); + // Create the global operation log and first replica and store it (needs // TLS) let kernel_node: Arc> = Arc::try_new( @@ -516,7 +515,12 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { // Initialize processes #[cfg(feature = "rackscale")] - lazy_static::initialize(&process::PROCESS_LOGS); + if crate::CMDLINE + .get() + .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller) + { + lazy_static::initialize(&process::PROCESS_TABLE); + } #[cfg(not(feature = "rackscale"))] { @@ -525,7 +529,7 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { } #[cfg(feature = "rackscale")] - let (log, bsp_replica) = { + let kernel_node = { if crate::CMDLINE .get() .map_or(false, |c| c.mode == crate::cmdline::Mode::Client) @@ -534,15 +538,12 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { crate::nrproc::register_thread_with_process_replicas(); } - // this calls an RPC on the client, which is why we do this later in initialization than in non-rackscale - lazy_static::initialize(&NR_LOG); + lazy_static::initialize(&crate::nr::KERNEL_NODE_INSTANCE); + let kernel_node = crate::nr::KERNEL_NODE_INSTANCE.clone(); - // For rackscale, only the controller is going to create the base log. - // All clients will use this to create replicas. 
- let bsp_replica = Replica::::new(&NR_LOG); - let local_ridx = bsp_replica.register().unwrap(); - crate::nr::NR_REPLICA.call_once(|| (bsp_replica.clone(), local_ridx)); - (&NR_LOG.clone(), bsp_replica) + let local_ridx = kernel_node.register(0).unwrap(); + crate::nr::NR_REPLICA.call_once(|| (kernel_node.clone(), local_ridx)); + kernel_node }; #[cfg(feature = "gdb")] diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs index 4c0074e7d..b9dbd712f 100644 --- a/kernel/src/arch/x86_64/process.rs +++ b/kernel/src/arch/x86_64/process.rs @@ -20,9 +20,9 @@ use kpi::arch::SaveArea; use kpi::process::{FrameId, ELF_OFFSET, EXECUTOR_OFFSET}; use lazy_static::lazy_static; use log::{debug, info, trace, warn}; -#[cfg(feature = "rackscale")] -use node_replication::{Dispatch, Log, Replica}; -use nr2::nr::NodeReplicated; +use crate::arch::kcb::{self, per_core_mem}; +use core::num::NonZeroUsize; +use nr2::nr::{NodeReplicated, AffinityChange}; use x86::bits64::paging::*; use x86::bits64::rflags; use x86::{controlregs, Ring}; @@ -72,25 +72,16 @@ pub(crate) fn current_pid() -> KResult { #[cfg(feature = "rackscale")] lazy_static! 
{ - pub(crate) static ref PROCESS_LOGS: Box< - ArrayVec< - Arc as Dispatch>::WriteOperation>>, - MAX_PROCESSES, - >, - > = { - + pub(crate) static ref PROCESS_TABLE: ArrayVec>>, MAX_PROCESSES> = { + use crate::memory::shmem_affinity::mid_to_shmem_affinity; - if crate::CMDLINE + if !crate::CMDLINE .get() .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller) { - // We want to allocate the logs in controller shared memory - use crate::memory::shmem_affinity::local_shmem_affinity; - let pcm = per_core_mem(); - pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't change affinity"); - } else { // Get location of the logs from the controller, who will have created them in shared memory - use crate::arch::rackscale::get_shmem_structure::{rpc_get_shmem_structure, ShmemStructure}; + + /*use crate::arch::rackscale::get_shmem_structure::{rpc_get_shmem_structure, ShmemStructure}; let mut log_ptrs = [0u64; MAX_PROCESSES]; rpc_get_shmem_structure(ShmemStructure::NrProcLogs, &mut log_ptrs[..]).expect("Failed to get process log pointers"); @@ -103,12 +94,54 @@ lazy_static! { }; process_logs.push(local_log_arc); } - return process_logs; + return process_logs;*/ + unimplemented!("Need to get NodeReplicated from controller") } - // TODO(dynrep): here we create the Log on the controller for sending it - // to the data-kernels this would probably need to create a - // NodeReplicated NodeReplicated instance + // We want to allocate the logs in controller shared memory + use crate::memory::shmem_affinity::local_shmem_affinity; + let pcm = per_core_mem(); + pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't change affinity"); + + // Want at least one replica... + let num_replicas = + NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).unwrap(); + let mut processes = ArrayVec::new(); + + for _pid in 0..MAX_PROCESSES { + debug_assert_eq!( + *crate::environment::NODE_ID, + 0, + "Expect initialization to happen on node 0." 
+ ); + + let process: Arc>> = Arc::try_new( + NodeReplicated::new(num_replicas, |afc: AffinityChange| { + let pcm = kcb::per_core_mem(); + match afc { + AffinityChange::Replica(r) => { + pcm.set_mem_affinity(mid_to_shmem_affinity(r)).expect("Can't change affinity"); + } + AffinityChange::Revert(_orig) => { + pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't set affinity") + } + } + return 0; // TODO(dynrep): Return error code + }) + .expect("Not enough memory to initialize system"), + ) + .expect("Not enough memory to initialize system"); + + processes.push(process) + } + + + // Reset mem allocator to use per core memory again + let pcm = per_core_mem(); + pcm.set_mem_affinity(0 as atopology::NodeId).expect("Can't change affinity"); + + processes + // NodeReplicated::new(#data-kernels) -> // - for data_kernel in 0..#data-kernels { @@ -143,31 +176,10 @@ lazy_static! { - The closure when set on controller probably won't work in data-kernel (diff symbol addresses?) - The binary might be fine because it's identical! */ - let process_logs = { - let mut process_logs = Box::try_new(ArrayVec::new()).expect("Can't initialize process log vector."); - for _pid in 0..MAX_PROCESSES { - let log = Arc::try_new( - Log::< as Dispatch>::WriteOperation>::new(LARGE_PAGE_SIZE), - ) - .expect("Can't initialize process logs, out of memory."); - process_logs.push(log); - } - process_logs - }; - - if crate::CMDLINE - .get() - .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller) - { - // Reset mem allocator to use per core memory again - let pcm = per_core_mem(); - pcm.set_mem_affinity(0 as atopology::NodeId).expect("Can't change affinity"); - } - - process_logs }; } +#[cfg(not(feature = "rackscale"))] lazy_static! { pub(crate) static ref PROCESS_TABLE: ArrayVec>>, MAX_PROCESSES> = create_process_table(); @@ -175,10 +187,6 @@ lazy_static! 
{ #[cfg(not(feature = "rackscale"))] fn create_process_table() -> ArrayVec>>, MAX_PROCESSES> { - use crate::arch::kcb; - use core::num::NonZeroUsize; - use nr2::nr::AffinityChange; - // Want at least one replica... let num_replicas = NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).unwrap(); @@ -214,6 +222,7 @@ fn create_process_table() -> ArrayVec processes } +/* #[cfg(feature = "rackscale")] fn create_process_table( ) -> ArrayVec>>, MAX_PROCESSES>, MAX_NUMA_NODES> @@ -283,7 +292,7 @@ fn create_process_table( numa_cache } - + */ pub(crate) struct ArchProcessManagement; impl crate::nrproc::ProcessManager for ArchProcessManagement { diff --git a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs index f2d8e4a77..bf3b8f49b 100644 --- a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs +++ b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs @@ -9,19 +9,19 @@ use core2::io::Write; use atopology::NodeId; use crossbeam_queue::ArrayQueue; -use nr2::nr::{Dispatch, Log}; +use nr2::nr::{Dispatch, Log, NodeReplicated}; use rpc::rpc::*; use super::client_state::CLIENT_STATE; use super::kernelrpc::*; use crate::arch::kcb::per_core_mem; -use crate::arch::process::{Ring3Process, PROCESS_LOGS}; +use crate::arch::process::{Ring3Process, PROCESS_TABLE}; use crate::arch::tlb::{Shootdown, RACKSCALE_CLIENT_WORKQUEUES}; use crate::error::{KError, KResult}; use crate::memory::shmem_affinity::local_shmem_affinity; use crate::memory::vspace::TlbFlushHandle; use crate::memory::{kernel_vaddr_to_paddr, paddr_to_kernel_vaddr, PAddr, VAddr}; -use crate::nr::{Op, NR_LOG}; +use crate::nr::{Op, KERNEL_NODE_INSTANCE}; use crate::nrproc::NrProcess; use crate::process::MAX_PROCESSES; @@ -127,20 +127,17 @@ pub(crate) fn handle_get_shmem_structure( ShmemStructure::NrProcLogs => { let mut logs = [0u64; MAX_PROCESSES]; - for i in 0..PROCESS_LOGS.len() { + for i in 0..PROCESS_TABLE.len() { // Create a 
clone in shared memory, and get the raw representation of it // The clone increments the strong counter, and the into_raw consumes this clone of the arc. - let client_clone = Arc::into_raw(Arc::clone(&PROCESS_LOGS[i])); + let client_clone = Arc::into_raw(Arc::clone(&PROCESS_TABLE[i])); // Send the raw pointer to the client clone address. To do this, we'll convert the kernel address // to a physical address, and then change it to a shmem offset by subtracting the shmem base. // TODO(rackscale): try to simplify this, and below? let arc_log_paddr = kernel_vaddr_to_paddr(VAddr::from_u64( (*&client_clone - as *const Log< - 'static, - as Dispatch>::WriteOperation, - >) as u64, + as *const NodeReplicated>) as u64, )); logs[i] = arc_log_paddr.as_u64(); } @@ -150,7 +147,7 @@ pub(crate) fn handle_get_shmem_structure( hdr.msg_len = core::mem::size_of::<[u64; MAX_PROCESSES]>() as MsgLen; } ShmemStructure::NrLog => { - let log_clone = Arc::into_raw(Arc::clone(&NR_LOG)); + let log_clone = Arc::into_raw(Arc::clone(&KERNEL_NODE_INSTANCE)); let log_paddr = kernel_vaddr_to_paddr(VAddr::from_u64((*&log_clone as *const Log) as u64)) .as_u64(); diff --git a/kernel/src/memory/shmemalloc.rs b/kernel/src/memory/shmemalloc.rs index e1f39d842..2b2c13b5d 100644 --- a/kernel/src/memory/shmemalloc.rs +++ b/kernel/src/memory/shmemalloc.rs @@ -20,6 +20,7 @@ pub(crate) struct ShmemAlloc { } impl ShmemAlloc { + #[allow(dead_code)] pub(crate) fn new(affinity: NodeId) -> ShmemAlloc { assert!( is_shmem_affinity(affinity) diff --git a/kernel/src/nr.rs b/kernel/src/nr.rs index 09de748c5..538ab55e0 100644 --- a/kernel/src/nr.rs +++ b/kernel/src/nr.rs @@ -3,12 +3,14 @@ use crate::prelude::*; use core::fmt::Debug; +use core::num::NonZeroUsize; use alloc::sync::Arc; use hashbrown::HashMap; use log::{error, trace}; -use nr2::nr::{Dispatch, NodeReplicated, ThreadToken}; +use nr2::nr::{Dispatch, NodeReplicated, ThreadToken, AffinityChange}; use spin::Once; +use crate::arch::kcb; #[cfg(feature = "rackscale")] 
use lazy_static::lazy_static; @@ -25,39 +27,43 @@ pub(crate) static NR_REPLICA: Once<(Arc>, ThreadToken // clones to client so they can create replicas of their own. #[cfg(feature = "rackscale")] lazy_static! { - pub(crate) static ref NR_LOG: Arc> = { + pub(crate) static ref KERNEL_NODE_INSTANCE: Arc> = { + use crate::memory::shmem_affinity::mid_to_shmem_affinity; + use crate::memory::shmem_affinity::local_shmem_affinity; + if crate::CMDLINE .get() .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller) { - use nr2::nr::Log; - use crate::arch::kcb::per_core_mem; - use crate::memory::{LARGE_PAGE_SIZE, shmem_affinity::local_shmem_affinity}; - - let pcm = per_core_mem(); - pcm.set_mem_affinity(local_shmem_affinity()) - .expect("Can't change affinity"); - - let log = Arc::try_new(Log::::new(LARGE_PAGE_SIZE)).expect("Not enough memory to initialize system"); - - // Reset mem allocator to use per core memory again - let pcm = per_core_mem(); - pcm.set_mem_affinity(0 as atopology::NodeId) - .expect("Can't change affinity"); - - log + // Want at least one replica... 
+ let num_replicas = + NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).unwrap(); + Arc::try_new( + NodeReplicated::new(num_replicas, |afc: AffinityChange| { + let pcm = kcb::per_core_mem(); + match afc { + AffinityChange::Replica(r) => { + pcm.set_mem_affinity(mid_to_shmem_affinity(r)).expect("Can't change affinity"); + } + AffinityChange::Revert(_orig) => { + pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't set affinity") + } + } + return 0; // TODO(dynrep): Return error code + }) + .expect("Not enough memory to initialize system"), + ) + .expect("Not enough memory to initialize system") } else { - use nr2::nr::Log; use crate::memory::{paddr_to_kernel_vaddr, PAddr}; - use crate::arch::rackscale::get_shmem_structure::{rpc_get_shmem_structure, ShmemStructure}; // Get location of the nr log from the controller, who will created them in shared memory - let mut log_ptrs = [0u64; 1]; - rpc_get_shmem_structure(ShmemStructure::NrLog, &mut log_ptrs).expect("Failed to get nr log from controller"); - let log_ptr = paddr_to_kernel_vaddr(PAddr::from(log_ptrs[0])); - let local_log_arc = unsafe { Arc::from_raw(log_ptr.as_u64() as *const Log<'static, Op>) }; - local_log_arc + let mut node_replicated_ptrs = [0u64; 1]; + rpc_get_shmem_structure(ShmemStructure::NrLog, &mut node_replicated_ptrs).expect("Failed to get nr log from controller"); + let nr_ptr = paddr_to_kernel_vaddr(PAddr::from(node_replicated_ptrs[0])); + let nr_instance = unsafe { Arc::from_raw(nr_ptr.as_u64() as *const NodeReplicated) }; + nr_instance } }; } From 135246953d5adf5e9d298e4390b29ce26ead15eb Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Sun, 15 Oct 2023 22:33:48 -0700 Subject: [PATCH 07/32] Add syscalls to add/remove replica. 
Signed-off-by: Gerd Zellweger --- kernel/src/arch/unix/process.rs | 1 + kernel/src/arch/unix/syscalls.rs | 4 ++ kernel/src/arch/x86_64/mod.rs | 1 + kernel/src/arch/x86_64/process.rs | 34 +++++++------- .../arch/x86_64/rackscale/get_shmem_frames.rs | 3 +- .../x86_64/rackscale/get_shmem_structure.rs | 18 ++++---- kernel/src/arch/x86_64/rackscale/syscalls.rs | 4 ++ kernel/src/arch/x86_64/syscall.rs | 12 +++++ kernel/src/arch/x86_64/vspace/mod.rs | 9 ++-- kernel/src/arch/x86_64/vspace/page_table.rs | 2 +- kernel/src/nr.rs | 44 +++++++++++++------ kernel/src/nrproc.rs | 18 ++++++++ kernel/src/syscalls.rs | 4 ++ lib/kpi/src/lib.rs | 3 ++ lib/kpi/src/syscalls/process.rs | 18 ++++++++ usr/init/Cargo.toml | 1 + usr/init/src/init.rs | 9 ++++ 17 files changed, 139 insertions(+), 46 deletions(-) diff --git a/kernel/src/arch/unix/process.rs b/kernel/src/arch/unix/process.rs index 2117d844f..694e89195 100644 --- a/kernel/src/arch/unix/process.rs +++ b/kernel/src/arch/unix/process.rs @@ -18,6 +18,7 @@ use kpi::process::FrameId; use lazy_static::lazy_static; use nr2::nr::{AffinityChange, Dispatch, NodeReplicated, ThreadToken}; +//use nr2::nr::rwlock::RwLock; use crate::arch::kcb::get_kcb; use crate::error::{KError, KResult}; diff --git a/kernel/src/arch/unix/syscalls.rs b/kernel/src/arch/unix/syscalls.rs index b7a684300..0fee973f1 100644 --- a/kernel/src/arch/unix/syscalls.rs +++ b/kernel/src/arch/unix/syscalls.rs @@ -27,6 +27,10 @@ impl SystemDispatch for UnixSystemCalls { } impl ProcessDispatch for UnixSystemCalls { + fn set_replicas(&self, add: u64, rid: u64) -> KResult<(u64, u64)> { + todo!() + } + fn log(&self, _buffer_arg: UserSlice) -> KResult<(u64, u64)> { todo!() } diff --git a/kernel/src/arch/x86_64/mod.rs b/kernel/src/arch/x86_64/mod.rs index 2839519a5..b4fdfcfcd 100644 --- a/kernel/src/arch/x86_64/mod.rs +++ b/kernel/src/arch/x86_64/mod.rs @@ -542,6 +542,7 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { let kernel_node = 
crate::nr::KERNEL_NODE_INSTANCE.clone(); let local_ridx = kernel_node.register(0).unwrap(); + log::info!("Kernel node replica idx is {:?}", local_ridx); crate::nr::NR_REPLICA.call_once(|| (kernel_node.clone(), local_ridx)); kernel_node }; diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs index b9dbd712f..aeea07e07 100644 --- a/kernel/src/arch/x86_64/process.rs +++ b/kernel/src/arch/x86_64/process.rs @@ -20,7 +20,7 @@ use kpi::arch::SaveArea; use kpi::process::{FrameId, ELF_OFFSET, EXECUTOR_OFFSET}; use lazy_static::lazy_static; use log::{debug, info, trace, warn}; -use crate::arch::kcb::{self, per_core_mem}; +use crate::arch::kcb; use core::num::NonZeroUsize; use nr2::nr::{NodeReplicated, AffinityChange}; use x86::bits64::paging::*; @@ -74,28 +74,28 @@ pub(crate) fn current_pid() -> KResult { lazy_static! { pub(crate) static ref PROCESS_TABLE: ArrayVec>>, MAX_PROCESSES> = { use crate::memory::shmem_affinity::mid_to_shmem_affinity; + use crate::arch::kcb::per_core_mem; if !crate::CMDLINE .get() .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller) { - // Get location of the logs from the controller, who will have created them in shared memory - - /*use crate::arch::rackscale::get_shmem_structure::{rpc_get_shmem_structure, ShmemStructure}; - - let mut log_ptrs = [0u64; MAX_PROCESSES]; - rpc_get_shmem_structure(ShmemStructure::NrProcLogs, &mut log_ptrs[..]).expect("Failed to get process log pointers"); - let mut process_logs = Box::new(ArrayVec::new()); - for i in 0..log_ptrs.len() { - let log_ptr = paddr_to_kernel_vaddr(PAddr::from(log_ptrs[i])); - let local_log_arc = unsafe { - Arc::from_raw(log_ptr.as_u64() - as *const Log<'static, as Dispatch>::WriteOperation>) + // Get the NodeReplicated instances from the controller, + // who will have created them in shared memory + use crate::arch::rackscale::get_shmem_structure::{rpc_get_shmem_structure, ShmemStructure}; + + let mut nr_ptrs = [0u64; MAX_PROCESSES]; + 
rpc_get_shmem_structure(ShmemStructure::NrProcess, &mut nr_ptrs[..]).expect("Failed to get process log pointers"); + let mut processes = ArrayVec::new(); + for i in 0..nr_ptrs.len() { + let nrproc_ptr = paddr_to_kernel_vaddr(PAddr::from(nr_ptrs[i])); + let nr_process = unsafe { + Arc::from_raw(nrproc_ptr.as_u64() + as *const NodeReplicated>) }; - process_logs.push(local_log_arc); + processes.push(nr_process); } - return process_logs;*/ - unimplemented!("Need to get NodeReplicated from controller") + return processes; } // We want to allocate the logs in controller shared memory @@ -1671,14 +1671,12 @@ impl FrameManagement for Ring3Process { #[cfg(target_os = "none")] pub(crate) fn spawn(binary: &'static str) -> Result { use crate::process::make_process; - let pid = make_process::(binary)?; // Let the controller pick the initial core for the process #[cfg(feature = "rackscale")] { use crate::arch::rackscale::processops::request_core::rpc_request_core; - let (_gtid, _) = rpc_request_core(pid, true, INVALID_EXECUTOR_START.as_u64()) .expect("Failed to get core for newly spawned process"); } diff --git a/kernel/src/arch/x86_64/rackscale/get_shmem_frames.rs b/kernel/src/arch/x86_64/rackscale/get_shmem_frames.rs index 205d9d803..2bbd1807b 100644 --- a/kernel/src/arch/x86_64/rackscale/get_shmem_frames.rs +++ b/kernel/src/arch/x86_64/rackscale/get_shmem_frames.rs @@ -147,9 +147,10 @@ pub(crate) fn handle_get_shmem_frames( // TODO(error_handling): should handle errors gracefully here, maybe percolate to client? 
let mut manager = &mut SHMEM_MEMSLICE_ALLOCATORS[mid - 1].lock(); - let frame = manager + let mut frame = manager .allocate_large_page() .expect("DCM OK'd allocation, this should succeed"); + unsafe { frame.zero() }; assert!(frame.affinity == mid_to_shmem_affinity(mid)); regions.push(ShmemRegion { base: frame.base.as_u64(), diff --git a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs index bf3b8f49b..268e99971 100644 --- a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs +++ b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs @@ -21,7 +21,7 @@ use crate::error::{KError, KResult}; use crate::memory::shmem_affinity::local_shmem_affinity; use crate::memory::vspace::TlbFlushHandle; use crate::memory::{kernel_vaddr_to_paddr, paddr_to_kernel_vaddr, PAddr, VAddr}; -use crate::nr::{Op, KERNEL_NODE_INSTANCE}; +use crate::nr::{KERNEL_NODE_INSTANCE, KernelNode}; use crate::nrproc::NrProcess; use crate::process::MAX_PROCESSES; @@ -29,9 +29,7 @@ use crate::process::MAX_PROCESSES; #[derive(Debug, Eq, PartialEq, PartialOrd, Clone, Copy)] #[repr(u8)] pub enum ShmemStructure { - // TODO(dynrep): remove NrProcLogs/NrLog add NodeReplicated and - // NodeReplicated instead that gets sent from controller - NrProcLogs = 0, + NrProcess = 0, NrLog = 1, WorkQueues = 2, } @@ -50,7 +48,7 @@ pub(crate) fn rpc_get_shmem_structure( // Construct result buffer and call RPC log::debug!("Calling GetShmemStructure({:?})", shmem_structure); let res_size = match shmem_structure { - ShmemStructure::NrProcLogs => core::mem::size_of::<[u64; MAX_PROCESSES]>(), + ShmemStructure::NrProcess => core::mem::size_of::<[u64; MAX_PROCESSES]>(), _ => core::mem::size_of::<[u64; 1]>(), }; @@ -63,7 +61,7 @@ pub(crate) fn rpc_get_shmem_structure( unsafe { encode(&req, &mut (&mut req_data).as_mut()) } .expect("Failed to encode shmem structure request"); - // Make buffer max size of MAX_PROCESS (for NrProcLogs), 1 (for NrLog) + // Make buffer 
max size of MAX_PROCESS (for NrProcess), 1 (for NrLog) let mut res_data = [0u8; core::mem::size_of::<[u64; MAX_PROCESSES]>()]; CLIENT_STATE .rpc_client @@ -76,7 +74,7 @@ pub(crate) fn rpc_get_shmem_structure( .unwrap(); let decode_result = match shmem_structure { - ShmemStructure::NrProcLogs => { + ShmemStructure::NrProcess => { unsafe { decode::<[u64; MAX_PROCESSES]>(&mut res_data[..res_size]) } .map(|(ret, remaining)| (&ret[..], remaining.len())) } @@ -124,7 +122,7 @@ pub(crate) fn handle_get_shmem_structure( }; match shmem_structure { - ShmemStructure::NrProcLogs => { + ShmemStructure::NrProcess => { let mut logs = [0u64; MAX_PROCESSES]; for i in 0..PROCESS_TABLE.len() { @@ -148,9 +146,11 @@ pub(crate) fn handle_get_shmem_structure( } ShmemStructure::NrLog => { let log_clone = Arc::into_raw(Arc::clone(&KERNEL_NODE_INSTANCE)); + let log_paddr = - kernel_vaddr_to_paddr(VAddr::from_u64((*&log_clone as *const Log) as u64)) + kernel_vaddr_to_paddr(VAddr::from_u64((*&log_clone as *const NodeReplicated) as u64)) .as_u64(); + log::info!("nr_node addr {:?} &KERNEL_NODE_INSTANCE = {:p}", log_paddr, &KERNEL_NODE_INSTANCE); // Modify header and write into output buffer unsafe { encode(&[log_paddr], &mut payload) }.unwrap(); diff --git a/kernel/src/arch/x86_64/rackscale/syscalls.rs b/kernel/src/arch/x86_64/rackscale/syscalls.rs index 9f6beec1b..7384bf327 100644 --- a/kernel/src/arch/x86_64/rackscale/syscalls.rs +++ b/kernel/src/arch/x86_64/rackscale/syscalls.rs @@ -283,6 +283,10 @@ impl FsDispatch for Arch86LwkSystemCall { } impl ProcessDispatch for Arch86LwkSystemCall { + fn set_replicas(&self, add: u64, rid: u64) -> KResult<(u64, u64)> { + self.local.set_replicas(add, rid) + } + fn log(&self, uslice: UserSlice) -> KResult<(u64, u64)> { let msg: String = uslice.try_into()?; rpc_log(msg).map_err(|e| e.into()) diff --git a/kernel/src/arch/x86_64/syscall.rs b/kernel/src/arch/x86_64/syscall.rs index 76bb76555..a8c225ce9 100644 --- a/kernel/src/arch/x86_64/syscall.rs +++ 
b/kernel/src/arch/x86_64/syscall.rs @@ -292,6 +292,18 @@ impl ProcessDispatch for T { Ok((0, 0)) } + fn set_replicas(&self, add: u64, rid: u64) -> crate::error::KResult<(u64, u64)> { + let pid = current_pid()?; + if add > 0 { + NrProcess::::add_replica(pid, rid as usize).expect("add_replica"); + } + else { + NrProcess::::remove_replica(pid, rid as usize).expect("remove_replica"); + } + + Ok((0,0)) + } + fn exit(&self, code: u64) -> Result<(u64, u64), KError> { debug!("Process got exit, we are done for now..."); // TODO: For now just a dummy version that exits Qemu diff --git a/kernel/src/arch/x86_64/vspace/mod.rs b/kernel/src/arch/x86_64/vspace/mod.rs index 42ea4effb..325508e4f 100644 --- a/kernel/src/arch/x86_64/vspace/mod.rs +++ b/kernel/src/arch/x86_64/vspace/mod.rs @@ -121,7 +121,6 @@ impl AddressSpace for VSpace { // virtual addr should be aligned to page-size return Err(KError::InvalidBase); } - let tomap_range = base.as_usize()..base.as_usize() + frame.size; // Check all mapping in that region to see if we can allow this map: @@ -149,9 +148,8 @@ impl AddressSpace for VSpace { }); } } - - self.mappings - .try_insert(base, MappingInfo::new(frame, action))?; + //self.mappings + // .try_insert(base, MappingInfo::new(frame, action))?; let r = self.page_table.map_frame(base, frame, action); r } @@ -200,6 +198,9 @@ impl Drop for VSpace { impl VSpace { pub(crate) fn new() -> Result { + let mut btree = BTreeMap::new(); + btree.try_insert(VAddr(0x0), MappingInfo::new(Frame::empty(), MapAction::none())).expect("fail"); + Ok(VSpace { mappings: BTreeMap::new(), page_table: PageTable::new()?, diff --git a/kernel/src/arch/x86_64/vspace/page_table.rs b/kernel/src/arch/x86_64/vspace/page_table.rs index 5ed05cc36..9dd2fd79f 100644 --- a/kernel/src/arch/x86_64/vspace/page_table.rs +++ b/kernel/src/arch/x86_64/vspace/page_table.rs @@ -819,7 +819,7 @@ impl PageTable { fn alloc_frame(&self) -> Frame { let frame_ptr = unsafe { - let ptr = alloc::alloc::alloc(PT_LAYOUT); + let ptr 
= alloc::alloc::alloc_zeroed(PT_LAYOUT); debug_assert!(!ptr.is_null()); let nptr = NonNull::new_unchecked(ptr); diff --git a/kernel/src/nr.rs b/kernel/src/nr.rs index 538ab55e0..3876e62fb 100644 --- a/kernel/src/nr.rs +++ b/kernel/src/nr.rs @@ -1,16 +1,14 @@ // Copyright © 2021 VMware, Inc. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 OR MIT - +#![allow(dead_code, warnings)] use crate::prelude::*; use core::fmt::Debug; -use core::num::NonZeroUsize; use alloc::sync::Arc; use hashbrown::HashMap; use log::{error, trace}; -use nr2::nr::{Dispatch, NodeReplicated, ThreadToken, AffinityChange}; +use nr2::nr::{Dispatch, NodeReplicated, ThreadToken}; use spin::Once; -use crate::arch::kcb; #[cfg(feature = "rackscale")] use lazy_static::lazy_static; @@ -28,8 +26,11 @@ pub(crate) static NR_REPLICA: Once<(Arc>, ThreadToken #[cfg(feature = "rackscale")] lazy_static! { pub(crate) static ref KERNEL_NODE_INSTANCE: Arc> = { + use core::num::NonZeroUsize; + use nr2::nr::AffinityChange; use crate::memory::shmem_affinity::mid_to_shmem_affinity; use crate::memory::shmem_affinity::local_shmem_affinity; + use crate::arch::kcb; if crate::CMDLINE .get() @@ -37,10 +38,19 @@ lazy_static! { { // Want at least one replica... 
let num_replicas = - NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).unwrap(); - Arc::try_new( + NonZeroUsize::new(core::cmp::max(1, *crate::environment::NUM_MACHINES-1)).unwrap(); + log::info!("NodeReplicated creating with {} replicas", num_replicas); + + let cur_affinity = { + let pcm = kcb::per_core_mem(); + let cur_affinity = pcm.physical_memory.borrow().affinity; + let ret = pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't set affinity"); + cur_affinity + }; + + let nr = Arc::try_new( NodeReplicated::new(num_replicas, |afc: AffinityChange| { - let pcm = kcb::per_core_mem(); + /*let pcm = kcb::per_core_mem(); match afc { AffinityChange::Replica(r) => { pcm.set_mem_affinity(mid_to_shmem_affinity(r)).expect("Can't change affinity"); @@ -48,12 +58,19 @@ lazy_static! { AffinityChange::Revert(_orig) => { pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't set affinity") } - } + }*/ return 0; // TODO(dynrep): Return error code }) .expect("Not enough memory to initialize system"), ) - .expect("Not enough memory to initialize system") + .expect("Not enough memory to initialize system"); + + { + let pcm = kcb::per_core_mem(); + pcm.set_mem_affinity(cur_affinity).expect("Can't set affinity"); + } + + nr } else { use crate::memory::{paddr_to_kernel_vaddr, PAddr}; use crate::arch::rackscale::get_shmem_structure::{rpc_get_shmem_structure, ShmemStructure}; @@ -113,10 +130,11 @@ pub(crate) struct KernelNode { impl Default for KernelNode { fn default() -> KernelNode { - KernelNode { - process_map: HashMap::new(), // with_capacity(MAX_PROCESSES), - scheduler_map: HashMap::new(), // with_capacity(MAX_CORES), or, for rackscale, with_capacity(MAX_CORES * MAX_MACHINES) - } + let k = KernelNode { + process_map: HashMap::with_capacity(MAX_PROCESSES), // with_capacity(MAX_PROCESSES), + scheduler_map: HashMap::with_capacity(24), // with_capacity(MAX_CORES), or, for rackscale, with_capacity(MAX_CORES * MAX_MACHINES) + }; + k } } diff --git 
a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs index d573dc2cf..240cb5573 100644 --- a/kernel/src/nrproc.rs +++ b/kernel/src/nrproc.rs @@ -141,6 +141,24 @@ impl NrProcess

{ } impl NrProcess

{ + pub(crate) fn add_replica(pid: Pid, rid: usize) -> Result<(), KError>{ + debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); + let max_nodes = *crate::environment::NUM_MACHINES; + debug_assert!(rid < max_nodes, "Invalid Node ID"); + log::info!("add_replica {pid} {rid}"); + PROCESS_TABLE[pid].add_replica(rid).expect("add_replica failed"); + Ok(()) + } + + pub(crate) fn remove_replica(pid: Pid, rid: usize) -> Result<(), KError>{ + debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); + let max_nodes = *crate::environment::NUM_MACHINES; + debug_assert!(rid < max_nodes, "Invalid Node ID"); + log::info!("remove_replica {pid} {rid}"); + PROCESS_TABLE[pid].remove_replica(rid).expect("remove_replica failed"); + Ok(()) + } + pub(crate) fn load( pid: Pid, module_name: String, diff --git a/kernel/src/syscalls.rs b/kernel/src/syscalls.rs index 6b65fdfc0..f736a160a 100644 --- a/kernel/src/syscalls.rs +++ b/kernel/src/syscalls.rs @@ -113,6 +113,7 @@ pub(crate) trait ProcessDispatch + LowerHex + Debug + Copy + Clone> fn release_core(&self, cord_id: W) -> KResult<(W, W)>; fn allocate_physical(&self, page_size: W, affinity: W) -> KResult<(W, W)>; fn release_physical(&self, page_id: W) -> KResult<(W, W)>; + fn set_replicas(&self, add: W, replicas: W) -> KResult<(W, W)>; fn exit(&self, code: W) -> KResult<(W, W)>; } @@ -127,6 +128,7 @@ enum ProcessOperationArgs { ReleaseCore(W), AllocatePhysical(W, W), ReleasePhysical(W), + SetReplicas(W, W) } impl + LowerHex + Debug + Copy + Clone> ProcessOperationArgs { @@ -149,6 +151,7 @@ impl + LowerHex + Debug + Copy + Clone> ProcessOperationArgs { ProcessOperation::ReleaseCore => Ok(Self::ReleaseCore(arg2)), ProcessOperation::AllocatePhysical => Ok(Self::AllocatePhysical(arg2, arg3)), ProcessOperation::ReleasePhysical => Ok(Self::ReleasePhysical(arg2)), + ProcessOperation::SetReplicas => Ok(Self::SetReplicas(arg2, arg3)), ProcessOperation::SubscribeEvent => { error!("SubscribeEvent is not implemented"); Err(KError::InvalidProcessOperation { a: 
arg1.into() }) @@ -284,6 +287,7 @@ pub(crate) trait SystemCallDispatch + LowerHex + Debug + Copy + Clo self.allocate_physical(page_size, affinity) } Poa::ReleasePhysical(frame_id) => self.release_physical(frame_id), + Poa::SetReplicas(add, replicas) => self.set_replicas(add, replicas), } } diff --git a/lib/kpi/src/lib.rs b/lib/kpi/src/lib.rs index a7f99343e..6ebd2000d 100644 --- a/lib/kpi/src/lib.rs +++ b/lib/kpi/src/lib.rs @@ -104,6 +104,8 @@ pub enum ProcessOperation { AllocatePhysical = 9, /// Release a physical memory page from the process. ReleasePhysical = 10, + /// Set Replicas + SetReplicas = 11 } impl ProcessOperation { @@ -120,6 +122,7 @@ impl ProcessOperation { 8 => Some(Self::ReleaseCore), 9 => Some(Self::AllocatePhysical), 10 => Some(Self::ReleasePhysical), + 11 => Some(Self::SetReplicas), _ => None, } } diff --git a/lib/kpi/src/syscalls/process.rs b/lib/kpi/src/syscalls/process.rs index 762f72297..4110a6500 100644 --- a/lib/kpi/src/syscalls/process.rs +++ b/lib/kpi/src/syscalls/process.rs @@ -14,6 +14,24 @@ use x86::bits64::paging::VAddr; pub struct Process; impl Process { + pub fn set_replicas(add: bool, replica_idx: usize) -> Result<(), SystemCallError>{ + let r = unsafe { + syscall!( + SystemCall::Process as u64, + ProcessOperation::SetReplicas as u64, + add as u64, + replica_idx as u64, + 1 + ) + }; + + if r == 0 { + Ok(()) + } else { + Err(SystemCallError::from(r)) + } + } + /// Request to run on `core_id` starting at `entry_point`. 
pub fn request_core(core_id: usize, entry_point: VAddr) -> Result { let (r, gtid, _eid) = unsafe { diff --git a/usr/init/Cargo.toml b/usr/init/Cargo.toml index 39572a20b..d97673f89 100644 --- a/usr/init/Cargo.toml +++ b/usr/init/Cargo.toml @@ -52,6 +52,7 @@ test-phys-alloc = [] test-core-alloc = [] test-rackscale-shootdown = [] test-concurrent-shootdown = [] +test-dynamic-replication = [] # Simple micro-benchmarks bench-vmops = [] diff --git a/usr/init/src/init.rs b/usr/init/src/init.rs index bf70e1454..2b3a2669e 100644 --- a/usr/init/src/init.rs +++ b/usr/init/src/init.rs @@ -987,8 +987,15 @@ pub fn upcall_test() { info!("upcall_test OK"); } +pub fn dynamic_replication_test() { + vibrio::syscalls::Process::set_replicas(false, 1).expect("Can't read vcpu control area."); + vibrio::syscalls::Process::set_replicas(true, 1).expect("Can't read vcpu control area."); + info!("dynamic_replication OK"); +} + #[no_mangle] pub extern "C" fn _start() -> ! { + sys_println!("_start()"); unsafe { log::set_logger(&vibrio::writer::LOGGER) .map(|()| log::set_max_level(Level::Debug.to_level_filter())) @@ -1062,6 +1069,8 @@ pub extern "C" fn _start() -> ! { #[cfg(feature = "test-core-alloc")] core_alloc_test(); + + dynamic_replication_test(); #[cfg(feature = "test-scheduler")] scheduler_test(); From adf90842b1e725c09b5808c0be55010633710fe3 Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Wed, 18 Oct 2023 14:36:49 -0700 Subject: [PATCH 08/32] Wrap NR instance in rwlock. 
Signed-off-by: Gerd Zellweger --- kernel/src/arch/unix/process.rs | 9 ++--- kernel/src/arch/x86_64/process.rs | 25 ++++++++------ kernel/src/environment.rs | 3 ++ kernel/src/nrproc.rs | 44 ++++++++++++------------ kernel/testutils/src/rackscale_runner.rs | 6 ++-- lib/node-replication2 | 2 +- 6 files changed, 48 insertions(+), 41 deletions(-) diff --git a/kernel/src/arch/unix/process.rs b/kernel/src/arch/unix/process.rs index 694e89195..206e5b00c 100644 --- a/kernel/src/arch/unix/process.rs +++ b/kernel/src/arch/unix/process.rs @@ -6,6 +6,7 @@ use alloc::boxed::Box; use alloc::sync::Arc; use alloc::vec::Vec; use bootloader_shared::Module; +use nr2::nr::rwlock::RwLock; use core::alloc::Allocator; use core::cell::RefCell; use core::num::NonZeroUsize; @@ -66,7 +67,7 @@ pub(crate) fn swap_current_executor(_current_executor: Box) -> Optio } lazy_static! { - pub(crate) static ref PROCESS_TABLE: ArrayVec>>, MAX_PROCESSES> = { + pub(crate) static ref PROCESS_TABLE: ArrayVec>>>, MAX_PROCESSES> = { debug_assert_eq!(*crate::environment::NODE_ID, 0, "Expect initialization to happen on node 0."); // Want at least one replica... let num_replicas = NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).expect("At least one numa node"); @@ -74,9 +75,9 @@ lazy_static! 
{ let mut processes = ArrayVec::new(); for pid in 0..MAX_PROCESSES { processes.push( - Arc::try_new(NodeReplicated::>::new(num_replicas, |afc: AffinityChange| { + Arc::try_new(RwLock::new(NodeReplicated::>::new(num_replicas, |afc: AffinityChange| { return 0; // TODO(dynrep): Return error code - }).expect("Not enough memory to initialize system")).expect("Not enough memory to initialize system")); + }).expect("Not enough memory to initialize system"))).expect("Not enough memory to initialize system")); } processes }; @@ -89,7 +90,7 @@ impl crate::nrproc::ProcessManager for ArchProcessManagement { fn process_table( &self, - ) -> &'static ArrayVec>>, MAX_PROCESSES> { + ) -> &'static ArrayVec>>>, MAX_PROCESSES> { &super::process::PROCESS_TABLE } } diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs index aeea07e07..1e2fc94d2 100644 --- a/kernel/src/arch/x86_64/process.rs +++ b/kernel/src/arch/x86_64/process.rs @@ -23,6 +23,7 @@ use log::{debug, info, trace, warn}; use crate::arch::kcb; use core::num::NonZeroUsize; use nr2::nr::{NodeReplicated, AffinityChange}; +use nr2::nr::rwlock::RwLock; use x86::bits64::paging::*; use x86::bits64::rflags; use x86::{controlregs, Ring}; @@ -72,7 +73,7 @@ pub(crate) fn current_pid() -> KResult { #[cfg(feature = "rackscale")] lazy_static! { - pub(crate) static ref PROCESS_TABLE: ArrayVec>>, MAX_PROCESSES> = { + pub(crate) static ref PROCESS_TABLE: ArrayVec>>>, MAX_PROCESSES> = { use crate::memory::shmem_affinity::mid_to_shmem_affinity; use crate::arch::kcb::per_core_mem; @@ -91,7 +92,7 @@ lazy_static! { let nrproc_ptr = paddr_to_kernel_vaddr(PAddr::from(nr_ptrs[i])); let nr_process = unsafe { Arc::from_raw(nrproc_ptr.as_u64() - as *const NodeReplicated>) + as *const RwLock>>) }; processes.push(nr_process); } @@ -115,8 +116,8 @@ lazy_static! { "Expect initialization to happen on node 0." 
); - let process: Arc>> = Arc::try_new( - NodeReplicated::new(num_replicas, |afc: AffinityChange| { + let process: Arc>>> = Arc::try_new( + RwLock::new(NodeReplicated::new(num_replicas, |afc: AffinityChange| { let pcm = kcb::per_core_mem(); match afc { AffinityChange::Replica(r) => { @@ -129,7 +130,7 @@ lazy_static! { return 0; // TODO(dynrep): Return error code }) .expect("Not enough memory to initialize system"), - ) + )) .expect("Not enough memory to initialize system"); processes.push(process) @@ -181,12 +182,12 @@ lazy_static! { #[cfg(not(feature = "rackscale"))] lazy_static! { - pub(crate) static ref PROCESS_TABLE: ArrayVec>>, MAX_PROCESSES> = + pub(crate) static ref PROCESS_TABLE: ArrayVec>>>, MAX_PROCESSES> = create_process_table(); } #[cfg(not(feature = "rackscale"))] -fn create_process_table() -> ArrayVec>>, MAX_PROCESSES> { +fn create_process_table() -> ArrayVec>>>, MAX_PROCESSES> { // Want at least one replica... let num_replicas = NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).unwrap(); @@ -199,7 +200,7 @@ fn create_process_table() -> ArrayVec "Expect initialization to happen on node 0." 
); - let process: Arc>> = Arc::try_new( + let process: Arc>>> = Arc::try_new(RwLock::new( NodeReplicated::new(num_replicas, |afc: AffinityChange| { let pcm = kcb::per_core_mem(); match afc { @@ -212,7 +213,7 @@ fn create_process_table() -> ArrayVec } return 0; // TODO(dynrep): Return error code }) - .expect("Not enough memory to initialize system"), + .expect("Not enough memory to initialize system")), ) .expect("Not enough memory to initialize system"); @@ -293,14 +294,16 @@ fn create_process_table( numa_cache } */ -pub(crate) struct ArchProcessManagement; + + pub(crate) struct ArchProcessManagement; + impl crate::nrproc::ProcessManager for ArchProcessManagement { type Process = Ring3Process; fn process_table( &self, - ) -> &'static ArrayVec>>, MAX_PROCESSES> { + ) -> &'static ArrayVec>>>, MAX_PROCESSES> { &*super::process::PROCESS_TABLE } } diff --git a/kernel/src/environment.rs b/kernel/src/environment.rs index 449be8ed4..c9ae95be0 100644 --- a/kernel/src/environment.rs +++ b/kernel/src/environment.rs @@ -13,6 +13,9 @@ use crate::arch::{MAX_CORES, MAX_MACHINES, MAX_NUMA_NODES}; pub(crate) static CORE_ID: Lazy = Lazy::new(|| new_gtid(atopology::MACHINE_TOPOLOGY.current_thread().id, *MACHINE_ID)); +#[thread_local] +pub(crate) static MT_ID: Lazy = Lazy::new(|| kpi::system::mtid_from_gtid(*crate::environment::CORE_ID)); + /// The NUMA node id of the current core (hardware thread). 
#[thread_local] pub(crate) static NODE_ID: Lazy = Lazy::new(|| { diff --git a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs index 240cb5573..0a6260d5d 100644 --- a/kernel/src/nrproc.rs +++ b/kernel/src/nrproc.rs @@ -11,7 +11,7 @@ use arrayvec::ArrayVec; use fallible_collections::vec::FallibleVec; use kpi::process::{FrameId, ProcessInfo}; use kpi::MemType; -use nr2::nr::{Dispatch, NodeReplicated, ThreadToken}; +use nr2::nr::{Dispatch, NodeReplicated, ThreadToken, rwlock::RwLock}; use spin::Once; use crate::arch::process::PROCESS_TABLE; @@ -40,7 +40,7 @@ pub(crate) fn register_thread_with_process_replicas() { for pid in 0..MAX_PROCESSES { debug_assert!(PROCESS_TABLE.len() > pid, "Invalid PID"); - let token = PROCESS_TABLE[pid].register(node); + let token = PROCESS_TABLE[pid].write(*crate::environment::MT_ID).register(node); tokens.push(token.expect("Need to be able to register")); } @@ -119,7 +119,7 @@ pub(crate) trait ProcessManager { #[allow(clippy::type_complexity)] // fix this once `associated_type_defaults` works fn process_table( &self, - ) -> &'static ArrayVec>>, MAX_PROCESSES>; + ) -> &ArrayVec>>>, MAX_PROCESSES>; } /// A node-replicated process. @@ -146,7 +146,7 @@ impl NrProcess

{ let max_nodes = *crate::environment::NUM_MACHINES; debug_assert!(rid < max_nodes, "Invalid Node ID"); log::info!("add_replica {pid} {rid}"); - PROCESS_TABLE[pid].add_replica(rid).expect("add_replica failed"); + PROCESS_TABLE[pid].write(*crate::environment::MT_ID).add_replica(rid).expect("add_replica failed"); Ok(()) } @@ -155,7 +155,7 @@ impl NrProcess

{ let max_nodes = *crate::environment::NUM_MACHINES; debug_assert!(rid < max_nodes, "Invalid Node ID"); log::info!("remove_replica {pid} {rid}"); - PROCESS_TABLE[pid].remove_replica(rid).expect("remove_replica failed"); + PROCESS_TABLE[pid].write(*crate::environment::MT_ID).remove_replica(rid).expect("remove_replica failed"); Ok(()) } @@ -165,7 +165,7 @@ impl NrProcess

{ writeable_sections: Vec, ) -> Result<(), KError> { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - let response = PROCESS_TABLE[pid].execute_mut( + let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut( ProcessOpMut::Load(pid, module_name, writeable_sections), PROCESS_TOKEN.get().unwrap()[pid], ); @@ -179,7 +179,7 @@ impl NrProcess

{ pub(crate) fn resolve(pid: Pid, base: VAddr) -> Result<(u64, u64), KError> { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); debug_assert!(base.as_u64() < kpi::KERNEL_BASE, "Invalid base"); - let response = PROCESS_TABLE[pid].execute( + let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute( ProcessOp::MemResolve(base), PROCESS_TOKEN.get().unwrap()[pid], ); @@ -192,7 +192,7 @@ impl NrProcess

{ pub(crate) fn synchronize(pid: Pid) { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - PROCESS_TABLE[pid].sync(PROCESS_TOKEN.get().unwrap()[pid]); + PROCESS_TABLE[pid].read(*crate::environment::MT_ID).sync(PROCESS_TOKEN.get().unwrap()[pid]); } pub(crate) fn map_device_frame( @@ -201,7 +201,7 @@ impl NrProcess

{ action: MapAction, ) -> Result<(u64, u64), KError> { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - let response = PROCESS_TABLE[pid].execute_mut( + let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut( ProcessOpMut::MemMapDevice(frame, action), PROCESS_TOKEN.get().unwrap()[pid], ); @@ -214,7 +214,7 @@ impl NrProcess

{ pub(crate) fn unmap(pid: Pid, base: VAddr) -> Result, KError> { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - let response = PROCESS_TABLE[pid].execute_mut( + let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut( ProcessOpMut::MemUnmap(base), PROCESS_TOKEN.get().unwrap()[pid], ); @@ -233,7 +233,7 @@ impl NrProcess

{ ) -> Result<(PAddr, usize), KError> { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); //action.multiple_mappings(true); - let response = PROCESS_TABLE[pid].execute_mut( + let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut( ProcessOpMut::MemMapFrameId(base, frame_id, action), PROCESS_TOKEN.get().unwrap()[pid], ); @@ -253,7 +253,7 @@ impl NrProcess

{ debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); let mut virtual_offset = 0; for frame in frames { - let response = PROCESS_TABLE[pid].execute_mut( + let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut( ProcessOpMut::MemMapFrame(base + virtual_offset, frame, action), PROCESS_TOKEN.get().unwrap()[pid], ); @@ -277,7 +277,7 @@ impl NrProcess

{ pub(crate) fn pinfo(pid: Pid) -> Result { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); let response = - PROCESS_TABLE[pid].execute(ProcessOp::ProcessInfo, PROCESS_TOKEN.get().unwrap()[pid]); + PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute(ProcessOp::ProcessInfo, PROCESS_TOKEN.get().unwrap()[pid]); match response { Ok(ProcessResult::ProcessInfo(pinfo)) => Ok(pinfo), Err(e) => Err(e), @@ -293,7 +293,7 @@ impl NrProcess

{ let gtid = *crate::environment::CORE_ID; let node = *crate::environment::NODE_ID; - let response = pm.process_table()[pid].execute_mut( + let response = pm.process_table()[pid].read(*crate::environment::MT_ID).execute_mut( ProcessOpMut::AssignExecutor(gtid, node), PROCESS_TOKEN.get().unwrap()[pid], ); @@ -324,7 +324,7 @@ impl NrProcess

{ pub(crate) fn allocate_frame_to_process(pid: Pid, frame: Frame) -> Result { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - let response = PROCESS_TABLE[pid].execute_mut( + let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut( ProcessOpMut::AllocateFrameToProcess(frame), PROCESS_TOKEN.get().unwrap()[pid], ); @@ -338,7 +338,7 @@ impl NrProcess

{ pub(crate) fn release_frame_from_process(pid: Pid, fid: FrameId) -> Result { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); debug_assert!(fid < MAX_FRAMES_PER_PROCESS, "Invalid FID"); - let response = PROCESS_TABLE[pid].execute_mut( + let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut( ProcessOpMut::ReleaseFrameFromProcess(fid), PROCESS_TOKEN.get().unwrap()[pid], ); @@ -354,7 +354,7 @@ impl NrProcess

{ #[cfg(feature = "rackscale")] let mid = *crate::environment::MACHINE_ID; - let response = PROCESS_TABLE[pid].execute_mut( + let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut( #[cfg(not(feature = "rackscale"))] ProcessOpMut::DispatcherAllocation(frame), #[cfg(feature = "rackscale")] @@ -370,7 +370,7 @@ impl NrProcess

{ } pub(crate) fn userslice_to_arc_slice(from: UserSlice) -> Result, KError> { - let response = PROCESS_TABLE[from.pid].execute( + let response = PROCESS_TABLE[from.pid].read(*crate::environment::MT_ID).execute( ProcessOp::ReadSlice(from), PROCESS_TOKEN.get().unwrap()[from.pid], ); @@ -382,7 +382,7 @@ impl NrProcess

{ } pub(crate) fn read_string_from_userspace(from: UserSlice) -> Result { - let response = PROCESS_TABLE[from.pid].execute( + let response = PROCESS_TABLE[from.pid].read(*crate::environment::MT_ID).execute( ProcessOp::ReadString(from), PROCESS_TOKEN.get().unwrap()[from.pid], ); @@ -396,7 +396,7 @@ impl NrProcess

{ pub(crate) fn write_to_userspace(to: &mut UserSlice, kbuf: &[u8]) -> Result<(), KError> { let pid = to.pid; - let response = PROCESS_TABLE[pid].execute( + let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute( ProcessOp::WriteSlice(to, kbuf), PROCESS_TOKEN.get().unwrap()[pid], ); @@ -412,7 +412,7 @@ impl NrProcess

{ on: UserSlice, f: Box KResult<(u64, u64)>>, ) -> Result<(u64, u64), KError> { - let response = PROCESS_TABLE[on.pid].execute( + let response = PROCESS_TABLE[on.pid].read(*crate::environment::MT_ID).execute( ProcessOp::ExecSliceMut(on, f), PROCESS_TOKEN.get().unwrap()[on.pid], ); @@ -427,7 +427,7 @@ impl NrProcess

{ on: &'a UserSlice, f: Box KResult<()>>, ) -> Result<(), KError> { - let response = PROCESS_TABLE[on.pid].execute( + let response = PROCESS_TABLE[on.pid].read(*crate::environment::MT_ID).execute( ProcessOp::ExecSlice(on, f), PROCESS_TOKEN.get().unwrap()[on.pid], ); diff --git a/kernel/testutils/src/rackscale_runner.rs b/kernel/testutils/src/rackscale_runner.rs index cda022e43..892443c48 100644 --- a/kernel/testutils/src/rackscale_runner.rs +++ b/kernel/testutils/src/rackscale_runner.rs @@ -512,8 +512,8 @@ impl RackscaleBench { // Find max cores, max numa, and max cores per node let machine = Machine::determine(); - let max_cores = if is_smoke { 2 } else { machine.max_cores() }; - let max_numa = machine.max_numa_nodes(); + let max_cores = if is_smoke { 8 } else { machine.max_cores() }; + let max_numa = if is_smoke { 4 } else { machine.max_numa_nodes() }; let total_cores_per_node = core::cmp::max(1, max_cores / max_numa); // Do initial network configuration @@ -524,7 +524,7 @@ impl RackscaleBench { setup_network(num_clients + 1); } - let mut total_cores = 1; + let mut total_cores = 4; while total_cores < max_cores { // Round up to get the number of clients let new_num_clients = (total_cores + (total_cores_per_node - 1)) / total_cores_per_node; diff --git a/lib/node-replication2 b/lib/node-replication2 index 23822c47c..23df3df94 160000 --- a/lib/node-replication2 +++ b/lib/node-replication2 @@ -1 +1 @@ -Subproject commit 23822c47c8be92eee9f440c93e6a836061793967 +Subproject commit 23df3df94d60ad64abc078c62fbbb229f7f01dff From 5b7d359a3b96ac6ae235adda1ca9d30216d0bcb9 Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Mon, 6 Nov 2023 00:58:41 -0800 Subject: [PATCH 09/32] Wip. 
Signed-off-by: Gerd Zellweger --- kernel/src/arch/unix/vspace.rs | 4 + kernel/src/arch/x86_64/irq.rs | 51 +++++++++- kernel/src/arch/x86_64/process.rs | 77 +++++---------- kernel/src/arch/x86_64/syscall.rs | 16 ++- kernel/src/arch/x86_64/tlb.rs | 10 ++ kernel/src/arch/x86_64/vspace/mod.rs | 8 +- kernel/src/arch/x86_64/vspace/page_table.rs | 102 +++++++++++++++++++- kernel/src/environment.rs | 13 ++- kernel/src/fs/fd.rs | 10 ++ kernel/src/memory/vspace.rs | 5 +- kernel/src/nrproc.rs | 90 +++++++++++------ kernel/src/process.rs | 22 ++--- kernel/tests/s04_user_runtime_tests.rs | 24 +++++ kernel/tests/s06_rackscale_tests.rs | 3 + kernel/tests/s10_benchmarks.rs | 2 +- kernel/testutils/src/rackscale_runner.rs | 4 +- usr/init/src/init.rs | 15 ++- 17 files changed, 346 insertions(+), 110 deletions(-) diff --git a/kernel/src/arch/unix/vspace.rs b/kernel/src/arch/unix/vspace.rs index 989539228..4695a3f07 100644 --- a/kernel/src/arch/unix/vspace.rs +++ b/kernel/src/arch/unix/vspace.rs @@ -69,6 +69,10 @@ impl VSpace { } impl AddressSpace for VSpace { + fn root(&self) -> PAddr { + PAddr::zero() + } + fn map_frame(&mut self, base: VAddr, frame: Frame, action: MapAction) -> Result<(), KError> { let ma = MappingInfo::new(frame, action); self.mappings.insert(ma.vrange(base), ma); diff --git a/kernel/src/arch/x86_64/irq.rs b/kernel/src/arch/x86_64/irq.rs index c332dc112..de43bc7da 100644 --- a/kernel/src/arch/x86_64/irq.rs +++ b/kernel/src/arch/x86_64/irq.rs @@ -34,6 +34,7 @@ #![allow(warnings)] // TODO(fix) the unaligned accesses... 
use alloc::boxed::Box; +use core::borrow::BorrowMut; use core::cell::{Cell, RefCell}; use core::fmt; @@ -47,7 +48,9 @@ use x86::segmentation::{ BuildDescriptor, DescriptorBuilder, GateDescriptorBuilder, SegmentSelector, }; use x86::{dtables, Ring}; +use spin::Lazy; +use crate::arch::process::CURRENT_EXECUTOR; use crate::memory::vspace::MapAction; use crate::memory::Frame; use crate::panic::{backtrace, backtrace_from}; @@ -503,6 +506,9 @@ unsafe fn bkp_handler(a: &ExceptionArguments) { } } +#[thread_local] +pub(crate) static REPLICA_STATE: Lazy = Lazy::new(|| 0); + /// Handler for the timer exception. /// /// We currently use it to periodically make sure that a replica @@ -518,10 +524,39 @@ unsafe fn timer_handler(_a: &ExceptionArguments) { // Periodically advance replica state, then resume immediately nr::KernelNode::synchronize().expect("Synchronized failed?"); - let kcb = get_kcb(); for pid in 0..crate::process::MAX_PROCESSES { nrproc::NrProcess::::synchronize(pid); } + + if *crate::environment::MT_ID == 0 { + use crate::arch::process::current_pid; + let pid = current_pid().expect("dont have a pid?"); + + if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(10) && *REPLICA_STATE == 0 { + info!("got a timer after 10s, remove rid 1"); + let handles = nrproc::NrProcess::::remove_replica(pid, 1).expect("removed"); + #[cfg(not(feature = "rackscale"))] + super::tlb::shootdown(handles[0].clone()); + unsafe { *REPLICA_STATE.as_mut_ptr() = 1 }; + } + if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(20) && *REPLICA_STATE == 1 { + info!("got a timer after 20s, add rid 1"); + let handles = nrproc::NrProcess::::add_replica(pid, 1).expect("added"); + #[cfg(not(feature = "rackscale"))] + super::tlb::shootdown(handles[0].clone()); + unsafe { *REPLICA_STATE.as_mut_ptr() = 2 }; + } + if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(30) && *REPLICA_STATE == 2 { + info!("got a timer after 30s"); + unsafe { 
*REPLICA_STATE.as_mut_ptr() = 3 }; + } + if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(40) && *REPLICA_STATE == 3 { + info!("got a timer after 40s"); + unsafe { *REPLICA_STATE.as_mut_ptr() = 4 }; + } + + } + let kcb = get_kcb(); if super::process::has_executor() { // TODO(process-mgmt): Ensures that we still periodically @@ -738,13 +773,20 @@ pub extern "C" fn handle_generic_exception(a: ExceptionArguments) -> ! { gdb_serial_handler(&a); } else if a.vector == TLB_WORK_PENDING.into() { let kcb = get_kcb(); - trace!("got an interrupt {:?}", core_id); + info!("got an interrupt {:?}", core_id); super::tlb::dequeue(core_id); if super::process::has_executor() { // Return immediately TLB_TIME.update(|t| t + x86::time::rdtsc() - start); + + let mut pborrow = super::process::CURRENT_EXECUTOR.borrow_mut(); + let p = pborrow.as_ref().unwrap(); + p.maybe_switch_vspace(); + drop(pborrow); + kcb_iret_handle(kcb).resume() + } else { // Go to scheduler instead crate::scheduler::schedule() @@ -755,6 +797,11 @@ pub extern "C" fn handle_generic_exception(a: ExceptionArguments) -> ! 
{ let kcb = get_kcb(); if super::process::has_executor() { + + //let mut pborrow = super::process::CURRENT_EXECUTOR.borrow_mut(); + //let p = pborrow.as_ref().unwrap(); + //p.maybe_switch_vspace(); + kcb_iret_handle(kcb).resume() } else { loop { diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs index 1e2fc94d2..456b7033d 100644 --- a/kernel/src/arch/x86_64/process.rs +++ b/kernel/src/arch/x86_64/process.rs @@ -1015,10 +1015,11 @@ impl Executor for Ring3Executor { } fn maybe_switch_vspace(&self) { + let replica_pml4 = NrProcess::::ptroot(self.pid).expect("Can't read pml4"); unsafe { let current_pml4 = PAddr::from(controlregs::cr3()); - if current_pml4 != self.pml4 { - trace!("Switching to 0x{:x}", self.pml4); + if current_pml4 != replica_pml4 { + info!("Switching from 0x{:x} to 0x{:x}", current_pml4, replica_pml4); controlregs::cr3_write(self.pml4.into()); } } @@ -1076,8 +1077,7 @@ impl Default for NrProcess { impl Clone for Ring3Process { fn clone(&self) -> Self { - unimplemented!("Clone not implemented for Ring3Process") - /*Ring3Process { + Ring3Process { pid: self.pid, current_eid: self.current_eid, vspace: self.vspace.clone(), @@ -1090,7 +1090,7 @@ impl Clone for Ring3Process { pfm: self.pfm.clone(), writeable_sections: self.writeable_sections.clone(), read_only_offset: self.read_only_offset, - }*/ + } } } @@ -1188,7 +1188,8 @@ impl elfloader::ElfLoader for Ring3Process { }; info!( - "ELF Allocate: {:#x} -- {:#x} align to {:#x} with flags {:?} ({:?})", + "{}: ELF Allocate: {:#x} -- {:#x} align to {:#x} with flags {:?} ({:?})", + *crate::environment::MT_ID, page_base, page_base + size_page, align_to, @@ -1498,57 +1499,25 @@ impl Process for Ring3Process { fn allocate_executors( &mut self, memory: Frame, - - #[cfg(feature = "rackscale")] mid: kpi::system::MachineId, ) -> Result { let executor_space_requirement = Ring3Executor::EXECUTOR_SPACE_REQUIREMENT; let executors_to_create = memory.size() / executor_space_requirement; // Only 
map to kernel space for local (valid) frames - #[cfg(feature = "rackscale")] - if mid == *crate::environment::MACHINE_ID { - KernelAllocator::try_refill_tcache(20, 0, MemType::Mem).expect("Refill didn't work"); - self.vspace - .map_frame( - self.executor_offset, - memory, - MapAction::user() | MapAction::write(), - ) - .expect("Can't map user-space executor memory."); - log::debug!( - "executor space base expanded {:#x} size: {} end {:#x}", - self.executor_offset, - memory.size(), - self.executor_offset + memory.size() - ); - } else { - log::debug!( - "skipping executor space vspace mapping for mid={:?} on mid={:?} {:#x} size: {} end {:#x}", - mid, - *crate::environment::MACHINE_ID, - self.executor_offset, - memory.size(), - self.executor_offset + memory.size() - ); - } - - #[cfg(not(feature = "rackscale"))] - { - KernelAllocator::try_refill_tcache(20, 0, MemType::Mem).expect("Refill didn't work"); - self.vspace - .map_frame( - self.executor_offset, - memory, - MapAction::user() | MapAction::write(), - ) - .expect("Can't map user-space executor memory."); - log::debug!( - "executor space base expanded {:#x} size: {} end {:#x}", + KernelAllocator::try_refill_tcache(20, 0, MemType::Mem).expect("Refill didn't work"); + self.vspace + .map_frame( self.executor_offset, - memory.size(), - self.executor_offset + memory.size() - ); - } + memory, + MapAction::user() | MapAction::write(), + ) + .expect("Can't map user-space executor memory."); + log::debug!( + "executor space base expanded {:#x} size: {} end {:#x}", + self.executor_offset, + memory.size(), + self.executor_offset + memory.size() + ); let executor_space = executor_space_requirement * executors_to_create; let prange = memory.base..memory.base + executor_space; @@ -1581,11 +1550,11 @@ impl Process for Ring3Process { memory.affinity, ))?; - #[cfg(not(feature = "rackscale"))] let index = memory.affinity as usize; - #[cfg(feature = "rackscale")] - let index = self.get_executor_index(memory.affinity, mid); + //TODO: 
xxx + //#[cfg(feature = "rackscale")] + //let index = self.get_executor_index(memory.affinity, mid); // TODO(error-handling): Needs to properly unwind on alloc errors // (e.g., have something that frees vcpu mem etc. on drop()) diff --git a/kernel/src/arch/x86_64/syscall.rs b/kernel/src/arch/x86_64/syscall.rs index a8c225ce9..f95793cd5 100644 --- a/kernel/src/arch/x86_64/syscall.rs +++ b/kernel/src/arch/x86_64/syscall.rs @@ -294,12 +294,20 @@ impl ProcessDispatch for T { fn set_replicas(&self, add: u64, rid: u64) -> crate::error::KResult<(u64, u64)> { let pid = current_pid()?; - if add > 0 { - NrProcess::::add_replica(pid, rid as usize).expect("add_replica"); + let handles = if add > 0 { + NrProcess::::add_replica(pid, rid as usize).expect("add_replica") } else { - NrProcess::::remove_replica(pid, rid as usize).expect("remove_replica"); - } + NrProcess::::remove_replica(pid, rid as usize).expect("remove_replica") + }; + + #[cfg(feature = "rackscale")] + super::tlb::remote_shootdown(handles); + + // There will only be one handle in non-rackscale build + #[cfg(not(feature = "rackscale"))] + super::tlb::shootdown(handles[0].clone()); + Ok((0,0)) } diff --git a/kernel/src/arch/x86_64/tlb.rs b/kernel/src/arch/x86_64/tlb.rs index 9fb4f3413..657ffe835 100644 --- a/kernel/src/arch/x86_64/tlb.rs +++ b/kernel/src/arch/x86_64/tlb.rs @@ -159,6 +159,11 @@ impl Shootdown { // before this function completes: self.acknowledge(); + if self.vregion.start == 0u64 && self.vregion.end == 0u64 { + log::info!("got special unmap for 0..0, skipping TLB flush"); + return; + } + let it = self.vregion.clone().step_by(BASE_PAGE_SIZE); if it.count() > 20 { trace!("flush the entire TLB"); @@ -397,6 +402,11 @@ pub(crate) fn shootdown(handle: TlbFlushHandle) { let shootdown = Shootdown::new(range); shootdown.process(); + //use crate::process::Executor; + //let pborrow = super::process::CURRENT_EXECUTOR.borrow_mut(); + //let p = pborrow.as_ref().unwrap(); + //p.maybe_switch_vspace(); + // Wait 
synchronously on cores to complete while !shootdowns.is_empty() { // Make progress on our work while we wait for others diff --git a/kernel/src/arch/x86_64/vspace/mod.rs b/kernel/src/arch/x86_64/vspace/mod.rs index 325508e4f..e28d1cf7c 100644 --- a/kernel/src/arch/x86_64/vspace/mod.rs +++ b/kernel/src/arch/x86_64/vspace/mod.rs @@ -92,7 +92,7 @@ lazy_static! { /// - This clearly needs a better solution. See also the part where we patch /// this into the process page-table. pub(crate) unsafe fn init_large_objects_pml4() { - log::info!("init_large_objects_pml4()"); + //log::info!("init_large_objects_pml4()"); let mut vspace = INITIAL_VSPACE.lock(); let frame_ptr = alloc::alloc::alloc(PT_LAYOUT); @@ -103,12 +103,17 @@ pub(crate) unsafe fn init_large_objects_pml4() { (*vspace.pml4)[132] = PML4Entry::new(frame.base, PML4Flags::P | PML4Flags::RW); } +#[derive(Clone)] pub(crate) struct VSpace { pub mappings: BTreeMap, pub page_table: PageTable, } impl AddressSpace for VSpace { + fn root(&self) -> PAddr { + self.pml4_address() + } + fn map_frame(&mut self, base: VAddr, frame: Frame, action: MapAction) -> Result<(), KError> { if frame.size() == 0 { return Err(KError::InvalidFrame); @@ -148,6 +153,7 @@ impl AddressSpace for VSpace { }); } } + //log::info!("VSpace::map_frame base={:x} frame={:?} action={:?}", base.as_usize(), frame, action); //self.mappings // .try_insert(base, MappingInfo::new(frame, action))?; let r = self.page_table.map_frame(base, frame, action); diff --git a/kernel/src/arch/x86_64/vspace/page_table.rs b/kernel/src/arch/x86_64/vspace/page_table.rs index 9dd2fd79f..1429a6c02 100644 --- a/kernel/src/arch/x86_64/vspace/page_table.rs +++ b/kernel/src/arch/x86_64/vspace/page_table.rs @@ -35,10 +35,100 @@ pub(crate) struct PageTable { pub pml4: Pin>, } +impl Clone for PageTable { + fn clone(&self) -> Self { + fn alloc_frame() -> Frame { + let frame_ptr = unsafe { + let ptr = alloc::alloc::alloc_zeroed(PT_LAYOUT); + debug_assert!(!ptr.is_null()); + + let nptr = 
NonNull::new_unchecked(ptr); + NonNull::slice_from_raw_parts(nptr, PT_LAYOUT.size()) + }; + let vaddr = VAddr::from(frame_ptr.as_ptr() as *const u8 as u64); + let paddr = crate::arch::memory::kernel_vaddr_to_paddr(vaddr); + let mut frame = Frame::new(paddr, PT_LAYOUT.size(), 0); + unsafe { frame.zero() }; + frame + } + + fn new_pt() -> PDEntry { + let frame = alloc_frame(); + return PDEntry::new(frame.base, PDFlags::P | PDFlags::RW | PDFlags::US); + } + + fn new_pd() -> PDPTEntry { + let frame = alloc_frame(); + return PDPTEntry::new(frame.base, PDPTFlags::P | PDPTFlags::RW | PDPTFlags::US); + } + + fn new_pdpt() -> PML4Entry { + let frame = alloc_frame(); + return PML4Entry::new(frame.base, PML4Flags::P | PML4Flags::RW | PML4Flags::US); + } + + let mut cloned_pt = PageTable::new().expect("Can't clone PT"); + + // Do a DFS and find all mapped entries and replicate them in the new `pt` + for pml4_idx in 0..PAGE_SIZE_ENTRIES { + if pml4_idx < pml4_index(KERNEL_BASE.into()) && self.pml4[pml4_idx].is_present() { + cloned_pt.pml4[pml4_idx] = new_pdpt(); + + for pdpt_idx in 0..PAGE_SIZE_ENTRIES { + let pdpt = self.get_pdpt(self.pml4[pml4_idx]); + let cloned_pdpt = cloned_pt.get_pdpt_mut(cloned_pt.pml4[pml4_idx]); + + if pdpt[pdpt_idx].is_present() { + if !pdpt[pdpt_idx].is_page() { + cloned_pdpt[pdpt_idx] = new_pd(); + let cloned_pdpt_entry = cloned_pdpt[pdpt_idx]; + drop(cloned_pdpt); + + + for pd_idx in 0..PAGE_SIZE_ENTRIES { + let pd = self.get_pd(pdpt[pdpt_idx]); + let cloned_pd = cloned_pt.get_pd_mut(cloned_pdpt_entry); + + if pd[pd_idx].is_present() { + if !pd[pd_idx].is_page() { + cloned_pd[pd_idx] = new_pt(); + let cloned_pd_entry = cloned_pd[pd_idx]; + drop(cloned_pd); + + for pt_idx in 0..PAGE_SIZE_ENTRIES { + let pt = self.get_pt(pd[pd_idx]); + let cloned_pt = cloned_pt.get_pt_mut(cloned_pd_entry); + + if pt[pt_idx].is_present() { + cloned_pt[pt_idx] = pt[pt_idx]; + } + } + } + } else { + // Encountered a 2 MiB mapping + cloned_pd[pd_idx] = pd[pd_idx]; + + } + 
} + } else { + cloned_pdpt[pdpt_idx] = pdpt[pdpt_idx]; + } + } + } + } + } + + cloned_pt + } +} + impl Drop for PageTable { + #[allow(unreachable_code)] fn drop(&mut self) { - use alloc::alloc::dealloc; + log::info!("calling drop in PageTable, skipping for now"); + return; + use alloc::alloc::dealloc; // Do a DFS and free all page-table memory allocated below kernel-base, // don't free the mapped frames -- we return them later through NR for pml4_idx in 0..PAGE_SIZE_ENTRIES { @@ -85,6 +175,10 @@ impl Drop for PageTable { } impl AddressSpace for PageTable { + fn root(&self) -> PAddr { + PAddr::from(self.pml4.as_ptr() as u64) + } + fn map_frame(&mut self, base: VAddr, frame: Frame, action: MapAction) -> Result<(), KError> { // These assertion are checked with error returns in `VSpace` debug_assert!(frame.size() > 0); @@ -562,7 +656,7 @@ impl PageTable { let cur_rights: MapAction = pt[pt_idx].flags().into(); if address != pbase + mapped || cur_rights != rights { panic!( - "Trying to map 4 KiB page but it conflicts with existing mapping {:x}", + "Trying to map 4 KiB page at vbase={vbase:#x} pbase={pbase:#x} but it conflicts with existing mapping {:x}", address ); } @@ -932,6 +1026,10 @@ impl<'a> ReadOnlyPageTable<'a> { } impl<'a> AddressSpace for ReadOnlyPageTable<'a> { + fn root(&self) -> PAddr { + PAddr::from(self.pml4.as_ptr() as u64) + } + fn resolve(&self, addr: VAddr) -> Result<(PAddr, MapAction), KError> { let pml4_idx = pml4_index(addr); if self.pml4[pml4_idx].is_present() { diff --git a/kernel/src/environment.rs b/kernel/src/environment.rs index c9ae95be0..805f0822f 100644 --- a/kernel/src/environment.rs +++ b/kernel/src/environment.rs @@ -14,7 +14,18 @@ pub(crate) static CORE_ID: Lazy = Lazy::new(|| new_gtid(atopology::MACHINE_TOPOLOGY.current_thread().id, *MACHINE_ID)); #[thread_local] -pub(crate) static MT_ID: Lazy = Lazy::new(|| kpi::system::mtid_from_gtid(*crate::environment::CORE_ID)); +pub(crate) static MT_ID: Lazy = Lazy::new(|| { + #[cfg(feature = 
"rackscale")] + return (*crate::environment::MACHINE_ID - 1) * (atopology::MACHINE_TOPOLOGY.num_threads()) + atopology::MACHINE_TOPOLOGY.current_thread().id; + #[cfg(not(feature = "rackscale"))] + *CORE_ID +}); + +/// Number of nodes in the current deployment. +#[allow(unused)] +pub(crate) static NUM_NODES: Lazy = + Lazy::new(|| atopology::MACHINE_TOPOLOGY.num_nodes()); + /// The NUMA node id of the current core (hardware thread). #[thread_local] diff --git a/kernel/src/fs/fd.rs b/kernel/src/fs/fd.rs index d89c0016c..283a20433 100644 --- a/kernel/src/fs/fd.rs +++ b/kernel/src/fs/fd.rs @@ -105,6 +105,16 @@ pub(crate) struct FileDescriptorEntry { offset: AtomicUsize, } +impl Clone for FileDescriptorEntry { + fn clone(&self) -> Self { + Self { + mnode: self.mnode, + flags: self.flags, + offset: AtomicUsize::new(self.offset()), + } + } +} + impl FileDescriptorEntry { pub(super) fn update(&mut self, mnode: MnodeNum, flags: FileFlags) { self.mnode = mnode; diff --git a/kernel/src/memory/vspace.rs b/kernel/src/memory/vspace.rs index 23149390d..e27610b3b 100644 --- a/kernel/src/memory/vspace.rs +++ b/kernel/src/memory/vspace.rs @@ -100,7 +100,7 @@ impl Iterator for CoreBitMapIter { } #[cfg_attr(not(target_os = "none"), allow(dead_code))] -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Copy, Clone)] pub(crate) enum MappingType { _ElfText, _ElfData, @@ -108,6 +108,7 @@ pub(crate) enum MappingType { Heap, } +#[derive(PartialEq, Clone)] pub(crate) struct MappingInfo { pub frame: Frame, pub rights: MapAction, @@ -141,6 +142,8 @@ impl fmt::Debug for MappingInfo { /// Generic address space functionality. pub(crate) trait AddressSpace { + fn root(&self) -> PAddr; + /// Maps a list of `frames` at `base` in the address space /// with the access rights defined by `action`. 
fn map_frames(&mut self, base: VAddr, frames: &[(Frame, MapAction)]) -> Result<(), KError> { diff --git a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs index 0a6260d5d..67ddf3957 100644 --- a/kernel/src/nrproc.rs +++ b/kernel/src/nrproc.rs @@ -32,7 +32,11 @@ pub(crate) static PROCESS_TOKEN: Once> /// /// Should be called on each core. pub(crate) fn register_thread_with_process_replicas() { + #[cfg(not(feature = "rackscale"))] let node = *crate::environment::NODE_ID; + #[cfg(feature = "rackscale")] + let node = 0; //*crate::environment::MACHINE_ID + debug_assert!(PROCESS_TABLE.len() > node, "Invalid Node ID"); PROCESS_TOKEN.call_once(|| { @@ -40,7 +44,8 @@ pub(crate) fn register_thread_with_process_replicas() { for pid in 0..MAX_PROCESSES { debug_assert!(PROCESS_TABLE.len() > pid, "Invalid PID"); - let token = PROCESS_TABLE[pid].write(*crate::environment::MT_ID).register(node); + let token = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).register(node); + log::debug!("MT_ID is {}: {node} registered {pid} {token:?}", *crate::environment::MT_ID); tokens.push(token.expect("Need to be able to register")); } @@ -69,6 +74,7 @@ pub(crate) enum ProcessOp<'buf> { #[allow(unused)] ExecSliceMut(UserSlice, SliceExecMutFn<'buf>), ExecSlice(&'buf UserSlice, SliceExecFn<'buf>), + GetPtRoot, } /// Mutable operations on the NrProcess. @@ -84,10 +90,6 @@ pub(crate) enum ProcessOpMut { /// Remove a physical frame previosuly allocated to the process (returns a Frame). ReleaseFrameFromProcess(FrameId), - #[cfg(feature = "rackscale")] - DispatcherAllocation(Frame, kpi::system::MachineId), - - #[cfg(not(feature = "rackscale"))] DispatcherAllocation(Frame), MemMapFrame(VAddr, Frame, MapAction), @@ -111,6 +113,7 @@ pub(crate) enum ProcessResult { Frame(Frame), ReadSlice(Arc<[u8]>), ReadString(String), + PtRoot(PAddr), } pub(crate) trait ProcessManager { @@ -141,22 +144,44 @@ impl NrProcess

{ } impl NrProcess

{ - pub(crate) fn add_replica(pid: Pid, rid: usize) -> Result<(), KError>{ + pub(crate) fn add_replica(pid: Pid, rid: usize) -> Result, KError> { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); + #[cfg(feature = "rackscale")] let max_nodes = *crate::environment::NUM_MACHINES; + #[cfg(not(feature = "rackscale"))] + let max_nodes = *crate::environment::NUM_NODES; + debug_assert!(rid < max_nodes, "Invalid Node ID"); log::info!("add_replica {pid} {rid}"); + // we use unmap of 0x0 to get a snapshot of where the core is running on + let handle = NrProcess::

::unmap(pid, VAddr::from(0x0)); + if !handle.is_ok() { + panic!("couldn't get snapshot"); + } + PROCESS_TABLE[pid].write(*crate::environment::MT_ID).add_replica(rid).expect("add_replica failed"); - Ok(()) + log::info!("added_replica {pid} {rid}"); + + handle } - pub(crate) fn remove_replica(pid: Pid, rid: usize) -> Result<(), KError>{ + pub(crate) fn remove_replica(pid: Pid, rid: usize) -> Result, KError> { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); + #[cfg(feature = "rackscale")] let max_nodes = *crate::environment::NUM_MACHINES; - debug_assert!(rid < max_nodes, "Invalid Node ID"); - log::info!("remove_replica {pid} {rid}"); + #[cfg(not(feature = "rackscale"))] + let max_nodes = *crate::environment::NUM_NODES; + + debug_assert!(rid < max_nodes, "Invalid Node ID {rid} max_nodes {max_nodes}"); + + // we use unmap of 0x0 to get a snapshot of where the core is running on + let handle = NrProcess::

::unmap(pid, VAddr::from(0x0)); + if !handle.is_ok() { + panic!("couldn't get snapshot"); + } PROCESS_TABLE[pid].write(*crate::environment::MT_ID).remove_replica(rid).expect("remove_replica failed"); - Ok(()) + + handle } pub(crate) fn load( @@ -274,6 +299,18 @@ impl NrProcess

{ Ok((base.as_u64(), virtual_offset as u64)) } + + pub(crate) fn ptroot(pid: Pid) -> Result { + debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); + let response = + PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute(ProcessOp::GetPtRoot, PROCESS_TOKEN.get().unwrap()[pid]); + match response { + Ok(ProcessResult::PtRoot(paddr)) => Ok(paddr), + Err(e) => Err(e), + _ => unreachable!("Got unexpected response"), + } + } + pub(crate) fn pinfo(pid: Pid) -> Result { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); let response = @@ -351,14 +388,9 @@ impl NrProcess

{ pub(crate) fn allocate_dispatchers(pid: Pid, frame: Frame) -> Result { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - #[cfg(feature = "rackscale")] - let mid = *crate::environment::MACHINE_ID; let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut( - #[cfg(not(feature = "rackscale"))] ProcessOpMut::DispatcherAllocation(frame), - #[cfg(feature = "rackscale")] - ProcessOpMut::DispatcherAllocation(frame, mid), PROCESS_TOKEN.get().unwrap()[pid], ); @@ -450,6 +482,9 @@ where fn dispatch<'buf>(&self, op: Self::ReadOperation<'_>) -> Self::Response { match op { + ProcessOp::GetPtRoot => { + Ok(ProcessResult::PtRoot(self.process.vspace().root())) + } ProcessOp::ProcessInfo => Ok(ProcessResult::ProcessInfo(*self.process.pinfo())), ProcessOp::MemResolve(base) => { let (paddr, rights) = self.process.vspace().resolve(base)?; @@ -511,18 +546,11 @@ where Ok(ProcessResult::Ok) } - #[cfg(not(feature = "rackscale"))] ProcessOpMut::DispatcherAllocation(frame) => { let how_many = self.process.allocate_executors(frame)?; Ok(ProcessResult::ExecutorsCreated(how_many)) } - #[cfg(feature = "rackscale")] - ProcessOpMut::DispatcherAllocation(frame, mid) => { - let how_many = self.process.allocate_executors(frame, mid)?; - Ok(ProcessResult::ExecutorsCreated(how_many)) - } - ProcessOpMut::MemMapFrame(base, frame, action) => { crate::memory::KernelAllocator::try_refill_tcache(7, 0, MemType::Mem)?; self.process.vspace_mut().map_frame(base, frame, action)?; @@ -545,12 +573,18 @@ where } ProcessOpMut::MemUnmap(vaddr) => { - let shootdown_handle = self.process.vspace_mut().unmap(vaddr)?; - if shootdown_handle.flags.is_aliasable() { - self.process - .remove_frame_mapping(shootdown_handle.paddr, shootdown_handle.vaddr) - .expect("is_aliasable implies this op can't fail"); + let shootdown_handle = if vaddr.as_u64() != 0x0 { + let shootdown_handle = self.process.vspace_mut().unmap(vaddr)?; + if shootdown_handle.flags.is_aliasable() { + self.process + 
.remove_frame_mapping(shootdown_handle.paddr, shootdown_handle.vaddr) + .expect("is_aliasable implies this op can't fail"); + } + shootdown_handle } + else { + TlbFlushHandle::new(0x0.into(), 0x0.into(), 0x0, MapAction::none()) + }; let num_machines = *crate::environment::NUM_MACHINES; let mut shootdown_handles = Vec::try_with_capacity(num_machines) diff --git a/kernel/src/process.rs b/kernel/src/process.rs index 77ad93dd9..e2a6ebd68 100644 --- a/kernel/src/process.rs +++ b/kernel/src/process.rs @@ -27,7 +27,7 @@ use crate::arch::MAX_CORES; use crate::cmdline::CommandLineArguments; use crate::error::{KError, KResult}; use crate::fs::fd::FileDescriptorEntry; -use crate::memory::backends::PhysicalPageProvider; +//use crate::memory::backends::PhysicalPageProvider; use crate::memory::vspace::AddressSpace; use crate::memory::{Frame, KernelAllocator, PAddr, VAddr, KERNEL_BASE}; use crate::prelude::overlaps; @@ -79,16 +79,8 @@ pub(crate) trait Process: FrameManagement + Clone { affinity: atopology::NodeId, ) -> Result<(), alloc::collections::TryReserveError>; - #[cfg(not(feature = "rackscale"))] fn allocate_executors(&mut self, frame: Frame) -> Result; - #[cfg(feature = "rackscale")] - fn allocate_executors( - &mut self, - frame: Frame, - mid: kpi::system::MachineId, - ) -> Result; - fn vspace_mut(&mut self) -> &mut Self::A; fn vspace(&self) -> &Self::A; @@ -121,6 +113,7 @@ pub(crate) trait FrameManagement { } /// Implementation for managing a process' frames. +#[derive(Clone)] pub(crate) struct ProcessFrames { /// Physical frame objects registered to the process. frames: ArrayVec<(Option, usize), MAX_FRAMES_PER_PROCESS>, @@ -589,9 +582,14 @@ pub(crate) fn allocate_dispatchers(pid: Pid, affinity: NodeId) -> Re KernelAllocator::try_refill_tcache(20, 1, MemType::Mem)?; let mut frame = { let pcm = crate::arch::kcb::per_core_mem(); - pcm.gmanager.unwrap().node_caches[affinity] - .lock() - .allocate_large_page()? 
+ + #[cfg(feature = "rackscale")] + pcm.set_mem_affinity(affinity).expect("Can't change affinity"); + let frame = pcm.mem_manager().allocate_large_page()?; + + #[cfg(feature = "rackscale")] + pcm.set_mem_affinity(crate::memory::shmem_affinity::local_shmem_affinity()).expect("Can't reset affinity"); + frame }; unsafe { diff --git a/kernel/tests/s04_user_runtime_tests.rs b/kernel/tests/s04_user_runtime_tests.rs index e8d749895..333aa87c9 100644 --- a/kernel/tests/s04_user_runtime_tests.rs +++ b/kernel/tests/s04_user_runtime_tests.rs @@ -19,6 +19,30 @@ use testutils::helpers::{ }; use testutils::runner_args::{check_for_successful_exit, wait_for_sigterm, RunnerArgs}; +/// Makes sure we can change the number of replicas for a process. +#[cfg(not(feature = "baremetal"))] +#[test] +fn s04_dynamic_replication() { + let build = BuildArgs::default() + .user_feature("test-dynamic-replication") + .build(); + let cmdline = RunnerArgs::new_with_build("userspace-smp", &build) + .nodes(4) + .cores(64) + .memory(4096) + .timeout(120_000); + + let mut output = String::new(); + let mut qemu_run = || -> Result { + let mut p = spawn_nrk(&cmdline)?; + output += p.exp_string("dynamic_replication OK")?.as_str(); + output = p.exp_eof()?; + p.process.exit() + }; + + check_for_successful_exit(&cmdline, qemu_run(), output); +} + /// Tests the lineup scheduler multi-core ability. /// /// Makes sure we can request cores and spawn threads on said cores. 
diff --git a/kernel/tests/s06_rackscale_tests.rs b/kernel/tests/s06_rackscale_tests.rs index 0954febdb..544899d83 100644 --- a/kernel/tests/s06_rackscale_tests.rs +++ b/kernel/tests/s06_rackscale_tests.rs @@ -39,6 +39,7 @@ fn rackscale_userspace_smoke_test(transport: RackscaleTransport) { "test-upcall", "test-scheduler", "test-syscalls", + //"test-dynamic-replication" ]) .set_rackscale(true) .release() @@ -58,6 +59,7 @@ fn rackscale_userspace_smoke_test(transport: RackscaleTransport) { *output += proc.exp_string("map_test OK")?.as_str(); *output += proc.exp_string("alloc_test OK")?.as_str(); *output += proc.exp_string("scheduler_test OK")?.as_str(); + //*output += proc.exp_string("dynamic_replication OK")?.as_str(); Ok(()) } @@ -65,6 +67,7 @@ fn rackscale_userspace_smoke_test(transport: RackscaleTransport) { test_run.client_match_fn = client_match_fn; test_run.transport = transport; test_run.wait_for_client = true; + test_run.shmem_size *= 2; test_run.run_rackscale(); } diff --git a/kernel/tests/s10_benchmarks.rs b/kernel/tests/s10_benchmarks.rs index 4bff3512d..c39001adc 100644 --- a/kernel/tests/s10_benchmarks.rs +++ b/kernel/tests/s10_benchmarks.rs @@ -850,7 +850,7 @@ fn s10_xmemcached_benchmark_internal() { // Throw out everything above 28 since we have some non-deterministic // bug on larger machines that leads to threads calling sched_yield and // no readrandom is performed... - .filter(|&t| t <= 28) + .filter(|&t| t <= 16) .collect(); // memcached arguments // currently not there. 
diff --git a/kernel/testutils/src/rackscale_runner.rs b/kernel/testutils/src/rackscale_runner.rs index 892443c48..3b8af826c 100644 --- a/kernel/testutils/src/rackscale_runner.rs +++ b/kernel/testutils/src/rackscale_runner.rs @@ -413,9 +413,9 @@ impl RackscaleRun { } for client_ret in client_rets { - client_ret.unwrap(); + client_ret.expect("client_ret"); } - controller_ret.unwrap(); + controller_ret.expect("controller_ret"); } pub fn run_baseline(&self) { diff --git a/usr/init/src/init.rs b/usr/init/src/init.rs index 2b3a2669e..17cb706fd 100644 --- a/usr/init/src/init.rs +++ b/usr/init/src/init.rs @@ -988,8 +988,18 @@ pub fn upcall_test() { } pub fn dynamic_replication_test() { - vibrio::syscalls::Process::set_replicas(false, 1).expect("Can't read vcpu control area."); - vibrio::syscalls::Process::set_replicas(true, 1).expect("Can't read vcpu control area."); + info!("dynamic_replication_test START"); + + vibrio::syscalls::Process::set_replicas(false, 0).expect("Can't remove replica 0."); + + //vibrio::syscalls::Process::set_replicas(false, 1).expect("Can't remove replica 1."); + //vibrio::syscalls::Process::set_replicas(false, 2).expect("Can't remove replica 2."); + //vibrio::syscalls::Process::set_replicas(false, 3).expect("Can't remove replica 3."); + //info!("dynamic_replication_test ALL REMOVED"); + //vibrio::syscalls::Process::set_replicas(true, 3).expect("Can't add replica 3."); + //vibrio::syscalls::Process::set_replicas(true, 2).expect("Can't add replica 2."); + //vibrio::syscalls::Process::set_replicas(true, 1).expect("Can't add replica 1."); + info!("dynamic_replication_test ALL ADDED BACK"); info!("dynamic_replication OK"); } @@ -1070,6 +1080,7 @@ pub extern "C" fn _start() -> ! 
{ #[cfg(feature = "test-core-alloc")] core_alloc_test(); + #[cfg(feature = "test-dynamic-replication")] dynamic_replication_test(); #[cfg(feature = "test-scheduler")] From b9f89b21ec80ab465aa201553f0386797df9f6fe Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Mon, 6 Nov 2023 01:00:04 -0800 Subject: [PATCH 10/32] wiwipp Signed-off-by: Gerd Zellweger --- lib/node-replication2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/node-replication2 b/lib/node-replication2 index 23df3df94..b5ffc6a67 160000 --- a/lib/node-replication2 +++ b/lib/node-replication2 @@ -1 +1 @@ -Subproject commit 23df3df94d60ad64abc078c62fbbb229f7f01dff +Subproject commit b5ffc6a676ac036ebaed6f3de1d04f4ab4d36177 From 92c79d1c06aeab5500e83c7c53ecc10773e28a3f Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Thu, 16 Nov 2023 15:08:29 -0800 Subject: [PATCH 11/32] Wip. Signed-off-by: Gerd Zellweger --- kernel/src/arch/x86_64/mod.rs | 9 --------- kernel/src/arch/x86_64/process.rs | 10 ++++++---- .../src/arch/x86_64/rackscale/controller.rs | 3 +++ kernel/src/memory/mod.rs | 5 +++++ kernel/src/nrproc.rs | 20 +++++++++++++++++-- kernel/src/process.rs | 3 ++- kernel/tests/s06_rackscale_tests.rs | 5 +++-- 7 files changed, 37 insertions(+), 18 deletions(-) diff --git a/kernel/src/arch/x86_64/mod.rs b/kernel/src/arch/x86_64/mod.rs index b4fdfcfcd..dac19add9 100644 --- a/kernel/src/arch/x86_64/mod.rs +++ b/kernel/src/arch/x86_64/mod.rs @@ -513,15 +513,6 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { // Intialize PCI crate::pci::init(); - // Initialize processes - #[cfg(feature = "rackscale")] - if crate::CMDLINE - .get() - .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller) - { - lazy_static::initialize(&process::PROCESS_TABLE); - } - #[cfg(not(feature = "rackscale"))] { lazy_static::initialize(&process::PROCESS_TABLE); diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs index 456b7033d..3df58bd15 100644 --- 
a/kernel/src/arch/x86_64/process.rs +++ b/kernel/src/arch/x86_64/process.rs @@ -76,6 +76,7 @@ lazy_static! { pub(crate) static ref PROCESS_TABLE: ArrayVec>>>, MAX_PROCESSES> = { use crate::memory::shmem_affinity::mid_to_shmem_affinity; use crate::arch::kcb::per_core_mem; + use crate::environment::NUM_MACHINES; if !crate::CMDLINE .get() @@ -106,7 +107,7 @@ lazy_static! { // Want at least one replica... let num_replicas = - NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).unwrap(); + NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes() * (*NUM_MACHINES))).unwrap(); let mut processes = ArrayVec::new(); for _pid in 0..MAX_PROCESSES { @@ -1499,6 +1500,7 @@ impl Process for Ring3Process { fn allocate_executors( &mut self, memory: Frame, + #[cfg(feature = "rackscale")] mid: kpi::system::MachineId, ) -> Result { let executor_space_requirement = Ring3Executor::EXECUTOR_SPACE_REQUIREMENT; let executors_to_create = memory.size() / executor_space_requirement; @@ -1550,11 +1552,11 @@ impl Process for Ring3Process { memory.affinity, ))?; + #[cfg(not(feature = "rackscale"))] let index = memory.affinity as usize; - //TODO: xxx - //#[cfg(feature = "rackscale")] - //let index = self.get_executor_index(memory.affinity, mid); + #[cfg(feature = "rackscale")] + let index = self.get_executor_index(memory.affinity, mid); // TODO(error-handling): Needs to properly unwind on alloc errors // (e.g., have something that frees vcpu mem etc. on drop()) diff --git a/kernel/src/arch/x86_64/rackscale/controller.rs b/kernel/src/arch/x86_64/rackscale/controller.rs index 20def97b5..3989403c8 100644 --- a/kernel/src/arch/x86_64/rackscale/controller.rs +++ b/kernel/src/arch/x86_64/rackscale/controller.rs @@ -81,6 +81,9 @@ pub(crate) fn run() { // TODO(dynrep): here is the point where we have all the memory regions and // could create NodeReplicated instances for each client. 
+ // Initialize processes + use crate::arch::process::PROCESS_TABLE; + lazy_static::initialize(&PROCESS_TABLE); #[cfg(feature = "test-controller-shmem-alloc")] { diff --git a/kernel/src/memory/mod.rs b/kernel/src/memory/mod.rs index ab3939bdf..ce9b67085 100644 --- a/kernel/src/memory/mod.rs +++ b/kernel/src/memory/mod.rs @@ -345,6 +345,10 @@ impl KernelAllocator { use crate::arch::rackscale::get_shmem_frames::rpc_get_shmem_frames; use crate::arch::rackscale::CLIENT_STATE; + if needed_base_pages == 0 && needed_large_pages == 0 { + return Ok(()); + } + // We only request at large page granularity let mut total_needed_large_pages = needed_large_pages; let mut total_needed_base_pages = needed_base_pages; @@ -418,6 +422,7 @@ impl KernelAllocator { } frames } else { + log::info!("try-refill-shmem needed_base_pages={needed_base_pages} needed_large_pages={needed_large_pages} total_needed_base_pages={total_needed_base_pages} total_needed_large_pages={total_needed_large_pages}"); rpc_get_shmem_frames(None, total_needed_large_pages)? }; diff --git a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs index 67ddf3957..8246fc217 100644 --- a/kernel/src/nrproc.rs +++ b/kernel/src/nrproc.rs @@ -90,8 +90,12 @@ pub(crate) enum ProcessOpMut { /// Remove a physical frame previosuly allocated to the process (returns a Frame). ReleaseFrameFromProcess(FrameId), + #[cfg(feature = "rackscale")] + DispatcherAllocation(Frame, kpi::system::MachineId), + + #[cfg(not(feature = "rackscale"))] DispatcherAllocation(Frame), - + MemMapFrame(VAddr, Frame, MapAction), MemMapDevice(Frame, MapAction), MemMapFrameId(VAddr, FrameId, MapAction), @@ -389,11 +393,16 @@ impl NrProcess

{ pub(crate) fn allocate_dispatchers(pid: Pid, frame: Frame) -> Result { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); + #[cfg(feature = "rackscale")] + let mid = *crate::environment::MACHINE_ID; let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut( + #[cfg(not(feature = "rackscale"))] ProcessOpMut::DispatcherAllocation(frame), + #[cfg(feature = "rackscale")] + ProcessOpMut::DispatcherAllocation(frame, mid), PROCESS_TOKEN.get().unwrap()[pid], ); - + match response { Ok(ProcessResult::ExecutorsCreated(how_many)) => Ok(how_many), Err(e) => Err(e), @@ -546,11 +555,18 @@ where Ok(ProcessResult::Ok) } + #[cfg(not(feature = "rackscale"))] ProcessOpMut::DispatcherAllocation(frame) => { let how_many = self.process.allocate_executors(frame)?; Ok(ProcessResult::ExecutorsCreated(how_many)) } + #[cfg(feature = "rackscale")] + ProcessOpMut::DispatcherAllocation(frame, mid) => { + let how_many = self.process.allocate_executors(frame, mid)?; + Ok(ProcessResult::ExecutorsCreated(how_many)) + } + ProcessOpMut::MemMapFrame(base, frame, action) => { crate::memory::KernelAllocator::try_refill_tcache(7, 0, MemType::Mem)?; self.process.vspace_mut().map_frame(base, frame, action)?; diff --git a/kernel/src/process.rs b/kernel/src/process.rs index e2a6ebd68..f3c13db20 100644 --- a/kernel/src/process.rs +++ b/kernel/src/process.rs @@ -79,7 +79,7 @@ pub(crate) trait Process: FrameManagement + Clone { affinity: atopology::NodeId, ) -> Result<(), alloc::collections::TryReserveError>; - fn allocate_executors(&mut self, frame: Frame) -> Result; + fn allocate_executors(&mut self, frame: Frame, #[cfg(feature = "rackscale")] mid: kpi::system::MachineId) -> Result; fn vspace_mut(&mut self) -> &mut Self::A; @@ -288,6 +288,7 @@ impl elfloader::ElfLoader for DataSecAllocator { } }; + log::info!("DataSecAllocator::allocate"); let shmem_frames = rpc_get_shmem_frames(Some(self.pid), large_pages) .expect("Failed to get shmem frames for elf loading"); diff --git 
a/kernel/tests/s06_rackscale_tests.rs b/kernel/tests/s06_rackscale_tests.rs index 544899d83..25e9518af 100644 --- a/kernel/tests/s06_rackscale_tests.rs +++ b/kernel/tests/s06_rackscale_tests.rs @@ -39,7 +39,7 @@ fn rackscale_userspace_smoke_test(transport: RackscaleTransport) { "test-upcall", "test-scheduler", "test-syscalls", - //"test-dynamic-replication" + "test-dynamic-replication" ]) .set_rackscale(true) .release() @@ -59,7 +59,7 @@ fn rackscale_userspace_smoke_test(transport: RackscaleTransport) { *output += proc.exp_string("map_test OK")?.as_str(); *output += proc.exp_string("alloc_test OK")?.as_str(); *output += proc.exp_string("scheduler_test OK")?.as_str(); - //*output += proc.exp_string("dynamic_replication OK")?.as_str(); + *output += proc.exp_string("dynamic_replication OK")?.as_str(); Ok(()) } @@ -68,6 +68,7 @@ fn rackscale_userspace_smoke_test(transport: RackscaleTransport) { test_run.transport = transport; test_run.wait_for_client = true; test_run.shmem_size *= 2; + test_run.num_clients = 3; test_run.run_rackscale(); } From 91811e61af9aba191d4871eb214a6144c5b905f5 Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Sat, 18 Nov 2023 20:11:32 -0800 Subject: [PATCH 12/32] Rework KernelNode init, change affinity correctly. 
Signed-off-by: Gerd Zellweger --- kernel/src/arch/unix/mod.rs | 27 +-- kernel/src/arch/x86_64/coreboot.rs | 7 - kernel/src/arch/x86_64/mod.rs | 70 +++----- kernel/src/arch/x86_64/process.rs | 20 ++- .../src/arch/x86_64/rackscale/controller.rs | 1 + kernel/src/nr.rs | 84 +++++++--- kernel/src/process.rs | 19 +-- kernel/src/scheduler/mod.rs | 98 ++++++----- kernel/tests/s11_rackscale_benchmarks.rs | 155 ++++++++++++++++++ lib/node-replication2 | 2 +- 10 files changed, 310 insertions(+), 173 deletions(-) diff --git a/kernel/src/arch/unix/mod.rs b/kernel/src/arch/unix/mod.rs index a0d2fedad..7b8c5f147 100644 --- a/kernel/src/arch/unix/mod.rs +++ b/kernel/src/arch/unix/mod.rs @@ -103,33 +103,14 @@ fn init_setup() { unsafe { kcb::PER_CORE_MEMORY.set_global_mem(global_memory_static) }; debug!("Memory allocation should work at this point..."); - let kernel_node = { + { // Create the global operation log and first replica and store it (needs // TLS) - let kernel_node: Arc> = Arc::try_new( - NodeReplicated::new(NonZeroUsize::new(1).unwrap(), |afc: AffinityChange| { - return 0; // xxx - }) - .expect("Not enough memory to initialize system"), - ) - .expect("Not enough memory to initialize system"); - - let local_ridx = kernel_node.register(0).unwrap(); - crate::nr::NR_REPLICA.call_once(|| (kernel_node.clone(), local_ridx)); - kernel_node + lazy_static::initialize(&crate::nr::KERNEL_NODE_INSTANCE); + let local_ridx = crate::nr::KERNEL_NODE_INSTANCE.register(0).unwrap(); + crate::nr::NR_REPLICA_REGISTRATION.call_once(|| local_ridx); }; - /* - - let log: Arc> = Arc::try_new(Log::::new(LARGE_PAGE_SIZE)) - .expect("Not enough memory to initialize system"); - let bsp_replica = Replica::::new(&log); - let local_ridx = bsp_replica - .register() - .expect("Failed to register with Replica."); - crate::nr::NR_REPLICA.call_once(|| (bsp_replica.clone(), local_ridx)); - - */ // Starting to initialize file-system let fs_logs = crate::fs::cnrfs::allocate_logs(); // Construct the first replica 
diff --git a/kernel/src/arch/x86_64/coreboot.rs b/kernel/src/arch/x86_64/coreboot.rs index e3cdfa9a4..9bd4c0b5a 100644 --- a/kernel/src/arch/x86_64/coreboot.rs +++ b/kernel/src/arch/x86_64/coreboot.rs @@ -18,7 +18,6 @@ use fallible_collections::FallibleVecGlobal; use fallible_collections::TryClone; use log::debug; use log::trace; -use nr2::nr::NodeReplicated; use x86::apic::ApicId; use x86::current::paging::PAddr; @@ -29,7 +28,6 @@ use crate::memory::backends::PhysicalPageProvider; use crate::memory::global::GlobalMemory; use crate::memory::vspace::MapAction; use crate::memory::Frame; -use crate::nr::KernelNode; use crate::round_up; use crate::stack::OwnedStack; use crate::stack::Stack; @@ -57,7 +55,6 @@ pub(crate) struct AppCoreArgs { pub(super) global_pmem: &'static GlobalMemory, pub(super) thread: atopology::ThreadId, pub(super) node: atopology::NodeId, - pub(super) replica: Arc>, pub(super) fs_replica: Option>>, } @@ -332,7 +329,6 @@ pub(crate) unsafe fn initialize( /// - Initialized topology /// - Local APIC driver pub(super) fn boot_app_cores( - kernel_node: Arc>, fs_logs: Vec>>, fs_replica: Option>>, ) { @@ -427,9 +423,6 @@ pub(super) fn boot_app_cores( global_memory, global_pmem, thread: thread.id, - replica: kernel_node - .try_clone() - .expect("Not enough memory to initialize system"), fs_replica: thread_fs_replica, }) .expect("Not enough memory to initialize system"); diff --git a/kernel/src/arch/x86_64/mod.rs b/kernel/src/arch/x86_64/mod.rs index dac19add9..e7c9f82da 100644 --- a/kernel/src/arch/x86_64/mod.rs +++ b/kernel/src/arch/x86_64/mod.rs @@ -188,20 +188,23 @@ pub(crate) fn start_app_core(args: Arc, initialized: &AtomicBool) { serial::init(); { - let local_ridx = args.replica.register(args.node).unwrap(); - crate::nr::NR_REPLICA.call_once(|| (args.replica.clone(), local_ridx)); - #[cfg(feature = "rackscale")] if crate::CMDLINE .get() .map_or(false, |c| c.mode == crate::cmdline::Mode::Client) { + let local_ridx = 
crate::nr::KERNEL_NODE_INSTANCE.register(args.node).unwrap(); + crate::nr::NR_REPLICA_REGISTRATION.call_once(|| local_ridx); crate::nrproc::register_thread_with_process_replicas(); crate::arch::rackscale::client_state::create_client_rpc_shmem_buffers(); } #[cfg(not(feature = "rackscale"))] - crate::nrproc::register_thread_with_process_replicas(); + { + let local_ridx = crate::nr::KERNEL_NODE_INSTANCE.register(args.node).unwrap(); + crate::nr::NR_REPLICA_REGISTRATION.call_once(|| local_ridx); + crate::nrproc::register_thread_with_process_replicas(); + } // For rackscale, only the controller needs cnrfs if let Some(core_fs_replica) = &args.fs_replica { @@ -210,9 +213,8 @@ pub(crate) fn start_app_core(args: Arc, initialized: &AtomicBool) { // Don't modify this line without adjusting `coreboot` integration test: info!( - "Core #{} initialized (replica idx {:?}) in {:?}.", + "Core #{} initialized in {:?}.", args.thread, - local_ridx, start.elapsed() ); } @@ -445,38 +447,11 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { irq::ioapic_initialize(); #[cfg(not(feature = "rackscale"))] - let kernel_node = { - use core::num::NonZeroUsize; - use crate::nr::KernelNode; - use nr2::nr::{AffinityChange, NodeReplicated}; - - // Let's go with one replica per NUMA node for now: - let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes()); - let numa_nodes = NonZeroUsize::new(numa_nodes).expect("At least one NUMA node"); - - // Create the global operation log and first replica and store it (needs - // TLS) - let kernel_node: Arc> = Arc::try_new( - NodeReplicated::new(numa_nodes, |afc: AffinityChange| { - let pcm = kcb::per_core_mem(); - match afc { - AffinityChange::Replica(r) => { - pcm.set_mem_affinity(r).expect("Can't set affinity") - } - AffinityChange::Revert(orig) => { - pcm.set_mem_affinity(orig).expect("Can't set affinity") - } - } - return 0; // xxx - }) - .expect("Not enough memory to initialize system"), - ) - .expect("Not enough memory to 
initialize system"); - - let local_ridx = kernel_node.register(0).unwrap(); - crate::nr::NR_REPLICA.call_once(|| (kernel_node.clone(), local_ridx)); - kernel_node - }; + { + lazy_static::initialize(&crate::nr::KERNEL_NODE_INSTANCE); + let local_ridx = crate::nr::KERNEL_NODE_INSTANCE.register(0).unwrap(); + crate::nr::NR_REPLICA_REGISTRATION.call_once(|| local_ridx); + } // Starting to initialize file-system #[cfg(not(feature = "rackscale"))] @@ -520,22 +495,21 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { } #[cfg(feature = "rackscale")] - let kernel_node = { + { if crate::CMDLINE .get() .map_or(false, |c| c.mode == crate::cmdline::Mode::Client) { lazy_static::initialize(&process::PROCESS_TABLE); crate::nrproc::register_thread_with_process_replicas(); - } - lazy_static::initialize(&crate::nr::KERNEL_NODE_INSTANCE); - let kernel_node = crate::nr::KERNEL_NODE_INSTANCE.clone(); - - let local_ridx = kernel_node.register(0).unwrap(); - log::info!("Kernel node replica idx is {:?}", local_ridx); - crate::nr::NR_REPLICA.call_once(|| (kernel_node.clone(), local_ridx)); - kernel_node + lazy_static::initialize(&crate::nr::KERNEL_NODE_INSTANCE); + let kernel_node = crate::nr::KERNEL_NODE_INSTANCE.clone(); + + let local_ridx = kernel_node.register(0).unwrap(); + log::info!("Kernel node replica idx is {:?}", local_ridx); + crate::nr::NR_REPLICA_REGISTRATION.call_once(|| local_ridx); + } }; #[cfg(feature = "gdb")] @@ -548,7 +522,7 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { } // Bring up the rest of the system (needs topology, APIC, and global memory) - coreboot::boot_app_cores(kernel_node, fs_logs, fs_replica); + coreboot::boot_app_cores(fs_logs, fs_replica); // Done with initialization, now we go in // the arch-independent part: diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs index 3df58bd15..54b0c6095 100644 --- a/kernel/src/arch/x86_64/process.rs +++ b/kernel/src/arch/x86_64/process.rs @@ -120,15 +120,18 @@ 
lazy_static! { let process: Arc>>> = Arc::try_new( RwLock::new(NodeReplicated::new(num_replicas, |afc: AffinityChange| { let pcm = kcb::per_core_mem(); + //log::info!("Got AffinityChange: {:?}", afc); match afc { AffinityChange::Replica(r) => { - pcm.set_mem_affinity(mid_to_shmem_affinity(r)).expect("Can't change affinity"); + let affinity = { pcm.physical_memory.borrow().affinity }; + pcm.set_mem_affinity(mid_to_shmem_affinity(r)).expect("Can't set affinity"); + return affinity; } - AffinityChange::Revert(_orig) => { - pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't set affinity") + AffinityChange::Revert(orig) => { + pcm.set_mem_affinity(orig).expect("Can't set affinity"); + return 0; } } - return 0; // TODO(dynrep): Return error code }) .expect("Not enough memory to initialize system"), )) @@ -204,15 +207,18 @@ fn create_process_table() -> ArrayVec>>> = Arc::try_new(RwLock::new( NodeReplicated::new(num_replicas, |afc: AffinityChange| { let pcm = kcb::per_core_mem(); + //log::info!("Got AffinityChange: {:?}", afc); match afc { AffinityChange::Replica(r) => { - pcm.set_mem_affinity(r).expect("Can't set affinity") + let affinity = { pcm.physical_memory.borrow().affinity }; + pcm.set_mem_affinity(crate::memory::shmem_affinity::mid_to_shmem_affinity(r)).expect("Can't set affinity"); + return affinity; } AffinityChange::Revert(orig) => { - pcm.set_mem_affinity(orig).expect("Can't set affinity") + pcm.set_mem_affinity(orig).expect("Can't set affinity"); + return 0; } } - return 0; // TODO(dynrep): Return error code }) .expect("Not enough memory to initialize system")), ) diff --git a/kernel/src/arch/x86_64/rackscale/controller.rs b/kernel/src/arch/x86_64/rackscale/controller.rs index 3989403c8..24c313e57 100644 --- a/kernel/src/arch/x86_64/rackscale/controller.rs +++ b/kernel/src/arch/x86_64/rackscale/controller.rs @@ -84,6 +84,7 @@ pub(crate) fn run() { // Initialize processes use crate::arch::process::PROCESS_TABLE; lazy_static::initialize(&PROCESS_TABLE); 
+ lazy_static::initialize(&crate::nr::KERNEL_NODE_INSTANCE); #[cfg(feature = "test-controller-shmem-alloc")] { diff --git a/kernel/src/nr.rs b/kernel/src/nr.rs index 3876e62fb..ad7cf168a 100644 --- a/kernel/src/nr.rs +++ b/kernel/src/nr.rs @@ -9,8 +9,8 @@ use hashbrown::HashMap; use log::{error, trace}; use nr2::nr::{Dispatch, NodeReplicated, ThreadToken}; use spin::Once; +use crate::arch::kcb; -#[cfg(feature = "rackscale")] use lazy_static::lazy_static; use crate::error::KError; @@ -19,7 +19,7 @@ use crate::process::{Pid, MAX_PROCESSES}; /// Kernel scheduler / process mgmt. replica #[thread_local] -pub(crate) static NR_REPLICA: Once<(Arc>, ThreadToken)> = Once::new(); +pub(crate) static NR_REPLICA_REGISTRATION: Once = Once::new(); // Base nr log. The rackscale controller needs to save a reference to this, so it can give // clones to client so they can create replicas of their own. @@ -30,7 +30,6 @@ lazy_static! { use nr2::nr::AffinityChange; use crate::memory::shmem_affinity::mid_to_shmem_affinity; use crate::memory::shmem_affinity::local_shmem_affinity; - use crate::arch::kcb; if crate::CMDLINE .get() @@ -50,16 +49,19 @@ lazy_static! 
{ let nr = Arc::try_new( NodeReplicated::new(num_replicas, |afc: AffinityChange| { - /*let pcm = kcb::per_core_mem(); + let pcm = kcb::per_core_mem(); + //log::info!("Got AffinityChange: {:?}", afc); match afc { AffinityChange::Replica(r) => { - pcm.set_mem_affinity(mid_to_shmem_affinity(r)).expect("Can't change affinity"); + let affinity = { pcm.physical_memory.borrow().affinity }; + pcm.set_mem_affinity(mid_to_shmem_affinity(r)).expect("Can't set affinity"); + return affinity; } - AffinityChange::Revert(_orig) => { - pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't set affinity") + AffinityChange::Revert(orig) => { + pcm.set_mem_affinity(orig).expect("Can't set affinity"); + return 0; } - }*/ - return 0; // TODO(dynrep): Return error code + } }) .expect("Not enough memory to initialize system"), ) @@ -85,6 +87,41 @@ lazy_static! { }; } +#[cfg(not(feature = "rackscale"))] +lazy_static! { + pub(crate) static ref KERNEL_NODE_INSTANCE: Arc> = { + use core::num::NonZeroUsize; + use crate::nr::KernelNode; + use nr2::nr::{AffinityChange, NodeReplicated}; + + // Let's go with one replica per NUMA node for now: + let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes()); + let numa_nodes = NonZeroUsize::new(numa_nodes).expect("At least one NUMA node"); + + // Create the global operation log and first replica and store it (needs + // TLS) + let kernel_node: Arc> = Arc::try_new( + NodeReplicated::new(numa_nodes, |afc: AffinityChange| { + let pcm = kcb::per_core_mem(); + match afc { + AffinityChange::Replica(r) => { + pcm.set_mem_affinity(r).expect("Can't set affinity") + } + AffinityChange::Revert(orig) => { + pcm.set_mem_affinity(orig).expect("Can't set affinity") + } + } + return 0; // xxx + }) + .expect("Not enough memory to initialize system"), + ) + .expect("Not enough memory to initialize system"); + + kernel_node + }; +} + + #[derive(PartialEq, Clone, Copy, Debug)] pub(crate) enum ReadOps { CurrentProcess(kpi::system::GlobalThreadId), @@ 
-140,12 +177,8 @@ impl Default for KernelNode { impl KernelNode { pub(crate) fn synchronize() -> Result<(), KError> { - NR_REPLICA - .get() - .map_or(Err(KError::ReplicaNotSet), |(replica, token)| { - replica.sync(*token); - Ok(()) - }) + KERNEL_NODE_INSTANCE.sync(*NR_REPLICA_REGISTRATION.get().unwrap()); + Ok(()) } pub(crate) fn allocate_core_to_process( @@ -154,18 +187,17 @@ impl KernelNode { affinity: Option, gtid: Option, ) -> Result { - NR_REPLICA - .get() - .map_or(Err(KError::ReplicaNotSet), |(replica, token)| { - let op = Op::SchedAllocateCore(pid, affinity, gtid, entry_point); - let response = replica.execute_mut(op, *token); + // todo node id + crate::nr::NR_REPLICA_REGISTRATION.call_once(|| crate::nr::KERNEL_NODE_INSTANCE.register(0).unwrap()); - match response { - Ok(NodeResult::CoreAllocated(rgtid)) => Ok(rgtid), - Err(e) => Err(e), - Ok(_) => unreachable!("Got unexpected response"), - } - }) + let op = Op::SchedAllocateCore(pid, affinity, gtid, entry_point); + let response = KERNEL_NODE_INSTANCE.execute_mut(op, *NR_REPLICA_REGISTRATION.get().unwrap()); + + match response { + Ok(NodeResult::CoreAllocated(rgtid)) => Ok(rgtid), + Err(e) => Err(e), + Ok(_) => unreachable!("Got unexpected response"), + } } pub(crate) fn release_core_from_process( diff --git a/kernel/src/process.rs b/kernel/src/process.rs index f3c13db20..5fe7a0f20 100644 --- a/kernel/src/process.rs +++ b/kernel/src/process.rs @@ -462,17 +462,14 @@ impl elfloader::ElfLoader for DataSecAllocator { /// Create an initial VSpace pub(crate) fn make_process(binary: &'static str) -> Result { // Allocate a new process - let pid = - crate::nr::NR_REPLICA - .get() - .map_or(Err(KError::ReplicaNotSet), |(replica, token)| { - let response = replica.execute_mut(crate::nr::Op::AllocatePid, *token)?; - if let crate::nr::NodeResult::PidAllocated(pid) = response { - Ok(pid) - } else { - Err(KError::ProcessLoadingFailed) - } - })?; + let pid = { + let response = 
crate::nr::KERNEL_NODE_INSTANCE.execute_mut(crate::nr::Op::AllocatePid, *crate::nr::NR_REPLICA_REGISTRATION.get().unwrap())?; + if let crate::nr::NodeResult::PidAllocated(pid) = response { + Ok(pid) + } else { + Err(KError::ProcessLoadingFailed) + } + }?; #[cfg(feature = "rackscale")] let affinity = if crate::CMDLINE diff --git a/kernel/src/scheduler/mod.rs b/kernel/src/scheduler/mod.rs index e55a57893..fd83a11fa 100644 --- a/kernel/src/scheduler/mod.rs +++ b/kernel/src/scheduler/mod.rs @@ -9,7 +9,7 @@ use crate::arch::process::ArchProcessManagement; use crate::arch::timer; use crate::error::KError; use crate::nr; -use crate::nr::NR_REPLICA; +use crate::nr::{KERNEL_NODE_INSTANCE, NR_REPLICA_REGISTRATION}; use crate::nrproc::NrProcess; use crate::process::{Executor, ResumeHandle}; @@ -36,62 +36,60 @@ pub(crate) fn schedule() -> ! { // No process assigned to core? Figure out if there is one now: if unlikely(!crate::arch::process::has_executor()) { - if let Some((replica, token)) = NR_REPLICA.get() { - loop { - let response = replica.execute( - nr::ReadOps::CurrentProcess(*crate::environment::CORE_ID), - *token, - ); + loop { + let response = KERNEL_NODE_INSTANCE.execute( + nr::ReadOps::CurrentProcess(*crate::environment::CORE_ID), + *NR_REPLICA_REGISTRATION.get().unwrap(), + ); - match response { - Ok(nr::NodeResult::CoreInfo(ci)) => { - let executor = - NrProcess::allocate_executor(&apm, ci.pid).expect("This should work"); - unsafe { - (*executor.vcpu_kernel()).resume_with_upcall = ci.entry_point; - } + match response { + Ok(nr::NodeResult::CoreInfo(ci)) => { + let executor = + NrProcess::allocate_executor(&apm, ci.pid).expect("This should work"); + unsafe { + (*executor.vcpu_kernel()).resume_with_upcall = ci.entry_point; + } - // info!("Start execution of {} on gtid {}", executor.eid, gtid); - let no = crate::arch::process::swap_current_executor(executor); - assert!(no.is_none(), "Handle the case where we replace a process."); - if is_replica_main_thread { - // 
Make sure we periodically try and advance the replica on main-thread - // even if we're running something (e.g., if everything polls in - // user-space we can livelock) - timer::set(timer::DEFAULT_TIMER_DEADLINE); - } - break; + // info!("Start execution of {} on gtid {}", executor.eid, gtid); + let no = crate::arch::process::swap_current_executor(executor); + assert!(no.is_none(), "Handle the case where we replace a process."); + if is_replica_main_thread { + // Make sure we periodically try and advance the replica on main-thread + // even if we're running something (e.g., if everything polls in + // user-space we can livelock) + timer::set(timer::DEFAULT_TIMER_DEADLINE); } - Err(KError::NoExecutorForCore) => { - if is_replica_main_thread { - // There is no process but we're the "main" thread, - // aggressively try and advance the replica - let start = rawtime::Instant::now(); - crate::arch::advance_fs_replica(); + break; + } + Err(KError::NoExecutorForCore) => { + if is_replica_main_thread { + // There is no process but we're the "main" thread, + // aggressively try and advance the replica + let start = rawtime::Instant::now(); + crate::arch::advance_fs_replica(); - if start.elapsed().as_millis() < 1 { - // Wait for a bit in case we don't end up doing - // any work, otherwise this causes too much - // contention and tput drops around ~300k - for _i in 0..25_000 { - core::hint::spin_loop(); - } + if start.elapsed().as_millis() < 1 { + // Wait for a bit in case we don't end up doing + // any work, otherwise this causes too much + // contention and tput drops around ~300k + for _i in 0..25_000 { + core::hint::spin_loop(); } - continue; - } else { - // There is no process, set a timer and go to sleep - timer::set(timer::DEFAULT_TIMER_DEADLINE); } - crate::arch::halt(); - } - other => { - unreachable!( - "Unexpected return from ReadOps::CurrentExecutor {:?}.", - other - ); + continue; + } else { + // There is no process, set a timer and go to sleep + 
timer::set(timer::DEFAULT_TIMER_DEADLINE); } - }; - } + crate::arch::halt(); + } + other => { + unreachable!( + "Unexpected return from ReadOps::CurrentExecutor {:?}.", + other + ); + } + }; } } debug_assert!( diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index 349a3326e..f171fd47d 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -880,6 +880,161 @@ fn rackscale_memcached_dcm(transport: RackscaleTransport, dcm_config: Option, + ) -> Result<()> { + let _config = arg.expect("match function expects a memcached config"); + + // match the title + let (prev, matched) = proc.exp_regex(r#"INTERNAL BENCHMARK CONFIGURE"#)?; + + *output += prev.as_str(); + *output += matched.as_str(); + + // x_benchmark_mem = 10 MB + let (prev, matched) = proc.exp_regex(r#"x_benchmark_mem = (\d+) MB"#)?; + println!("> {}", matched); + let b_mem = matched.replace("x_benchmark_mem = ", "").replace(" MB", ""); + + *output += prev.as_str(); + *output += matched.as_str(); + + // number of threads: 3 + let (prev, matched) = proc.exp_regex(r#"number of threads: (\d+)"#)?; + println!("> {}", matched); + let b_threads = matched.replace("number of threads: ", ""); + + *output += prev.as_str(); + *output += matched.as_str(); + + // number of keys: 131072 + let (prev, matched) = proc.exp_regex(r#"number of keys: (\d+)"#)?; + println!("> {}", matched); + + *output += prev.as_str(); + *output += matched.as_str(); + + let (prev, matched) = proc.exp_regex(r#"Executing (\d+) queries with (\d+) threads"#)?; + println!("> {}", matched); + + *output += prev.as_str(); + *output += matched.as_str(); + + // benchmark took 129 seconds + let (prev, matched) = proc.exp_regex(r#"benchmark took (\d+) ms"#)?; + println!("> {}", matched); + let b_time = matched.replace("benchmark took ", "").replace(" ms", ""); + + *output += prev.as_str(); + *output += matched.as_str(); + + // benchmark took 7937984 queries / second + 
let (prev, matched) = proc.exp_regex(r#"benchmark took (\d+) queries / second"#)?; + println!("> {}", matched); + let b_thpt = matched + .replace("benchmark took ", "") + .replace(" queries / second", ""); + + *output += prev.as_str(); + *output += matched.as_str(); + + let (prev, matched) = proc.exp_regex(r#"benchmark executed (\d+)"#)?; + println!("> {}", matched); + let b_queries = matched + .replace("benchmark executed ", "") + .split(" ") + .next() + .unwrap() + .to_string(); + + *output += prev.as_str(); + *output += matched.as_str(); + + // Append parsed results to a CSV file + let write_headers = !Path::new(file_name).exists(); + let mut csv_file = OpenOptions::new() + .append(true) + .create(true) + .open(file_name) + .expect("Can't open file"); + if write_headers { + let row = "git_rev,benchmark,nthreads,mem,queries,time,thpt,num_clients,num_replicas\n"; + let r = csv_file.write(row.as_bytes()); + assert!(r.is_ok()); + } + + let actual_num_clients = if is_baseline { 0 } else { num_clients }; + + let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); + assert!(r.is_ok()); + let out = format!( + "memcached,{},{},{},{},{},{},{}", + b_threads, b_mem, b_queries, b_time, b_thpt, actual_num_clients, num_clients + ); + let r = csv_file.write(out.as_bytes()); + assert!(r.is_ok()); + let r = csv_file.write("\n".as_bytes()); + assert!(r.is_ok()); + + Ok(()) + } + + let config = if is_smoke { + MemcachedInternalConfig { + num_queries: 100_000, + mem_size: 16, + } + } else { + MemcachedInternalConfig { + num_queries: 1_000_000, // TODO(rackscale): should be 100_000_000, + mem_size: 16, // TODO(rackscale): should be 32_000, + } + }; + + let mut test = RackscaleRun::new("userspace-smp".to_string(), built); + test.controller_match_fn = controller_match_fn; + test.transport = transport; + test.shmem_size *= 2; + test.use_affinity_shmem = cfg!(feature = "affinity-shmem"); + test.use_qemu_huge_pages = cfg!(feature = "affinity-shmem"); + test.file_name = 
file_name.to_string(); + test.run_dhcpd_for_baseline = true; + test.num_clients = 2; + test.cores_per_client = 4; + test.cmd = format!( + r#"init=memcachedbench.bin initargs={} appcmd='--x-benchmark-mem={} --x-benchmark-queries={}'"#, + 2*4, config.mem_size, config.num_queries + ); + test.arg = Some(config); + test.run_rackscale(); +} + #[test] #[cfg(not(feature = "baremetal"))] fn s11_rackscale_monetdb_benchmark() { diff --git a/lib/node-replication2 b/lib/node-replication2 index b5ffc6a67..eba8f64b5 160000 --- a/lib/node-replication2 +++ b/lib/node-replication2 @@ -1 +1 @@ -Subproject commit b5ffc6a676ac036ebaed6f3de1d04f4ab4d36177 +Subproject commit eba8f64b55cbde8e510565a382465f77472c8238 From f5e28218d78817be18ddaadc20d1c3b2e516773a Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Sat, 18 Nov 2023 20:12:16 -0800 Subject: [PATCH 13/32] [revert] Relax two asserts, need to revert and investigate why. Signed-off-by: Gerd Zellweger --- kernel/src/memory/mcache.rs | 2 +- kernel/src/memory/mod.rs | 2 +- kernel/src/nrproc.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/src/memory/mcache.rs b/kernel/src/memory/mcache.rs index d3a02dc66..3277a62d1 100644 --- a/kernel/src/memory/mcache.rs +++ b/kernel/src/memory/mcache.rs @@ -325,7 +325,7 @@ impl GrowBackend for MCache { for frame in free_list { assert_eq!(frame.size(), LARGE_PAGE_SIZE); assert_eq!(frame.base % LARGE_PAGE_SIZE, 0); - assert_eq!(frame.affinity, self.node); + //assert_eq!(frame.affinity, self.node); self.large_page_addresses .try_push(frame.base) diff --git a/kernel/src/memory/mod.rs b/kernel/src/memory/mod.rs index ce9b67085..8d087cced 100644 --- a/kernel/src/memory/mod.rs +++ b/kernel/src/memory/mod.rs @@ -760,7 +760,7 @@ unsafe impl GlobalAlloc for KernelAllocator { } else if is_shmem_affinity(affinity) && !is_shmem_addr_with_affinity(ptr as u64, affinity, true) { // TODO(rackscale): should switch to non-shmem affinity for alloc below. 
// TODO(rackscale): check if shmem is a match for id? - panic!("Trying to realloc shmem to wrong or non- shmem allocator"); + //panic!("Trying to realloc shmem to wrong or non- shmem allocator"); } else if !is_shmem_affinity(affinity) && is_shmem_addr(ptr as u64, false, true) { // TODO(rackscale): should switch to use shmem affinity for alloc below. // TODO(rackscale): check if shmem is a match for id? diff --git a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs index 8246fc217..9bdaf1f2f 100644 --- a/kernel/src/nrproc.rs +++ b/kernel/src/nrproc.rs @@ -45,7 +45,7 @@ pub(crate) fn register_thread_with_process_replicas() { debug_assert!(PROCESS_TABLE.len() > pid, "Invalid PID"); let token = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).register(node); - log::debug!("MT_ID is {}: {node} registered {pid} {token:?}", *crate::environment::MT_ID); + log::info!("MT_ID is {}: {node} registered {pid} {token:?}", *crate::environment::MT_ID); tokens.push(token.expect("Need to be able to register")); } From 13abf373f7286256704e31b75854c8f6fb9397bd Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Sun, 19 Nov 2023 23:52:47 -0800 Subject: [PATCH 14/32] A working configuration. 
Signed-off-by: Gerd Zellweger --- kernel/Cargo.toml | 2 ++ kernel/src/arch/x86_64/irq.rs | 3 ++- kernel/tests/s11_rackscale_benchmarks.rs | 7 +++++-- usr/rkapps/build.rs | 4 ++-- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml index 7656474c0..ff2b39eed 100644 --- a/kernel/Cargo.toml +++ b/kernel/Cargo.toml @@ -103,6 +103,8 @@ rackscale = ["rpc"] gdb = [] # Use to enable 4kiB only in kernel - useful for memcached, similar benchmarks pages-4k = [] +# dynrep benchmarking +dynrep = [] # # The following features are used for testing *only* and should not be enabled diff --git a/kernel/src/arch/x86_64/irq.rs b/kernel/src/arch/x86_64/irq.rs index de43bc7da..8aef27e5c 100644 --- a/kernel/src/arch/x86_64/irq.rs +++ b/kernel/src/arch/x86_64/irq.rs @@ -528,7 +528,8 @@ unsafe fn timer_handler(_a: &ExceptionArguments) { nrproc::NrProcess::::synchronize(pid); } - if *crate::environment::MT_ID == 0 { + #[cfg(feature = "dynrep")] + if *crate::environment::MT_ID == 4 { use crate::arch::process::current_pid; let pid = current_pid().expect("dont have a pid?"); diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index f171fd47d..22130ea90 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -897,6 +897,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { .user_feature("rkapps:memcached-bench") .set_rackscale(true) .kernel_feature("pages-4k") + .kernel_feature("dynrep") .release() .build(); @@ -1002,17 +1003,19 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { let r = csv_file.write("\n".as_bytes()); assert!(r.is_ok()); + println!("> {}", output); + Ok(()) } let config = if is_smoke { MemcachedInternalConfig { - num_queries: 100_000, + num_queries: 100_000_000, mem_size: 16, } } else { MemcachedInternalConfig { - num_queries: 1_000_000, // TODO(rackscale): should be 100_000_000, + num_queries: 100_000_000, // 
TODO(rackscale): should be 100_000_000, mem_size: 16, // TODO(rackscale): should be 32_000, } }; diff --git a/usr/rkapps/build.rs b/usr/rkapps/build.rs index 534bbdd80..064c22d36 100644 --- a/usr/rkapps/build.rs +++ b/usr/rkapps/build.rs @@ -138,11 +138,11 @@ fn main() { .unwrap(); println!( - "CHECKOUT be303d8bfc2c40d63704848bb3acd9e075dd61e4 {:?}", + "CHECKOUT 1f36e49a3c5fc529cd80278a1bd73b9941c813e6 {:?}", out_dir ); Command::new("git") - .args(&["checkout", "be303d8bfc2c40d63704848bb3acd9e075dd61e4"]) + .args(&["checkout", "1f36e49a3c5fc529cd80278a1bd73b9941c813e6"]) .current_dir(&Path::new(&out_dir)) .status() .unwrap(); From 64e2fec819c82007ddae414c8caf93969d466e55 Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Mon, 27 Nov 2023 15:23:55 -0800 Subject: [PATCH 15/32] merge fixups. Signed-off-by: Gerd Zellweger --- Cargo.lock | 47 ++++++++++++++++++++++++++++++++++++++++++++ kernel/src/nr.rs | 20 ++++++++----------- kernel/src/nrproc.rs | 2 -- 3 files changed, 55 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 050df3015..e9763ca33 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -238,6 +238,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8fe8f5a8a398345e52358e18ff07cc17a568fbca5c6f73873d3a62056309603" +[[package]] +name = "base64ct" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" + [[package]] name = "beef" version = "0.5.2" @@ -323,6 +329,15 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array 0.14.6", +] + [[package]] name = 
"bootloader" version = "0.0.1" @@ -590,6 +605,16 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array 0.14.6", + "typenum", +] + [[package]] name = "cstr_core" version = "0.2.5" @@ -659,6 +684,16 @@ dependencies = [ "num_cpus", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "displaydoc" version = "0.2.3" @@ -1159,12 +1194,14 @@ name = "init" version = "0.1.0" dependencies = [ "arrayvec", + "base64ct", "cstr_core", "hashbrown 0.11.2", "kpi", "lazy_static", "lineup", "log", + "md-5", "num-traits", "proptest 1.0.0 (git+https://github.com/gz/proptest.git?branch=x86-asm)", "rawtime", @@ -1401,6 +1438,16 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if 1.0.0", + "digest", +] + [[package]] name = "memchr" version = "2.5.0" diff --git a/kernel/src/nr.rs b/kernel/src/nr.rs index ad7cf168a..fb62458b8 100644 --- a/kernel/src/nr.rs +++ b/kernel/src/nr.rs @@ -205,18 +205,14 @@ impl KernelNode { affinity: Option, gtid: kpi::system::GlobalThreadId, ) -> Result<(), KError> { - NR_REPLICA - .get() - .map_or(Err(KError::ReplicaNotSet), |(replica, token)| { - let op = Op::SchedReleaseCore(pid, affinity, gtid); - let response = replica.execute_mut(op, *token); - - match response { - Ok(NodeResult::CoreReleased) => 
Ok(()), - Err(e) => Err(e), - Ok(_) => unreachable!("Got unexpected response"), - } - }) + let op = Op::SchedReleaseCore(pid, affinity, gtid); + let response = KERNEL_NODE_INSTANCE + .execute_mut(op,*NR_REPLICA_REGISTRATION.get().unwrap()); + match response { + Ok(NodeResult::CoreReleased) => Ok(()), + Err(e) => Err(e), + Ok(_) => unreachable!("Got unexpected response"), + } } } diff --git a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs index 9bdaf1f2f..8a0512474 100644 --- a/kernel/src/nrproc.rs +++ b/kernel/src/nrproc.rs @@ -15,8 +15,6 @@ use nr2::nr::{Dispatch, NodeReplicated, ThreadToken, rwlock::RwLock}; use spin::Once; use crate::arch::process::PROCESS_TABLE; -use crate::arch::MAX_NUMA_NODES; -use crate::arch::Module; use crate::error::{KError, KResult}; use crate::memory::vspace::{AddressSpace, MapAction, TlbFlushHandle}; use crate::memory::{Frame, PAddr, VAddr}; From ef9b59ce6318a0d75ee3132091d99f1041668601 Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Mon, 27 Nov 2023 17:22:22 -0800 Subject: [PATCH 16/32] JUST LOGGING. 
Signed-off-by: Gerd Zellweger --- kernel/src/arch/x86_64/mod.rs | 7 ++++ .../src/arch/x86_64/rackscale/client_state.rs | 6 +++- .../src/arch/x86_64/rackscale/controller.rs | 6 +++- .../arch/x86_64/rackscale/controller_state.rs | 4 +-- kernel/src/arch/x86_64/tlb.rs | 13 +++++++- kernel/src/memory/mod.rs | 32 +++++++++++++++++-- kernel/src/transport/shmem.rs | 22 ++++++++++--- lib/rpc/src/transport/shmem/queue_mpmc.rs | 12 ++++++- 8 files changed, 90 insertions(+), 12 deletions(-) diff --git a/kernel/src/arch/x86_64/mod.rs b/kernel/src/arch/x86_64/mod.rs index e7c9f82da..bee96be57 100644 --- a/kernel/src/arch/x86_64/mod.rs +++ b/kernel/src/arch/x86_64/mod.rs @@ -419,7 +419,9 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { { use crate::arch::rackscale::controller_state::CONTROLLER_SHMEM_CACHES; lazy_static::initialize(&CONTROLLER_SHMEM_CACHES); + log::info!("before lazy_static::initialize DCM_CLIENT"); lazy_static::initialize(&crate::arch::rackscale::dcm::DCM_CLIENT); + log::info!("after lazy_static::initialize DCM_CLIENT"); } else { use crate::arch::irq::{ REMOTE_TLB_WORK_PENDING_SHMEM_VECTOR, REMOTE_TLB_WORK_PENDING_VECTOR, @@ -437,6 +439,7 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { } } // Initialize the workqueues used for distributed TLB shootdowns + log::info!("after lazy_static::initialize RACKSCALE_CLIENT_WORKQUEUES"); lazy_static::initialize(&crate::arch::tlb::RACKSCALE_CLIENT_WORKQUEUES); log::info!("Finished inititializing client work queues"); } @@ -472,13 +475,17 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { .get() .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller) { + log::info!("1 cnrfs"); let fs_logs = crate::fs::cnrfs::allocate_logs(); let fs_logs_cloned = fs_logs .try_clone() .expect("Not enough memory to initialize system"); // Construct the first replica + log::info!("2 cnrfs"); let fs_replica = MlnrReplica::::new(fs_logs_cloned); crate::fs::cnrfs::init_cnrfs_on_thread(fs_replica.clone()); 
+ log::info!("3 cnrfs"); + (fs_logs, Some(fs_replica)) } else { use alloc::vec::Vec; diff --git a/kernel/src/arch/x86_64/rackscale/client_state.rs b/kernel/src/arch/x86_64/rackscale/client_state.rs index daf91843a..795cbbaf8 100644 --- a/kernel/src/arch/x86_64/rackscale/client_state.rs +++ b/kernel/src/arch/x86_64/rackscale/client_state.rs @@ -45,6 +45,7 @@ impl ClientState { .get() .map_or(false, |c| c.transport == Transport::Ethernet) { + log::info!("before client ready"); Arc::new(Mutex::new( crate::transport::ethernet::init_ethernet_rpc( smoltcp::wire::IpAddress::v4(172, 31, 0, 11), @@ -59,14 +60,17 @@ impl ClientState { crate::transport::shmem::init_shmem_rpc(true) .expect("Failed to initialize shmem RPC"), )) + }; + log::info!("per_process_base_pages"); let mut per_process_base_pages = ArrayVec::new(); for _i in 0..MAX_PROCESSES { // TODO(rackscale): this is a bogus affinity because it should really be "ANY_SHMEM" per_process_base_pages.push(Mutex::new(FrameCacheBase::new(local_shmem_affinity()))); } + log::info!("affinity_base_pages"); let mut affinity_base_pages = ArrayVec::new(); for i in 0..MAX_MACHINES { affinity_base_pages.push(Mutex::new(Box::new(FrameCacheBase::new( @@ -74,7 +78,7 @@ impl ClientState { )) as Box)); } - log::debug!("Finished initializing client state"); + log::info!("Finished initializing client state"); ClientState { rpc_client, affinity_base_pages: Arc::new(affinity_base_pages), diff --git a/kernel/src/arch/x86_64/rackscale/controller.rs b/kernel/src/arch/x86_64/rackscale/controller.rs index 24c313e57..d0e989ce8 100644 --- a/kernel/src/arch/x86_64/rackscale/controller.rs +++ b/kernel/src/arch/x86_64/rackscale/controller.rs @@ -65,20 +65,24 @@ pub(crate) fn run() { ClientReadyCount.fetch_add(1, Ordering::SeqCst); + log::info!("before DCMServerReady"); // Wait for all clients to connect before fulfilling any RPCs. 
while !DCMServerReady.load(Ordering::SeqCst) {} + log::info!("after DCMServerReady"); server .add_client(&CLIENT_REGISTRAR) .expect("Failed to accept client"); ClientReadyCount.fetch_add(1, Ordering::SeqCst); - + + log::info!("before ClientReadyCount"); // Wait for all clients to connect before fulfilling any RPCs. while ClientReadyCount.load(Ordering::SeqCst) != (*crate::environment::NUM_MACHINES - 1) as u64 { } + log::info!("before lazy_static::initialize(&PROCESS_TABLE);"); // TODO(dynrep): here is the point where we have all the memory regions and // could create NodeReplicated instances for each client. // Initialize processes diff --git a/kernel/src/arch/x86_64/rackscale/controller_state.rs b/kernel/src/arch/x86_64/rackscale/controller_state.rs index 8b698c5c3..ff1e09f33 100644 --- a/kernel/src/arch/x86_64/rackscale/controller_state.rs +++ b/kernel/src/arch/x86_64/rackscale/controller_state.rs @@ -25,7 +25,7 @@ lazy_static! { pub(crate) static ref CONTROLLER_SHMEM_CACHES: Arc>, MAX_MACHINES>> = { let mut shmem_caches = ArrayVec::new(); // TODO(rackscale): think about how we should constrain the mcache? - shmem_caches.push(Mutex::new(Box::new(MCache::<2048, 65536>::new_with_frame::<2048, 65536>( + shmem_caches.push(Mutex::new(Box::new(MCache::<2048, 2048>::new_with_frame::<2048, 2048>( local_shmem_affinity(), get_affinity_shmem(), )) as Box)); @@ -33,7 +33,7 @@ lazy_static! { shmem_caches.push(Mutex::new(Box::new(FrameCacheBase::new(mid_to_shmem_affinity(i))) as Box)); } - + log::info!("CONTROLLER_SHMEM_CACHES initialized"); Arc::new(shmem_caches) }; } diff --git a/kernel/src/arch/x86_64/tlb.rs b/kernel/src/arch/x86_64/tlb.rs index 657ffe835..87ecbb987 100644 --- a/kernel/src/arch/x86_64/tlb.rs +++ b/kernel/src/arch/x86_64/tlb.rs @@ -54,7 +54,8 @@ lazy_static! 
{ use crate::arch::kcb::per_core_mem; use crate::memory::shmem_affinity::local_shmem_affinity; let local_affinity = local_shmem_affinity(); - + + log::info!("before affinity"); // We want to allocate the queues in shared memory let affinity = { let pcm = per_core_mem(); @@ -62,11 +63,19 @@ lazy_static! { pcm.set_mem_affinity(local_affinity).expect("Can't change affinity"); affinity }; + log::info!("changed affinity to {} local_affinity={}", affinity, local_affinity); + use core::alloc::Layout; + let ptr = unsafe { alloc::alloc::alloc(Layout::from_size_align(0xbeef, 16).unwrap()) }; + log::info!("ptr {:p}", ptr); let channels = { let num_clients = *crate::environment::NUM_MACHINES - 1; + log::info!("{num_clients} machines={}", *crate::environment::NUM_MACHINES); + let mut channels = Vec::try_with_capacity(num_clients).expect("Not enough memory to initialize system"); + log::info!("channels done"); + for _i in 0..num_clients { // ArrayQueue does memory allocation on `new`, maybe have try_new, // but this is fine since it's during initialization @@ -75,12 +84,14 @@ lazy_static! 
{ Arc::new(channels) }; + log::info!("after channels"); // Reset mem allocator to use per core memory again if affinity != local_affinity { let pcm = per_core_mem(); pcm.set_mem_affinity(affinity).expect("Can't change affinity"); } + log::info!("after affinity"); channels } else { diff --git a/kernel/src/memory/mod.rs b/kernel/src/memory/mod.rs index 8d087cced..38e38c7e0 100644 --- a/kernel/src/memory/mod.rs +++ b/kernel/src/memory/mod.rs @@ -86,8 +86,18 @@ impl KernelAllocator { let mut zone_allocator = pcm.ezone_allocator()?; zone_allocator.allocate(layout).map_err(|e| e.into()) } else { + if layout.size() == 0xbeef { + log::info!("before 0xbeef bytes"); + } let mut zone_allocator = pcm.zone_allocator()?; - zone_allocator.allocate(layout).map_err(|e| e.into()) + if layout.size() == 0xbeef { + log::info!("after getting zone allocator"); + } + let ptr = zone_allocator.allocate(layout).map_err(|e| e.into()); + if layout.size() == 0xbeef { + log::info!("after alloc {:?}", ptr); + } + ptr } } AllocatorType::MemManager if layout.size() <= LARGE_PAGE_SIZE => { @@ -219,7 +229,13 @@ impl KernelAllocator { (AllocatorType::Zone, KError::CacheExhausted) => { let (needed_base_pages, needed_large_pages) = KernelAllocator::refill_amount(layout); + if layout.size() == 0xbeef { + log::info!("before maybe_refill_tcache"); + } self.maybe_refill_tcache(needed_base_pages, needed_large_pages)?; + if layout.size() == 0xbeef { + log::info!("before try_refill_zone"); + } self.try_refill_zone(layout) } (AllocatorType::MapBig, _) => { @@ -561,14 +577,26 @@ impl KernelAllocator { } } else { // Needs a large page - let frame = cas.pmanager.allocate_large_page()?; + if layout.size() == 0xbeef { + log::info!("before allocate_large_page"); + } + + let mut frame = cas.pmanager.allocate_large_page()?; unsafe { let large_page_ptr: *mut slabmalloc::LargeObjectPage = frame .uninitialized::() .as_mut_ptr(); + if layout.size() == 0xbeef { + log::info!("before zero frame={:?}", frame); + frame.zero(); 
+ log::info!("before refill_large frame={:?}", frame); + } cas.zone_allocator .refill_large(layout, &mut *large_page_ptr) .expect("This should always succeed"); + if layout.size() == 0xbeef { + log::info!("after refill_large"); + } } } } diff --git a/kernel/src/transport/shmem.rs b/kernel/src/transport/shmem.rs index fa60fae48..60b178262 100644 --- a/kernel/src/transport/shmem.rs +++ b/kernel/src/transport/shmem.rs @@ -176,22 +176,34 @@ impl ShmemDevice { ) .expect("Failed to write potential shmem memory region addresses"); - /* // Note: leaving this code as a comment as a way to test if all shmem is writeable. + #[cfg(feature = "rackscale")] match crate::CMDLINE.get().map_or(Mode::Native, |c| c.mode) { Mode::Controller => { + //let mut f= Frame::new(PAddr::from(0x70003f600000u64),2*1024*1024,12); + //log::info!("zeroing out shmem"); + //unsafe {f.zero()}; + log::info!("zeroing out shmem {:x} -- {:x}", mem_region.address, mem_region.address + mem_region.size); + for offset in 1..512 { + let myptr: *mut u8 = + (KERNEL_BASE + mem_region.address + (offset*4096)) as *mut u8; + log::info!("at addr {:x}", (myptr as u64 - KERNEL_BASE)); + unsafe { *myptr = 0x0; } + } + /* let mymemslice = unsafe { core::slice::from_raw_parts_mut( (KERNEL_BASE + mem_region.address) as *mut u8, mem_region.size as usize, ) - }; - mymemslice.fill(0); + }; + mymemslice.fill(0);*/ + log::info!("after out shmem {:x} -- {:x}", mem_region.address, mem_region.address + mem_region.size); } _ => {} } - */ + // Map the MSI-X table into kernel space kvspace @@ -344,6 +356,8 @@ pub(crate) fn create_shmem_transport(mid: MachineId) -> KResult= SHMEM_TRANSPORT_SIZE); let allocator = ShmemAllocator::new(base_addr.as_u64(), SHMEM_TRANSPORT_SIZE); + crate::CMDLINE.get().map(|c| log::info!("c.mode is {:?}", c.mode)); + match crate::CMDLINE.get().map_or(Mode::Native, |c| c.mode) { Mode::Controller => { let server_to_client_queue = diff --git a/lib/rpc/src/transport/shmem/queue_mpmc.rs 
b/lib/rpc/src/transport/shmem/queue_mpmc.rs index 8d4e3cbe7..7febd1ca9 100644 --- a/lib/rpc/src/transport/shmem/queue_mpmc.rs +++ b/lib/rpc/src/transport/shmem/queue_mpmc.rs @@ -154,18 +154,24 @@ impl<'a> State<'a> { // Calculate and check total data to push let push_data_len = values.iter().fold(0, |acc, x| acc + x.len()); assert!(push_data_len <= QUEUE_ENTRY_SIZE); - log::debug!("Attempt to push {:?} bytes", push_data_len); + log::info!("Attempt to push {:?} bytes", push_data_len); if push_data_len == 0 { return true; } let mask = self.mask; + log::info!("before enq"); let mut pos = self.enqueue_pos(Relaxed); + log::info!("after enq"); loop { + log::info!("before node"); let node = &self.buffer[pos & mask]; + log::info!("after node"); let seq = (*node.get()).sequence.load(Acquire); + log::info!("after seq"); let diff: isize = seq as isize - pos as isize; + log::info!("seq {:#x} diff {:#x}", seq, diff); match diff { 0 => { @@ -178,6 +184,8 @@ impl<'a> State<'a> { // Copy each value into the queue let mut offset = 0; for d in values.iter() { + log::info!("d is at {:#x}", d.as_ptr() as usize); + (*node.get()).value[offset..offset + d.len()].copy_from_slice(d); offset += d.len(); } @@ -192,6 +200,8 @@ impl<'a> State<'a> { _ => pos = self.enqueue_pos(Relaxed), } } + log::info!("pushed {:?} bytes", push_data_len); + true } From f2935e0129933ef59e843bd3cdd417d57309abea Mon Sep 17 00:00:00 2001 From: Erika Hunhoff Date: Tue, 28 Nov 2023 17:15:03 +0000 Subject: [PATCH 17/32] Ran cargo format, fixed some compilation issues for unit tests --- kernel/src/arch/unix/process.rs | 4 +- kernel/src/arch/unix/vspace.rs | 2 +- kernel/src/arch/x86_64/irq.rs | 26 ++- kernel/src/arch/x86_64/mod.rs | 2 +- kernel/src/arch/x86_64/process.rs | 66 +++--- .../src/arch/x86_64/rackscale/client_state.rs | 1 - .../src/arch/x86_64/rackscale/controller.rs | 2 +- .../x86_64/rackscale/get_shmem_structure.rs | 18 +- kernel/src/arch/x86_64/syscall.rs | 6 +- kernel/src/arch/x86_64/tlb.rs | 2 +- 
kernel/src/arch/x86_64/vspace/mod.rs | 9 +- kernel/src/arch/x86_64/vspace/page_table.rs | 10 +- kernel/src/environment.rs | 7 +- kernel/src/memory/vspace_model.rs | 5 + kernel/src/nr.rs | 15 +- kernel/src/nrproc.rs | 196 +++++++++++------- kernel/src/process.rs | 19 +- kernel/src/syscalls.rs | 2 +- kernel/src/transport/shmem.rs | 29 ++- kernel/tests/s06_rackscale_tests.rs | 2 +- kernel/tests/s10_benchmarks.rs | 5 +- kernel/tests/s11_rackscale_benchmarks.rs | 6 +- kernel/testutils/src/rackscale_runner.rs | 6 +- lib/kpi/src/lib.rs | 2 +- lib/kpi/src/syscalls/process.rs | 2 +- usr/init/src/init.rs | 4 +- 26 files changed, 270 insertions(+), 178 deletions(-) diff --git a/kernel/src/arch/unix/process.rs b/kernel/src/arch/unix/process.rs index 206e5b00c..981de8813 100644 --- a/kernel/src/arch/unix/process.rs +++ b/kernel/src/arch/unix/process.rs @@ -6,12 +6,12 @@ use alloc::boxed::Box; use alloc::sync::Arc; use alloc::vec::Vec; use bootloader_shared::Module; -use nr2::nr::rwlock::RwLock; use core::alloc::Allocator; use core::cell::RefCell; use core::num::NonZeroUsize; use core::ops::{Deref, DerefMut}; use core::sync::atomic::{AtomicUsize, Ordering}; +use nr2::nr::rwlock::RwLock; use x86::current::paging::PAddr; use arrayvec::ArrayVec; @@ -76,7 +76,7 @@ lazy_static! 
{ for pid in 0..MAX_PROCESSES { processes.push( Arc::try_new(RwLock::new(NodeReplicated::>::new(num_replicas, |afc: AffinityChange| { - return 0; // TODO(dynrep): Return error code + 0 // TODO(dynrep): Return error code }).expect("Not enough memory to initialize system"))).expect("Not enough memory to initialize system")); } processes diff --git a/kernel/src/arch/unix/vspace.rs b/kernel/src/arch/unix/vspace.rs index 4695a3f07..22b01d918 100644 --- a/kernel/src/arch/unix/vspace.rs +++ b/kernel/src/arch/unix/vspace.rs @@ -72,7 +72,7 @@ impl AddressSpace for VSpace { fn root(&self) -> PAddr { PAddr::zero() } - + fn map_frame(&mut self, base: VAddr, frame: Frame, action: MapAction) -> Result<(), KError> { let ma = MappingInfo::new(frame, action); self.mappings.insert(ma.vrange(base), ma); diff --git a/kernel/src/arch/x86_64/irq.rs b/kernel/src/arch/x86_64/irq.rs index 8aef27e5c..3fe5c75fc 100644 --- a/kernel/src/arch/x86_64/irq.rs +++ b/kernel/src/arch/x86_64/irq.rs @@ -42,13 +42,13 @@ use apic::x2apic::X2APICDriver; use apic::ApicDriver; use klogger::{sprint, sprintln}; use log::{info, trace, warn}; +use spin::Lazy; use x86::bits64::segmentation::Descriptor64; use x86::irq::*; use x86::segmentation::{ BuildDescriptor, DescriptorBuilder, GateDescriptorBuilder, SegmentSelector, }; use x86::{dtables, Ring}; -use spin::Lazy; use crate::arch::process::CURRENT_EXECUTOR; use crate::memory::vspace::MapAction; @@ -527,35 +527,43 @@ unsafe fn timer_handler(_a: &ExceptionArguments) { for pid in 0..crate::process::MAX_PROCESSES { nrproc::NrProcess::::synchronize(pid); } - + #[cfg(feature = "dynrep")] if *crate::environment::MT_ID == 4 { use crate::arch::process::current_pid; let pid = current_pid().expect("dont have a pid?"); - if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(10) && *REPLICA_STATE == 0 { + if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(10) + && *REPLICA_STATE == 0 + { info!("got a timer after 10s, remove rid 1"); - let 
handles = nrproc::NrProcess::::remove_replica(pid, 1).expect("removed"); + let handles = + nrproc::NrProcess::::remove_replica(pid, 1).expect("removed"); #[cfg(not(feature = "rackscale"))] super::tlb::shootdown(handles[0].clone()); unsafe { *REPLICA_STATE.as_mut_ptr() = 1 }; } - if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(20) && *REPLICA_STATE == 1 { + if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(20) + && *REPLICA_STATE == 1 + { info!("got a timer after 20s, add rid 1"); let handles = nrproc::NrProcess::::add_replica(pid, 1).expect("added"); #[cfg(not(feature = "rackscale"))] super::tlb::shootdown(handles[0].clone()); unsafe { *REPLICA_STATE.as_mut_ptr() = 2 }; } - if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(30) && *REPLICA_STATE == 2 { + if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(30) + && *REPLICA_STATE == 2 + { info!("got a timer after 30s"); unsafe { *REPLICA_STATE.as_mut_ptr() = 3 }; } - if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(40) && *REPLICA_STATE == 3 { + if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(40) + && *REPLICA_STATE == 3 + { info!("got a timer after 40s"); unsafe { *REPLICA_STATE.as_mut_ptr() = 4 }; } - } let kcb = get_kcb(); @@ -787,7 +795,6 @@ pub extern "C" fn handle_generic_exception(a: ExceptionArguments) -> ! { drop(pborrow); kcb_iret_handle(kcb).resume() - } else { // Go to scheduler instead crate::scheduler::schedule() @@ -798,7 +805,6 @@ pub extern "C" fn handle_generic_exception(a: ExceptionArguments) -> ! 
{ let kcb = get_kcb(); if super::process::has_executor() { - //let mut pborrow = super::process::CURRENT_EXECUTOR.borrow_mut(); //let p = pborrow.as_ref().unwrap(); //p.maybe_switch_vspace(); diff --git a/kernel/src/arch/x86_64/mod.rs b/kernel/src/arch/x86_64/mod.rs index bee96be57..2abfe7fdd 100644 --- a/kernel/src/arch/x86_64/mod.rs +++ b/kernel/src/arch/x86_64/mod.rs @@ -512,7 +512,7 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize { lazy_static::initialize(&crate::nr::KERNEL_NODE_INSTANCE); let kernel_node = crate::nr::KERNEL_NODE_INSTANCE.clone(); - + let local_ridx = kernel_node.register(0).unwrap(); log::info!("Kernel node replica idx is {:?}", local_ridx); crate::nr::NR_REPLICA_REGISTRATION.call_once(|| local_ridx); diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs index 54b0c6095..a077c8667 100644 --- a/kernel/src/arch/x86_64/process.rs +++ b/kernel/src/arch/x86_64/process.rs @@ -13,17 +13,17 @@ use core::iter::Iterator; use core::sync::atomic::{AtomicUsize, Ordering}; use core::{fmt, ptr}; +use crate::arch::kcb; use arrayvec::ArrayVec; +use core::num::NonZeroUsize; use fallible_collections::try_vec; use fallible_collections::FallibleVec; use kpi::arch::SaveArea; use kpi::process::{FrameId, ELF_OFFSET, EXECUTOR_OFFSET}; use lazy_static::lazy_static; use log::{debug, info, trace, warn}; -use crate::arch::kcb; -use core::num::NonZeroUsize; -use nr2::nr::{NodeReplicated, AffinityChange}; use nr2::nr::rwlock::RwLock; +use nr2::nr::{AffinityChange, NodeReplicated}; use x86::bits64::paging::*; use x86::bits64::rflags; use x86::{controlregs, Ring}; @@ -82,7 +82,7 @@ lazy_static! 
{ .get() .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller) { - // Get the NodeReplicated instances from the controller, + // Get the NodeReplicated instances from the controller, // who will have created them in shared memory use crate::arch::rackscale::get_shmem_structure::{rpc_get_shmem_structure, ShmemStructure}; @@ -104,7 +104,7 @@ lazy_static! { use crate::memory::shmem_affinity::local_shmem_affinity; let pcm = per_core_mem(); pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't change affinity"); - + // Want at least one replica... let num_replicas = NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes() * (*NUM_MACHINES))).unwrap(); @@ -171,7 +171,7 @@ lazy_static! { use crate::memory::shmem_affinity::local_shmem_affinity; let pcm = per_core_mem(); - pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't change affinity"); - OR + OR - pcm.set_mem_affinity(orig).expect("Can't change affinity"); } } @@ -191,7 +191,8 @@ lazy_static! { } #[cfg(not(feature = "rackscale"))] -fn create_process_table() -> ArrayVec>>>, MAX_PROCESSES> { +fn create_process_table( +) -> ArrayVec>>>, MAX_PROCESSES> { // Want at least one replica... 
let num_replicas = NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).unwrap(); @@ -204,25 +205,29 @@ fn create_process_table() -> ArrayVec>>> = Arc::try_new(RwLock::new( - NodeReplicated::new(num_replicas, |afc: AffinityChange| { - let pcm = kcb::per_core_mem(); - //log::info!("Got AffinityChange: {:?}", afc); - match afc { - AffinityChange::Replica(r) => { - let affinity = { pcm.physical_memory.borrow().affinity }; - pcm.set_mem_affinity(crate::memory::shmem_affinity::mid_to_shmem_affinity(r)).expect("Can't set affinity"); - return affinity; - } - AffinityChange::Revert(orig) => { - pcm.set_mem_affinity(orig).expect("Can't set affinity"); - return 0; + let process: Arc>>> = + Arc::try_new(RwLock::new( + NodeReplicated::new(num_replicas, |afc: AffinityChange| { + let pcm = kcb::per_core_mem(); + //log::info!("Got AffinityChange: {:?}", afc); + match afc { + AffinityChange::Replica(r) => { + let affinity = { pcm.physical_memory.borrow().affinity }; + pcm.set_mem_affinity( + crate::memory::shmem_affinity::mid_to_shmem_affinity(r), + ) + .expect("Can't set affinity"); + return affinity; + } + AffinityChange::Revert(orig) => { + pcm.set_mem_affinity(orig).expect("Can't set affinity"); + return 0; + } } - } - }) - .expect("Not enough memory to initialize system")), - ) - .expect("Not enough memory to initialize system"); + }) + .expect("Not enough memory to initialize system"), + )) + .expect("Not enough memory to initialize system"); processes.push(process) } @@ -302,15 +307,15 @@ fn create_process_table( } */ - pub(crate) struct ArchProcessManagement; - +pub(crate) struct ArchProcessManagement; impl crate::nrproc::ProcessManager for ArchProcessManagement { type Process = Ring3Process; fn process_table( &self, - ) -> &'static ArrayVec>>>, MAX_PROCESSES> { + ) -> &'static ArrayVec>>>, MAX_PROCESSES> + { &*super::process::PROCESS_TABLE } } @@ -1026,7 +1031,10 @@ impl Executor for Ring3Executor { unsafe { let current_pml4 = 
PAddr::from(controlregs::cr3()); if current_pml4 != replica_pml4 { - info!("Switching from 0x{:x} to 0x{:x}", current_pml4, replica_pml4); + info!( + "Switching from 0x{:x} to 0x{:x}", + current_pml4, replica_pml4 + ); controlregs::cr3_write(self.pml4.into()); } } diff --git a/kernel/src/arch/x86_64/rackscale/client_state.rs b/kernel/src/arch/x86_64/rackscale/client_state.rs index 795cbbaf8..89e5b7612 100644 --- a/kernel/src/arch/x86_64/rackscale/client_state.rs +++ b/kernel/src/arch/x86_64/rackscale/client_state.rs @@ -60,7 +60,6 @@ impl ClientState { crate::transport::shmem::init_shmem_rpc(true) .expect("Failed to initialize shmem RPC"), )) - }; log::info!("per_process_base_pages"); diff --git a/kernel/src/arch/x86_64/rackscale/controller.rs b/kernel/src/arch/x86_64/rackscale/controller.rs index d0e989ce8..c1e1217f3 100644 --- a/kernel/src/arch/x86_64/rackscale/controller.rs +++ b/kernel/src/arch/x86_64/rackscale/controller.rs @@ -75,7 +75,7 @@ pub(crate) fn run() { .expect("Failed to accept client"); ClientReadyCount.fetch_add(1, Ordering::SeqCst); - + log::info!("before ClientReadyCount"); // Wait for all clients to connect before fulfilling any RPCs. 
while ClientReadyCount.load(Ordering::SeqCst) != (*crate::environment::NUM_MACHINES - 1) as u64 diff --git a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs index 268e99971..5f8d87969 100644 --- a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs +++ b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs @@ -21,7 +21,7 @@ use crate::error::{KError, KResult}; use crate::memory::shmem_affinity::local_shmem_affinity; use crate::memory::vspace::TlbFlushHandle; use crate::memory::{kernel_vaddr_to_paddr, paddr_to_kernel_vaddr, PAddr, VAddr}; -use crate::nr::{KERNEL_NODE_INSTANCE, KernelNode}; +use crate::nr::{KernelNode, KERNEL_NODE_INSTANCE}; use crate::nrproc::NrProcess; use crate::process::MAX_PROCESSES; @@ -134,8 +134,7 @@ pub(crate) fn handle_get_shmem_structure( // to a physical address, and then change it to a shmem offset by subtracting the shmem base. // TODO(rackscale): try to simplify this, and below? let arc_log_paddr = kernel_vaddr_to_paddr(VAddr::from_u64( - (*&client_clone - as *const NodeReplicated>) as u64, + (*&client_clone as *const NodeReplicated>) as u64, )); logs[i] = arc_log_paddr.as_u64(); } @@ -147,10 +146,15 @@ pub(crate) fn handle_get_shmem_structure( ShmemStructure::NrLog => { let log_clone = Arc::into_raw(Arc::clone(&KERNEL_NODE_INSTANCE)); - let log_paddr = - kernel_vaddr_to_paddr(VAddr::from_u64((*&log_clone as *const NodeReplicated) as u64)) - .as_u64(); - log::info!("nr_node addr {:?} &KERNEL_NODE_INSTANCE = {:p}", log_paddr, &KERNEL_NODE_INSTANCE); + let log_paddr = kernel_vaddr_to_paddr(VAddr::from_u64( + (*&log_clone as *const NodeReplicated) as u64, + )) + .as_u64(); + log::info!( + "nr_node addr {:?} &KERNEL_NODE_INSTANCE = {:p}", + log_paddr, + &KERNEL_NODE_INSTANCE + ); // Modify header and write into output buffer unsafe { encode(&[log_paddr], &mut payload) }.unwrap(); diff --git a/kernel/src/arch/x86_64/syscall.rs b/kernel/src/arch/x86_64/syscall.rs index 
f95793cd5..d73cd398f 100644 --- a/kernel/src/arch/x86_64/syscall.rs +++ b/kernel/src/arch/x86_64/syscall.rs @@ -296,8 +296,7 @@ impl ProcessDispatch for T { let pid = current_pid()?; let handles = if add > 0 { NrProcess::::add_replica(pid, rid as usize).expect("add_replica") - } - else { + } else { NrProcess::::remove_replica(pid, rid as usize).expect("remove_replica") }; @@ -308,8 +307,7 @@ impl ProcessDispatch for T { #[cfg(not(feature = "rackscale"))] super::tlb::shootdown(handles[0].clone()); - - Ok((0,0)) + Ok((0, 0)) } fn exit(&self, code: u64) -> Result<(u64, u64), KError> { diff --git a/kernel/src/arch/x86_64/tlb.rs b/kernel/src/arch/x86_64/tlb.rs index 87ecbb987..3a6e40ff9 100644 --- a/kernel/src/arch/x86_64/tlb.rs +++ b/kernel/src/arch/x86_64/tlb.rs @@ -54,7 +54,7 @@ lazy_static! { use crate::arch::kcb::per_core_mem; use crate::memory::shmem_affinity::local_shmem_affinity; let local_affinity = local_shmem_affinity(); - + log::info!("before affinity"); // We want to allocate the queues in shared memory let affinity = { diff --git a/kernel/src/arch/x86_64/vspace/mod.rs b/kernel/src/arch/x86_64/vspace/mod.rs index e28d1cf7c..23264bc0a 100644 --- a/kernel/src/arch/x86_64/vspace/mod.rs +++ b/kernel/src/arch/x86_64/vspace/mod.rs @@ -113,7 +113,7 @@ impl AddressSpace for VSpace { fn root(&self) -> PAddr { self.pml4_address() } - + fn map_frame(&mut self, base: VAddr, frame: Frame, action: MapAction) -> Result<(), KError> { if frame.size() == 0 { return Err(KError::InvalidFrame); @@ -205,7 +205,12 @@ impl Drop for VSpace { impl VSpace { pub(crate) fn new() -> Result { let mut btree = BTreeMap::new(); - btree.try_insert(VAddr(0x0), MappingInfo::new(Frame::empty(), MapAction::none())).expect("fail"); + btree + .try_insert( + VAddr(0x0), + MappingInfo::new(Frame::empty(), MapAction::none()), + ) + .expect("fail"); Ok(VSpace { mappings: BTreeMap::new(), diff --git a/kernel/src/arch/x86_64/vspace/page_table.rs b/kernel/src/arch/x86_64/vspace/page_table.rs index 
1429a6c02..824f60da9 100644 --- a/kernel/src/arch/x86_64/vspace/page_table.rs +++ b/kernel/src/arch/x86_64/vspace/page_table.rs @@ -41,7 +41,7 @@ impl Clone for PageTable { let frame_ptr = unsafe { let ptr = alloc::alloc::alloc_zeroed(PT_LAYOUT); debug_assert!(!ptr.is_null()); - + let nptr = NonNull::new_unchecked(ptr); NonNull::slice_from_raw_parts(nptr, PT_LAYOUT.size()) }; @@ -51,17 +51,17 @@ impl Clone for PageTable { unsafe { frame.zero() }; frame } - + fn new_pt() -> PDEntry { let frame = alloc_frame(); return PDEntry::new(frame.base, PDFlags::P | PDFlags::RW | PDFlags::US); } - + fn new_pd() -> PDPTEntry { let frame = alloc_frame(); return PDPTEntry::new(frame.base, PDPTFlags::P | PDPTFlags::RW | PDPTFlags::US); } - + fn new_pdpt() -> PML4Entry { let frame = alloc_frame(); return PML4Entry::new(frame.base, PML4Flags::P | PML4Flags::RW | PML4Flags::US); @@ -84,7 +84,6 @@ impl Clone for PageTable { let cloned_pdpt_entry = cloned_pdpt[pdpt_idx]; drop(cloned_pdpt); - for pd_idx in 0..PAGE_SIZE_ENTRIES { let pd = self.get_pd(pdpt[pdpt_idx]); let cloned_pd = cloned_pt.get_pd_mut(cloned_pdpt_entry); @@ -107,7 +106,6 @@ impl Clone for PageTable { } else { // Encountered a 2 MiB mapping cloned_pd[pd_idx] = pd[pd_idx]; - } } } else { diff --git a/kernel/src/environment.rs b/kernel/src/environment.rs index 805f0822f..b724f78ba 100644 --- a/kernel/src/environment.rs +++ b/kernel/src/environment.rs @@ -16,16 +16,15 @@ pub(crate) static CORE_ID: Lazy = #[thread_local] pub(crate) static MT_ID: Lazy = Lazy::new(|| { #[cfg(feature = "rackscale")] - return (*crate::environment::MACHINE_ID - 1) * (atopology::MACHINE_TOPOLOGY.num_threads()) + atopology::MACHINE_TOPOLOGY.current_thread().id; + return (*crate::environment::MACHINE_ID - 1) * (atopology::MACHINE_TOPOLOGY.num_threads()) + + atopology::MACHINE_TOPOLOGY.current_thread().id; #[cfg(not(feature = "rackscale"))] *CORE_ID }); /// Number of nodes in the current deployment. 
#[allow(unused)] -pub(crate) static NUM_NODES: Lazy = - Lazy::new(|| atopology::MACHINE_TOPOLOGY.num_nodes()); - +pub(crate) static NUM_NODES: Lazy = Lazy::new(|| atopology::MACHINE_TOPOLOGY.num_nodes()); /// The NUMA node id of the current core (hardware thread). #[thread_local] diff --git a/kernel/src/memory/vspace_model.rs b/kernel/src/memory/vspace_model.rs index bd5e2230d..22b5df4dc 100644 --- a/kernel/src/memory/vspace_model.rs +++ b/kernel/src/memory/vspace_model.rs @@ -51,6 +51,11 @@ impl Default for ModelAddressSpace { } impl AddressSpace for ModelAddressSpace { + // TODO(correctness): I don't know if this is correct for the model + fn root(&self) -> PAddr { + PAddr::zero() + } + fn map_frame(&mut self, base: VAddr, frame: Frame, action: MapAction) -> Result<(), KError> { // Don't allow mapping of zero-sized frames if frame.size() == 0 { diff --git a/kernel/src/nr.rs b/kernel/src/nr.rs index fb62458b8..263115cfa 100644 --- a/kernel/src/nr.rs +++ b/kernel/src/nr.rs @@ -4,12 +4,12 @@ use crate::prelude::*; use core::fmt::Debug; +use crate::arch::kcb; use alloc::sync::Arc; use hashbrown::HashMap; use log::{error, trace}; use nr2::nr::{Dispatch, NodeReplicated, ThreadToken}; use spin::Once; -use crate::arch::kcb; use lazy_static::lazy_static; @@ -121,7 +121,6 @@ lazy_static! 
{ }; } - #[derive(PartialEq, Clone, Copy, Debug)] pub(crate) enum ReadOps { CurrentProcess(kpi::system::GlobalThreadId), @@ -168,7 +167,7 @@ pub(crate) struct KernelNode { impl Default for KernelNode { fn default() -> KernelNode { let k = KernelNode { - process_map: HashMap::with_capacity(MAX_PROCESSES), // with_capacity(MAX_PROCESSES), + process_map: HashMap::with_capacity(MAX_PROCESSES), // with_capacity(MAX_PROCESSES), scheduler_map: HashMap::with_capacity(24), // with_capacity(MAX_CORES), or, for rackscale, with_capacity(MAX_CORES * MAX_MACHINES) }; k @@ -188,10 +187,12 @@ impl KernelNode { gtid: Option, ) -> Result { // todo node id - crate::nr::NR_REPLICA_REGISTRATION.call_once(|| crate::nr::KERNEL_NODE_INSTANCE.register(0).unwrap()); + crate::nr::NR_REPLICA_REGISTRATION + .call_once(|| crate::nr::KERNEL_NODE_INSTANCE.register(0).unwrap()); let op = Op::SchedAllocateCore(pid, affinity, gtid, entry_point); - let response = KERNEL_NODE_INSTANCE.execute_mut(op, *NR_REPLICA_REGISTRATION.get().unwrap()); + let response = + KERNEL_NODE_INSTANCE.execute_mut(op, *NR_REPLICA_REGISTRATION.get().unwrap()); match response { Ok(NodeResult::CoreAllocated(rgtid)) => Ok(rgtid), @@ -206,8 +207,8 @@ impl KernelNode { gtid: kpi::system::GlobalThreadId, ) -> Result<(), KError> { let op = Op::SchedReleaseCore(pid, affinity, gtid); - let response = KERNEL_NODE_INSTANCE - .execute_mut(op,*NR_REPLICA_REGISTRATION.get().unwrap()); + let response = + KERNEL_NODE_INSTANCE.execute_mut(op, *NR_REPLICA_REGISTRATION.get().unwrap()); match response { Ok(NodeResult::CoreReleased) => Ok(()), Err(e) => Err(e), diff --git a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs index 8a0512474..8c4c9e4a2 100644 --- a/kernel/src/nrproc.rs +++ b/kernel/src/nrproc.rs @@ -11,7 +11,7 @@ use arrayvec::ArrayVec; use fallible_collections::vec::FallibleVec; use kpi::process::{FrameId, ProcessInfo}; use kpi::MemType; -use nr2::nr::{Dispatch, NodeReplicated, ThreadToken, rwlock::RwLock}; +use 
nr2::nr::{rwlock::RwLock, Dispatch, NodeReplicated, ThreadToken}; use spin::Once; use crate::arch::process::PROCESS_TABLE; @@ -34,7 +34,7 @@ pub(crate) fn register_thread_with_process_replicas() { let node = *crate::environment::NODE_ID; #[cfg(feature = "rackscale")] let node = 0; //*crate::environment::MACHINE_ID - + debug_assert!(PROCESS_TABLE.len() > node, "Invalid Node ID"); PROCESS_TOKEN.call_once(|| { @@ -42,8 +42,13 @@ pub(crate) fn register_thread_with_process_replicas() { for pid in 0..MAX_PROCESSES { debug_assert!(PROCESS_TABLE.len() > pid, "Invalid PID"); - let token = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).register(node); - log::info!("MT_ID is {}: {node} registered {pid} {token:?}", *crate::environment::MT_ID); + let token = PROCESS_TABLE[pid] + .read(*crate::environment::MT_ID) + .register(node); + log::info!( + "MT_ID is {}: {node} registered {pid} {token:?}", + *crate::environment::MT_ID + ); tokens.push(token.expect("Need to be able to register")); } @@ -90,10 +95,10 @@ pub(crate) enum ProcessOpMut { #[cfg(feature = "rackscale")] DispatcherAllocation(Frame, kpi::system::MachineId), - + #[cfg(not(feature = "rackscale"))] DispatcherAllocation(Frame), - + MemMapFrame(VAddr, Frame, MapAction), MemMapDevice(Frame, MapAction), MemMapFrameId(VAddr, FrameId, MapAction), @@ -157,11 +162,14 @@ impl NrProcess

{ log::info!("add_replica {pid} {rid}"); // we use unmap of 0x0 to get a snapshot of where the core is running on let handle = NrProcess::

::unmap(pid, VAddr::from(0x0)); - if !handle.is_ok() { + if handle.is_err() { panic!("couldn't get snapshot"); } - PROCESS_TABLE[pid].write(*crate::environment::MT_ID).add_replica(rid).expect("add_replica failed"); + PROCESS_TABLE[pid] + .write(*crate::environment::MT_ID) + .add_replica(rid) + .expect("add_replica failed"); log::info!("added_replica {pid} {rid}"); handle @@ -174,14 +182,20 @@ impl NrProcess

{ #[cfg(not(feature = "rackscale"))] let max_nodes = *crate::environment::NUM_NODES; - debug_assert!(rid < max_nodes, "Invalid Node ID {rid} max_nodes {max_nodes}"); + debug_assert!( + rid < max_nodes, + "Invalid Node ID {rid} max_nodes {max_nodes}" + ); // we use unmap of 0x0 to get a snapshot of where the core is running on let handle = NrProcess::

::unmap(pid, VAddr::from(0x0)); - if !handle.is_ok() { + if handle.is_err() { panic!("couldn't get snapshot"); } - PROCESS_TABLE[pid].write(*crate::environment::MT_ID).remove_replica(rid).expect("remove_replica failed"); + PROCESS_TABLE[pid] + .write(*crate::environment::MT_ID) + .remove_replica(rid) + .expect("remove_replica failed"); handle } @@ -192,10 +206,12 @@ impl NrProcess

{ writeable_sections: Vec, ) -> Result<(), KError> { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut( - ProcessOpMut::Load(pid, module_name, writeable_sections), - PROCESS_TOKEN.get().unwrap()[pid], - ); + let response = PROCESS_TABLE[pid] + .read(*crate::environment::MT_ID) + .execute_mut( + ProcessOpMut::Load(pid, module_name, writeable_sections), + PROCESS_TOKEN.get().unwrap()[pid], + ); match response { Ok(ProcessResult::Ok) => Ok(()), Err(e) => Err(e), @@ -219,7 +235,9 @@ impl NrProcess

{ pub(crate) fn synchronize(pid: Pid) { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - PROCESS_TABLE[pid].read(*crate::environment::MT_ID).sync(PROCESS_TOKEN.get().unwrap()[pid]); + PROCESS_TABLE[pid] + .read(*crate::environment::MT_ID) + .sync(PROCESS_TOKEN.get().unwrap()[pid]); } pub(crate) fn map_device_frame( @@ -228,10 +246,12 @@ impl NrProcess

{ action: MapAction, ) -> Result<(u64, u64), KError> { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut( - ProcessOpMut::MemMapDevice(frame, action), - PROCESS_TOKEN.get().unwrap()[pid], - ); + let response = PROCESS_TABLE[pid] + .read(*crate::environment::MT_ID) + .execute_mut( + ProcessOpMut::MemMapDevice(frame, action), + PROCESS_TOKEN.get().unwrap()[pid], + ); match response { Ok(ProcessResult::Ok) => Ok((frame.base.as_u64(), frame.size() as u64)), Err(e) => Err(e), @@ -241,10 +261,12 @@ impl NrProcess

{ pub(crate) fn unmap(pid: Pid, base: VAddr) -> Result, KError> { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut( - ProcessOpMut::MemUnmap(base), - PROCESS_TOKEN.get().unwrap()[pid], - ); + let response = PROCESS_TABLE[pid] + .read(*crate::environment::MT_ID) + .execute_mut( + ProcessOpMut::MemUnmap(base), + PROCESS_TOKEN.get().unwrap()[pid], + ); match response { Ok(ProcessResult::Unmapped(handle)) => Ok(handle), Err(e) => Err(e), @@ -260,10 +282,12 @@ impl NrProcess

{ ) -> Result<(PAddr, usize), KError> { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); //action.multiple_mappings(true); - let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut( - ProcessOpMut::MemMapFrameId(base, frame_id, action), - PROCESS_TOKEN.get().unwrap()[pid], - ); + let response = PROCESS_TABLE[pid] + .read(*crate::environment::MT_ID) + .execute_mut( + ProcessOpMut::MemMapFrameId(base, frame_id, action), + PROCESS_TOKEN.get().unwrap()[pid], + ); match response { Ok(ProcessResult::MappedFrameId(paddr, size)) => Ok((paddr, size)), Err(e) => Err(e), @@ -280,10 +304,12 @@ impl NrProcess

{ debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); let mut virtual_offset = 0; for frame in frames { - let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut( - ProcessOpMut::MemMapFrame(base + virtual_offset, frame, action), - PROCESS_TOKEN.get().unwrap()[pid], - ); + let response = PROCESS_TABLE[pid] + .read(*crate::environment::MT_ID) + .execute_mut( + ProcessOpMut::MemMapFrame(base + virtual_offset, frame, action), + PROCESS_TOKEN.get().unwrap()[pid], + ); match response { Ok(ProcessResult::Ok) => {} e => unreachable!( @@ -301,11 +327,11 @@ impl NrProcess

{ Ok((base.as_u64(), virtual_offset as u64)) } - pub(crate) fn ptroot(pid: Pid) -> Result { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - let response = - PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute(ProcessOp::GetPtRoot, PROCESS_TOKEN.get().unwrap()[pid]); + let response = PROCESS_TABLE[pid] + .read(*crate::environment::MT_ID) + .execute(ProcessOp::GetPtRoot, PROCESS_TOKEN.get().unwrap()[pid]); match response { Ok(ProcessResult::PtRoot(paddr)) => Ok(paddr), Err(e) => Err(e), @@ -315,8 +341,9 @@ impl NrProcess

{ pub(crate) fn pinfo(pid: Pid) -> Result { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - let response = - PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute(ProcessOp::ProcessInfo, PROCESS_TOKEN.get().unwrap()[pid]); + let response = PROCESS_TABLE[pid] + .read(*crate::environment::MT_ID) + .execute(ProcessOp::ProcessInfo, PROCESS_TOKEN.get().unwrap()[pid]); match response { Ok(ProcessResult::ProcessInfo(pinfo)) => Ok(pinfo), Err(e) => Err(e), @@ -332,10 +359,12 @@ impl NrProcess

{ let gtid = *crate::environment::CORE_ID; let node = *crate::environment::NODE_ID; - let response = pm.process_table()[pid].read(*crate::environment::MT_ID).execute_mut( - ProcessOpMut::AssignExecutor(gtid, node), - PROCESS_TOKEN.get().unwrap()[pid], - ); + let response = pm.process_table()[pid] + .read(*crate::environment::MT_ID) + .execute_mut( + ProcessOpMut::AssignExecutor(gtid, node), + PROCESS_TOKEN.get().unwrap()[pid], + ); match response { Ok(ProcessResult::Executor(executor)) => Ok(executor), Err(e) => Err(e), @@ -363,10 +392,12 @@ impl NrProcess

{ pub(crate) fn allocate_frame_to_process(pid: Pid, frame: Frame) -> Result { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); - let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut( - ProcessOpMut::AllocateFrameToProcess(frame), - PROCESS_TOKEN.get().unwrap()[pid], - ); + let response = PROCESS_TABLE[pid] + .read(*crate::environment::MT_ID) + .execute_mut( + ProcessOpMut::AllocateFrameToProcess(frame), + PROCESS_TOKEN.get().unwrap()[pid], + ); match response { Ok(ProcessResult::FrameId(fid)) => Ok(fid), Err(e) => Err(e), @@ -377,10 +408,12 @@ impl NrProcess

{ pub(crate) fn release_frame_from_process(pid: Pid, fid: FrameId) -> Result { debug_assert!(pid < MAX_PROCESSES, "Invalid PID"); debug_assert!(fid < MAX_FRAMES_PER_PROCESS, "Invalid FID"); - let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut( - ProcessOpMut::ReleaseFrameFromProcess(fid), - PROCESS_TOKEN.get().unwrap()[pid], - ); + let response = PROCESS_TABLE[pid] + .read(*crate::environment::MT_ID) + .execute_mut( + ProcessOpMut::ReleaseFrameFromProcess(fid), + PROCESS_TOKEN.get().unwrap()[pid], + ); match response { Ok(ProcessResult::Frame(f)) => Ok(f), Err(e) => Err(e), @@ -393,14 +426,16 @@ impl NrProcess

{ #[cfg(feature = "rackscale")] let mid = *crate::environment::MACHINE_ID; - let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut( - #[cfg(not(feature = "rackscale"))] - ProcessOpMut::DispatcherAllocation(frame), - #[cfg(feature = "rackscale")] - ProcessOpMut::DispatcherAllocation(frame, mid), - PROCESS_TOKEN.get().unwrap()[pid], - ); - + let response = PROCESS_TABLE[pid] + .read(*crate::environment::MT_ID) + .execute_mut( + #[cfg(not(feature = "rackscale"))] + ProcessOpMut::DispatcherAllocation(frame), + #[cfg(feature = "rackscale")] + ProcessOpMut::DispatcherAllocation(frame, mid), + PROCESS_TOKEN.get().unwrap()[pid], + ); + match response { Ok(ProcessResult::ExecutorsCreated(how_many)) => Ok(how_many), Err(e) => Err(e), @@ -409,10 +444,12 @@ impl NrProcess

{ } pub(crate) fn userslice_to_arc_slice(from: UserSlice) -> Result, KError> { - let response = PROCESS_TABLE[from.pid].read(*crate::environment::MT_ID).execute( - ProcessOp::ReadSlice(from), - PROCESS_TOKEN.get().unwrap()[from.pid], - ); + let response = PROCESS_TABLE[from.pid] + .read(*crate::environment::MT_ID) + .execute( + ProcessOp::ReadSlice(from), + PROCESS_TOKEN.get().unwrap()[from.pid], + ); match response { Ok(ProcessResult::ReadSlice(v)) => Ok(v), Err(e) => Err(e), @@ -421,10 +458,12 @@ impl NrProcess

{ } pub(crate) fn read_string_from_userspace(from: UserSlice) -> Result { - let response = PROCESS_TABLE[from.pid].read(*crate::environment::MT_ID).execute( - ProcessOp::ReadString(from), - PROCESS_TOKEN.get().unwrap()[from.pid], - ); + let response = PROCESS_TABLE[from.pid] + .read(*crate::environment::MT_ID) + .execute( + ProcessOp::ReadString(from), + PROCESS_TOKEN.get().unwrap()[from.pid], + ); match response { Ok(ProcessResult::ReadString(s)) => Ok(s), Err(e) => Err(e), @@ -451,10 +490,12 @@ impl NrProcess

{ on: UserSlice, f: Box KResult<(u64, u64)>>, ) -> Result<(u64, u64), KError> { - let response = PROCESS_TABLE[on.pid].read(*crate::environment::MT_ID).execute( - ProcessOp::ExecSliceMut(on, f), - PROCESS_TOKEN.get().unwrap()[on.pid], - ); + let response = PROCESS_TABLE[on.pid] + .read(*crate::environment::MT_ID) + .execute( + ProcessOp::ExecSliceMut(on, f), + PROCESS_TOKEN.get().unwrap()[on.pid], + ); match response { Ok(ProcessResult::SysRetOk((a, b))) => Ok((a, b)), Err(e) => Err(e), @@ -466,10 +507,12 @@ impl NrProcess

{ on: &'a UserSlice, f: Box KResult<()>>, ) -> Result<(), KError> { - let response = PROCESS_TABLE[on.pid].read(*crate::environment::MT_ID).execute( - ProcessOp::ExecSlice(on, f), - PROCESS_TOKEN.get().unwrap()[on.pid], - ); + let response = PROCESS_TABLE[on.pid] + .read(*crate::environment::MT_ID) + .execute( + ProcessOp::ExecSlice(on, f), + PROCESS_TOKEN.get().unwrap()[on.pid], + ); match response { Ok(ProcessResult::Ok) => Ok(()), Err(e) => Err(e), @@ -489,9 +532,7 @@ where fn dispatch<'buf>(&self, op: Self::ReadOperation<'_>) -> Self::Response { match op { - ProcessOp::GetPtRoot => { - Ok(ProcessResult::PtRoot(self.process.vspace().root())) - } + ProcessOp::GetPtRoot => Ok(ProcessResult::PtRoot(self.process.vspace().root())), ProcessOp::ProcessInfo => Ok(ProcessResult::ProcessInfo(*self.process.pinfo())), ProcessOp::MemResolve(base) => { let (paddr, rights) = self.process.vspace().resolve(base)?; @@ -595,8 +636,7 @@ where .expect("is_aliasable implies this op can't fail"); } shootdown_handle - } - else { + } else { TlbFlushHandle::new(0x0.into(), 0x0.into(), 0x0, MapAction::none()) }; diff --git a/kernel/src/process.rs b/kernel/src/process.rs index 5fe7a0f20..0693832e8 100644 --- a/kernel/src/process.rs +++ b/kernel/src/process.rs @@ -79,7 +79,11 @@ pub(crate) trait Process: FrameManagement + Clone { affinity: atopology::NodeId, ) -> Result<(), alloc::collections::TryReserveError>; - fn allocate_executors(&mut self, frame: Frame, #[cfg(feature = "rackscale")] mid: kpi::system::MachineId) -> Result; + fn allocate_executors( + &mut self, + frame: Frame, + #[cfg(feature = "rackscale")] mid: kpi::system::MachineId, + ) -> Result; fn vspace_mut(&mut self) -> &mut Self::A; @@ -463,7 +467,10 @@ impl elfloader::ElfLoader for DataSecAllocator { pub(crate) fn make_process(binary: &'static str) -> Result { // Allocate a new process let pid = { - let response = crate::nr::KERNEL_NODE_INSTANCE.execute_mut(crate::nr::Op::AllocatePid, 
*crate::nr::NR_REPLICA_REGISTRATION.get().unwrap())?; + let response = crate::nr::KERNEL_NODE_INSTANCE.execute_mut( + crate::nr::Op::AllocatePid, + *crate::nr::NR_REPLICA_REGISTRATION.get().unwrap(), + )?; if let crate::nr::NodeResult::PidAllocated(pid) = response { Ok(pid) } else { @@ -582,11 +589,13 @@ pub(crate) fn allocate_dispatchers(pid: Pid, affinity: NodeId) -> Re let pcm = crate::arch::kcb::per_core_mem(); #[cfg(feature = "rackscale")] - pcm.set_mem_affinity(affinity).expect("Can't change affinity"); + pcm.set_mem_affinity(affinity) + .expect("Can't change affinity"); let frame = pcm.mem_manager().allocate_large_page()?; - + #[cfg(feature = "rackscale")] - pcm.set_mem_affinity(crate::memory::shmem_affinity::local_shmem_affinity()).expect("Can't reset affinity"); + pcm.set_mem_affinity(crate::memory::shmem_affinity::local_shmem_affinity()) + .expect("Can't reset affinity"); frame }; diff --git a/kernel/src/syscalls.rs b/kernel/src/syscalls.rs index f736a160a..5051cb564 100644 --- a/kernel/src/syscalls.rs +++ b/kernel/src/syscalls.rs @@ -128,7 +128,7 @@ enum ProcessOperationArgs { ReleaseCore(W), AllocatePhysical(W, W), ReleasePhysical(W), - SetReplicas(W, W) + SetReplicas(W, W), } impl + LowerHex + Debug + Copy + Clone> ProcessOperationArgs { diff --git a/kernel/src/transport/shmem.rs b/kernel/src/transport/shmem.rs index 60b178262..31b854c5d 100644 --- a/kernel/src/transport/shmem.rs +++ b/kernel/src/transport/shmem.rs @@ -177,19 +177,25 @@ impl ShmemDevice { .expect("Failed to write potential shmem memory region addresses"); // Note: leaving this code as a comment as a way to test if all shmem is writeable. 
- + #[cfg(feature = "rackscale")] match crate::CMDLINE.get().map_or(Mode::Native, |c| c.mode) { Mode::Controller => { //let mut f= Frame::new(PAddr::from(0x70003f600000u64),2*1024*1024,12); //log::info!("zeroing out shmem"); //unsafe {f.zero()}; - log::info!("zeroing out shmem {:x} -- {:x}", mem_region.address, mem_region.address + mem_region.size); + log::info!( + "zeroing out shmem {:x} -- {:x}", + mem_region.address, + mem_region.address + mem_region.size + ); for offset in 1..512 { - let myptr: *mut u8 = - (KERNEL_BASE + mem_region.address + (offset*4096)) as *mut u8; + let myptr: *mut u8 = + (KERNEL_BASE + mem_region.address + (offset * 4096)) as *mut u8; log::info!("at addr {:x}", (myptr as u64 - KERNEL_BASE)); - unsafe { *myptr = 0x0; } + unsafe { + *myptr = 0x0; + } } /* let mymemslice = unsafe { @@ -197,13 +203,16 @@ impl ShmemDevice { (KERNEL_BASE + mem_region.address) as *mut u8, mem_region.size as usize, ) - }; + }; mymemslice.fill(0);*/ - log::info!("after out shmem {:x} -- {:x}", mem_region.address, mem_region.address + mem_region.size); + log::info!( + "after out shmem {:x} -- {:x}", + mem_region.address, + mem_region.address + mem_region.size + ); } _ => {} } - // Map the MSI-X table into kernel space kvspace @@ -356,7 +365,9 @@ pub(crate) fn create_shmem_transport(mid: MachineId) -> KResult= SHMEM_TRANSPORT_SIZE); let allocator = ShmemAllocator::new(base_addr.as_u64(), SHMEM_TRANSPORT_SIZE); - crate::CMDLINE.get().map(|c| log::info!("c.mode is {:?}", c.mode)); + crate::CMDLINE + .get() + .map(|c| log::info!("c.mode is {:?}", c.mode)); match crate::CMDLINE.get().map_or(Mode::Native, |c| c.mode) { Mode::Controller => { diff --git a/kernel/tests/s06_rackscale_tests.rs b/kernel/tests/s06_rackscale_tests.rs index 25e9518af..fcc8871c6 100644 --- a/kernel/tests/s06_rackscale_tests.rs +++ b/kernel/tests/s06_rackscale_tests.rs @@ -39,7 +39,7 @@ fn rackscale_userspace_smoke_test(transport: RackscaleTransport) { "test-upcall", "test-scheduler", 
"test-syscalls", - "test-dynamic-replication" + "test-dynamic-replication", ]) .set_rackscale(true) .release() diff --git a/kernel/tests/s10_benchmarks.rs b/kernel/tests/s10_benchmarks.rs index c39001adc..9e063edbc 100644 --- a/kernel/tests/s10_benchmarks.rs +++ b/kernel/tests/s10_benchmarks.rs @@ -493,7 +493,10 @@ fn s10_fxmark_benchmark() { if cfg!(feature = "smoke") && cores > 2 { cmdline = cmdline.nodes(std::cmp::max(machine.max_cores() / 16, 1)); } else { - cmdline = cmdline.nodes(std::cmp::max(machine.max_cores() / 16, machine.max_numa_nodes())); + cmdline = cmdline.nodes(std::cmp::max( + machine.max_cores() / 16, + machine.max_numa_nodes(), + )); } let mut output = String::new(); diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index 22130ea90..f863af806 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -1016,7 +1016,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { } else { MemcachedInternalConfig { num_queries: 100_000_000, // TODO(rackscale): should be 100_000_000, - mem_size: 16, // TODO(rackscale): should be 32_000, + mem_size: 16, // TODO(rackscale): should be 32_000, } }; @@ -1032,7 +1032,9 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { test.cores_per_client = 4; test.cmd = format!( r#"init=memcachedbench.bin initargs={} appcmd='--x-benchmark-mem={} --x-benchmark-queries={}'"#, - 2*4, config.mem_size, config.num_queries + 2 * 4, + config.mem_size, + config.num_queries ); test.arg = Some(config); test.run_rackscale(); diff --git a/kernel/testutils/src/rackscale_runner.rs b/kernel/testutils/src/rackscale_runner.rs index 3b8af826c..ee17ddf8d 100644 --- a/kernel/testutils/src/rackscale_runner.rs +++ b/kernel/testutils/src/rackscale_runner.rs @@ -513,7 +513,11 @@ impl RackscaleBench { // Find max cores, max numa, and max cores per node let machine = Machine::determine(); let max_cores = if is_smoke { 8 } else { machine.max_cores() 
}; - let max_numa = if is_smoke { 4 } else { machine.max_numa_nodes() }; + let max_numa = if is_smoke { + 4 + } else { + machine.max_numa_nodes() + }; let total_cores_per_node = core::cmp::max(1, max_cores / max_numa); // Do initial network configuration diff --git a/lib/kpi/src/lib.rs b/lib/kpi/src/lib.rs index 6ebd2000d..3397e0495 100644 --- a/lib/kpi/src/lib.rs +++ b/lib/kpi/src/lib.rs @@ -105,7 +105,7 @@ pub enum ProcessOperation { /// Release a physical memory page from the process. ReleasePhysical = 10, /// Set Replicas - SetReplicas = 11 + SetReplicas = 11, } impl ProcessOperation { diff --git a/lib/kpi/src/syscalls/process.rs b/lib/kpi/src/syscalls/process.rs index 4110a6500..6d2e2ab27 100644 --- a/lib/kpi/src/syscalls/process.rs +++ b/lib/kpi/src/syscalls/process.rs @@ -14,7 +14,7 @@ use x86::bits64::paging::VAddr; pub struct Process; impl Process { - pub fn set_replicas(add: bool, replica_idx: usize) -> Result<(), SystemCallError>{ + pub fn set_replicas(add: bool, replica_idx: usize) -> Result<(), SystemCallError> { let r = unsafe { syscall!( SystemCall::Process as u64, diff --git a/usr/init/src/init.rs b/usr/init/src/init.rs index 17cb706fd..b2d08bfa5 100644 --- a/usr/init/src/init.rs +++ b/usr/init/src/init.rs @@ -989,7 +989,7 @@ pub fn upcall_test() { pub fn dynamic_replication_test() { info!("dynamic_replication_test START"); - + vibrio::syscalls::Process::set_replicas(false, 0).expect("Can't remove replica 0."); //vibrio::syscalls::Process::set_replicas(false, 1).expect("Can't remove replica 1."); @@ -1079,7 +1079,7 @@ pub extern "C" fn _start() -> ! 
{ #[cfg(feature = "test-core-alloc")] core_alloc_test(); - + #[cfg(feature = "test-dynamic-replication")] dynamic_replication_test(); From 652d5761941e17c5fa88400af04acb646da7a5ab Mon Sep 17 00:00:00 2001 From: Erika Hunhoff Date: Tue, 28 Nov 2023 21:53:46 +0000 Subject: [PATCH 18/32] Add logging, remove logging, fix kernel binary to specific address --- bootloader/src/kernel.rs | 35 ++++++++++++++++++++++- bootloader/src/main.rs | 2 +- bootloader/src/vspace.rs | 31 ++++++++++++++------ kernel/src/arch/x86_64/process.rs | 5 +++- kernel/src/memory/mod.rs | 2 +- kernel/src/nr.rs | 5 ++++ kernel/src/transport/shmem.rs | 2 ++ kernel/tests/s11_rackscale_benchmarks.rs | 10 +++---- lib/rpc/src/transport/shmem/queue_mpmc.rs | 11 +------ 9 files changed, 75 insertions(+), 28 deletions(-) diff --git a/bootloader/src/kernel.rs b/bootloader/src/kernel.rs index 18cbe4f0b..de5d746b0 100644 --- a/bootloader/src/kernel.rs +++ b/bootloader/src/kernel.rs @@ -5,7 +5,9 @@ use crate::alloc::vec::Vec; use bootloader_shared::TlsInfo; +use core::sync::atomic::{AtomicUsize, Ordering}; use elfloader::{self, ElfLoaderErr}; +use uefi::table::boot::AllocateType; use x86::bits64::paging::*; use crate::vspace::*; @@ -59,6 +61,11 @@ pub(crate) fn paddr_to_kernel_vaddr(paddr: PAddr) -> VAddr { /// displacement. pub const KERNEL_OFFSET: usize = 1 << 46; +static calls_to_alloc: AtomicUsize = AtomicUsize::new(0); + +const NUM_ALLOC_ADDRESSES: usize = 1; +static ALLOC_ADDRESSES: [usize; NUM_ALLOC_ADDRESSES] = [0x1dc0e000]; + /// This struct stores meta-data required to construct /// an address space for the kernel and relocate the /// kernel ELF binary into it. 
@@ -162,14 +169,40 @@ impl<'a> elfloader::ElfLoader for Kernel<'a> { is_page_aligned!(max_end), "max end is not aligned to page-size" ); + + let alloc_call_num = calls_to_alloc.fetch_add(1, Ordering::SeqCst); + let alloc_type = if alloc_call_num < NUM_ALLOC_ADDRESSES { + let alloc_addr = AllocateType::Address(ALLOC_ADDRESSES[alloc_call_num]); + info!( + "Attempting to allocate index {:?} at {:?}", + alloc_call_num, alloc_addr + ); + alloc_addr + } else { + warn!( + "Attempting to allocate index {:?} at AnyPage - this may not work for rackscale", + alloc_call_num + ); + AllocateType::AnyPages + }; + let pbase = VSpace::allocate_pages_aligned( + alloc_type, ((max_end - min_base) >> BASE_PAGE_SHIFT) as usize, uefi::table::boot::MemoryType(KERNEL_ELF), max_alignment, ); self.offset = VAddr::from(KERNEL_OFFSET + pbase.as_usize()); - info!("Kernel loaded at address: {:#x}", self.offset); + info!( + "Kernel loaded at address: {:#x} (paddr={:#x})", + self.offset, + pbase.as_usize() + ); + + if alloc_call_num < NUM_ALLOC_ADDRESSES { + assert!(pbase.as_usize() == ALLOC_ADDRESSES[alloc_call_num]); + } // Do the mappings: for (base, size, _alignment, action) in self.mapping.iter() { diff --git a/bootloader/src/main.rs b/bootloader/src/main.rs index 4e8eee06c..c28e64715 100644 --- a/bootloader/src/main.rs +++ b/bootloader/src/main.rs @@ -370,7 +370,7 @@ pub extern "C" fn uefi_start(handle: uefi::Handle, mut st: SystemTable) -> // Next create an address space for our kernel trace!("Allocate a PML4 (page-table root)"); - let pml4: PAddr = VSpace::allocate_one_page(); + let pml4: PAddr = VSpace::allocate_one_page(AllocateType::AnyPages); let pml4_table = unsafe { &mut *paddr_to_uefi_vaddr(pml4).as_mut_ptr::() }; let mut kernel = Kernel { diff --git a/bootloader/src/vspace.rs b/bootloader/src/vspace.rs index 688acd8ef..e37614816 100644 --- a/bootloader/src/vspace.rs +++ b/bootloader/src/vspace.rs @@ -348,14 +348,15 @@ impl<'a> VSpace<'a> { } /// A simple wrapper function for 
allocating just oen page. - pub(crate) fn allocate_one_page() -> PAddr { - let paddr = VSpace::allocate_pages(1, uefi::table::boot::MemoryType(KERNEL_PT)); + pub(crate) fn allocate_one_page(alloc_type: AllocateType) -> PAddr { + let paddr = VSpace::allocate_pages(alloc_type, 1, uefi::table::boot::MemoryType(KERNEL_PT)); trace!("allocate_one_page {:#x}", paddr); paddr } /// Does an allocation of physical memory where the base-address is a multiple of `align_to`. pub(crate) fn allocate_pages_aligned( + alloc_type: AllocateType, how_many: usize, typ: uefi::table::boot::MemoryType, align_to: u64, @@ -371,7 +372,7 @@ impl<'a> VSpace<'a> { assert!(actual_how_many >= how_many); // The region we allocated - let paddr = VSpace::allocate_pages(actual_how_many, typ); + let paddr = VSpace::allocate_pages(alloc_type, actual_how_many, typ); let end = paddr + (actual_how_many * BASE_PAGE_SIZE); // The region within the allocated one we actually want @@ -431,13 +432,17 @@ impl<'a> VSpace<'a> { /// /// Zeroes the memory we allocate (TODO: I'm not sure if this is already done by UEFI). /// Returns a `u64` containing the base to that. 
- pub(crate) fn allocate_pages(how_many: usize, typ: uefi::table::boot::MemoryType) -> PAddr { + pub(crate) fn allocate_pages( + alloc_type: AllocateType, + how_many: usize, + typ: uefi::table::boot::MemoryType, + ) -> PAddr { let st = system_table(); unsafe { match st .as_ref() .boot_services() - .allocate_pages(AllocateType::AnyPages, typ, how_many) + .allocate_pages(alloc_type, typ, how_many) { Ok(num) => { st.as_ref().boot_services().set_mem( @@ -453,17 +458,17 @@ impl<'a> VSpace<'a> { } fn new_pt(&mut self) -> PDEntry { - let paddr: PAddr = VSpace::allocate_one_page(); + let paddr: PAddr = VSpace::allocate_one_page(AllocateType::AnyPages); return PDEntry::new(paddr, PDFlags::P | PDFlags::RW); } fn new_pd(&mut self) -> PDPTEntry { - let paddr: PAddr = VSpace::allocate_one_page(); + let paddr: PAddr = VSpace::allocate_one_page(AllocateType::AnyPages); return PDPTEntry::new(paddr, PDPTFlags::P | PDPTFlags::RW); } fn new_pdpt(&mut self) -> PML4Entry { - let paddr: PAddr = VSpace::allocate_one_page(); + let paddr: PAddr = VSpace::allocate_one_page(AllocateType::AnyPages); return PML4Entry::new(paddr, PML4Flags::P | PML4Flags::RW); } @@ -521,10 +526,18 @@ impl<'a> VSpace<'a> { /// * The base should be a multiple of `BASE_PAGE_SIZE`. /// * The size should be a multiple of `BASE_PAGE_SIZE`. 
#[allow(unused)] - pub fn map(&mut self, base: VAddr, size: usize, rights: MapAction, palignment: u64) { + pub fn map( + &mut self, + alloc_type: AllocateType, + base: VAddr, + size: usize, + rights: MapAction, + palignment: u64, + ) { assert!(base.is_base_page_aligned(), "base is not page-aligned"); assert_eq!(size % BASE_PAGE_SIZE, 0, "size is not page-aligned"); let paddr = VSpace::allocate_pages_aligned( + alloc_type, size / BASE_PAGE_SIZE, uefi::table::boot::MemoryType(KERNEL_ELF), palignment, diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs index a077c8667..540cc3b91 100644 --- a/kernel/src/arch/x86_64/process.rs +++ b/kernel/src/arch/x86_64/process.rs @@ -971,8 +971,12 @@ impl Executor for Ring3Executor { "Run on remote replica?" ); + // THIS IS THE PROBLEM + log::info!("Before maybe switch vspace 2"); self.maybe_switch_vspace(); + log::info!("After maybe switch vspace 2"); let entry_point = unsafe { (*self.vcpu_kernel()).resume_with_upcall }; + log::info!("Entry point is: {:?}", entry_point); if entry_point == INVALID_EXECUTOR_START { Ring3Resumer::new_start(self.entry_point, self.stack_top()) @@ -980,7 +984,6 @@ impl Executor for Ring3Executor { // This is similar to `upcall` as it starts executing the defined upcall // handler, but on the regular stack (for that dispatcher) and not // the upcall stack. It's used to add a new core to a process. 
- let entry_point = unsafe { (*self.vcpu_kernel()).resume_with_upcall }; trace!("Added core entry point is at {:#x}", entry_point); let cpu_ctl = self.vcpu_addr().as_u64(); diff --git a/kernel/src/memory/mod.rs b/kernel/src/memory/mod.rs index 38e38c7e0..dddebacd6 100644 --- a/kernel/src/memory/mod.rs +++ b/kernel/src/memory/mod.rs @@ -438,7 +438,7 @@ impl KernelAllocator { } frames } else { - log::info!("try-refill-shmem needed_base_pages={needed_base_pages} needed_large_pages={needed_large_pages} total_needed_base_pages={total_needed_base_pages} total_needed_large_pages={total_needed_large_pages}"); + log::trace!("try-refill-shmem needed_base_pages={needed_base_pages} needed_large_pages={needed_large_pages} total_needed_base_pages={total_needed_base_pages} total_needed_large_pages={total_needed_large_pages}"); rpc_get_shmem_frames(None, total_needed_large_pages)? }; diff --git a/kernel/src/nr.rs b/kernel/src/nr.rs index 263115cfa..ae4eb0b86 100644 --- a/kernel/src/nr.rs +++ b/kernel/src/nr.rs @@ -49,6 +49,7 @@ lazy_static! 
{ let nr = Arc::try_new( NodeReplicated::new(num_replicas, |afc: AffinityChange| { + log::info!("Got AffinityChange 1: {:?}", afc); let pcm = kcb::per_core_mem(); //log::info!("Got AffinityChange: {:?}", afc); match afc { @@ -239,11 +240,15 @@ impl Dispatch for KernelNode { Op::AllocatePid => { // TODO(performance): O(n) scan probably not what we really // want, fine for now, MAX_PROCESSES is tiny + log::info!("in op alloc pid"); for i in 0..MAX_PROCESSES { if !self.process_map.contains_key(&i) { + log::info!("in op alloc pid 1"); self.process_map.try_reserve(1)?; + log::info!("in op alloc pid 2"); let r = self.process_map.insert(i, ()); assert!(r.is_none(), "!contains_key"); + log::info!("in op alloc pid 3"); return Ok(NodeResult::PidAllocated(i)); } } diff --git a/kernel/src/transport/shmem.rs b/kernel/src/transport/shmem.rs index 31b854c5d..72e78d898 100644 --- a/kernel/src/transport/shmem.rs +++ b/kernel/src/transport/shmem.rs @@ -178,6 +178,7 @@ impl ShmemDevice { // Note: leaving this code as a comment as a way to test if all shmem is writeable. 
+ /* #[cfg(feature = "rackscale")] match crate::CMDLINE.get().map_or(Mode::Native, |c| c.mode) { Mode::Controller => { @@ -213,6 +214,7 @@ impl ShmemDevice { } _ => {} } + */ // Map the MSI-X table into kernel space kvspace diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index f863af806..40e11d702 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -897,9 +897,9 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { .user_feature("rkapps:memcached-bench") .set_rackscale(true) .kernel_feature("pages-4k") - .kernel_feature("dynrep") .release() .build(); + //.kernel_feature("dynrep") fn controller_match_fn( proc: &mut PtySession, @@ -1010,13 +1010,13 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { let config = if is_smoke { MemcachedInternalConfig { - num_queries: 100_000_000, + num_queries: 100_000, mem_size: 16, } } else { MemcachedInternalConfig { - num_queries: 100_000_000, // TODO(rackscale): should be 100_000_000, - mem_size: 16, // TODO(rackscale): should be 32_000, + num_queries: 100_000, // TODO(rackscale): should be 100_000_000, + mem_size: 16, // TODO(rackscale): should be 32_000, } }; @@ -1032,7 +1032,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { test.cores_per_client = 4; test.cmd = format!( r#"init=memcachedbench.bin initargs={} appcmd='--x-benchmark-mem={} --x-benchmark-queries={}'"#, - 2 * 4, + test.num_clients * test.cores_per_client, config.mem_size, config.num_queries ); diff --git a/lib/rpc/src/transport/shmem/queue_mpmc.rs b/lib/rpc/src/transport/shmem/queue_mpmc.rs index 7febd1ca9..7a8fdb45a 100644 --- a/lib/rpc/src/transport/shmem/queue_mpmc.rs +++ b/lib/rpc/src/transport/shmem/queue_mpmc.rs @@ -154,24 +154,17 @@ impl<'a> State<'a> { // Calculate and check total data to push let push_data_len = values.iter().fold(0, |acc, x| acc + x.len()); assert!(push_data_len <= QUEUE_ENTRY_SIZE); - log::info!("Attempt to push 
{:?} bytes", push_data_len); if push_data_len == 0 { return true; } let mask = self.mask; - log::info!("before enq"); let mut pos = self.enqueue_pos(Relaxed); - log::info!("after enq"); loop { - log::info!("before node"); let node = &self.buffer[pos & mask]; - log::info!("after node"); let seq = (*node.get()).sequence.load(Acquire); - log::info!("after seq"); let diff: isize = seq as isize - pos as isize; - log::info!("seq {:#x} diff {:#x}", seq, diff); match diff { 0 => { @@ -184,8 +177,6 @@ impl<'a> State<'a> { // Copy each value into the queue let mut offset = 0; for d in values.iter() { - log::info!("d is at {:#x}", d.as_ptr() as usize); - (*node.get()).value[offset..offset + d.len()].copy_from_slice(d); offset += d.len(); } @@ -200,7 +191,7 @@ impl<'a> State<'a> { _ => pos = self.enqueue_pos(Relaxed), } } - log::info!("pushed {:?} bytes", push_data_len); + log::trace!("pushed {:?} bytes", push_data_len); true } From 95efcc4dcce8587a7dbb1731f614dff57145d545 Mon Sep 17 00:00:00 2001 From: Erika Hunhoff Date: Tue, 28 Nov 2023 22:18:39 +0000 Subject: [PATCH 19/32] Note in process.rs about maybe switch vspace --- kernel/src/arch/x86_64/process.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs index 540cc3b91..cbfce3790 100644 --- a/kernel/src/arch/x86_64/process.rs +++ b/kernel/src/arch/x86_64/process.rs @@ -1038,7 +1038,8 @@ impl Executor for Ring3Executor { "Switching from 0x{:x} to 0x{:x}", current_pml4, replica_pml4 ); - controlregs::cr3_write(self.pml4.into()); + // TODO: if the replica changes, maybe this sometimes needs to be self.pml4.into()? 
+ controlregs::cr3_write(replica_pml4.into()); } } } From 6e6bfaa05bf317cca26717970348019ffe3daa27 Mon Sep 17 00:00:00 2001 From: Erika Hunhoff Date: Wed, 29 Nov 2023 02:12:28 +0000 Subject: [PATCH 20/32] Saving debugging state - may need to revert these changes later --- kernel/src/arch/x86_64/process.rs | 58 ++++++++++++++++- kernel/src/arch/x86_64/vspace/page_table.rs | 71 ++++++++++++++++++++- kernel/src/memory/mod.rs | 11 +++- kernel/src/nr.rs | 2 +- kernel/src/nrproc.rs | 2 +- kernel/tests/s11_rackscale_benchmarks.rs | 8 ++- 6 files changed, 143 insertions(+), 9 deletions(-) diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs index cbfce3790..e3b3b5a40 100644 --- a/kernel/src/arch/x86_64/process.rs +++ b/kernel/src/arch/x86_64/process.rs @@ -1030,15 +1030,52 @@ impl Executor for Ring3Executor { } fn maybe_switch_vspace(&self) { + //use crate::arch::vspace::page_table::ReadOnlyPageTable; + let replica_pml4 = NrProcess::::ptroot(self.pid).expect("Can't read pml4"); unsafe { let current_pml4 = PAddr::from(controlregs::cr3()); + /* + let pml4_entry = paddr_to_kernel_vaddr(current_pml4).as_ptr::(); + let pml4_slice = core::slice::from_raw_parts(pml4_entry, 512); + let current_ropt = ReadOnlyPageTable { + pml4: pml4_slice.try_into().unwrap(), + }; + log::info!("Printing current read only page table"); + let current_walk = current_ropt.walk(); + log::info!("current len = {:?}", current_walk.len()); + + let pml4_entry_rep = paddr_to_kernel_vaddr(replica_pml4).as_ptr::(); + let pml4_slice_rep = core::slice::from_raw_parts(pml4_entry_rep, 512); + let rep_ropt = ReadOnlyPageTable { + pml4: pml4_slice_rep.try_into().unwrap(), + }; + log::info!("Printing replica read only page table"); + let rep_walk = rep_ropt.walk(); + + //assert!(current_walk == rep_walk); + let max = core::cmp::min(current_walk.len(), rep_walk.len()); + log::info!("current len = {:?} rep len = {:?}", current_walk.len(), rep_walk.len()); + for i in 0..max { + if 
current_walk[i] != rep_walk[i] { + panic!("index {:?} current={:#x} replica={:#x}", i, current_walk[i], rep_walk[i]); + } + } + */ + + /* + for i in 0..512 { + if pml4_slice[i] != pml4_slice_rep[i] { + log::info!("index={:?} current={:?} replica={:?}", i, pml4_slice[i], pml4_slice_rep[i]); + } + } + */ + if current_pml4 != replica_pml4 { info!( "Switching from 0x{:x} to 0x{:x}", current_pml4, replica_pml4 ); - // TODO: if the replica changes, maybe this sometimes needs to be self.pml4.into()? controlregs::cr3_write(replica_pml4.into()); } } @@ -1427,6 +1464,8 @@ impl Process for Ring3Process { module_name: String, writeable_sections: Vec, ) -> Result<(), KError> { + info!("IN PROCESS LOAD"); + self.pid = pid; // TODO(error-handling): properly unwind on error self.writeable_sections.clear(); @@ -1466,11 +1505,26 @@ impl Process for Ring3Process { // TODO(broken): Big (>= 2 MiB) allocations should be inserted here too // TODO(ugly): Find a better way to express this mess let kvspace = super::vspace::INITIAL_VSPACE.lock(); + + use crate::arch::vspace::page_table::ReadOnlyPageTable; + let pt = ReadOnlyPageTable { + pml4: &kvspace.pml4, + }; + let walk = pt.walk(); + info!("Walk is len: {:?}", walk.len()); + for (addr_idx, frame, action) in walk { + self.vspace + .map_frame(addr_idx, frame, action) + .expect("failed map"); + } + + /* for i in 128..=510 { let kernel_pml_entry = kvspace.pml4[i]; - trace!("Patched in kernel mappings at {:?}", kernel_pml_entry); + info!("Patched in kernel mappings at {:?}", kernel_pml_entry); self.vspace.page_table.pml4[i] = kernel_pml_entry; } + */ Ok(()) } diff --git a/kernel/src/arch/x86_64/vspace/page_table.rs b/kernel/src/arch/x86_64/vspace/page_table.rs index 824f60da9..d05de6d12 100644 --- a/kernel/src/arch/x86_64/vspace/page_table.rs +++ b/kernel/src/arch/x86_64/vspace/page_table.rs @@ -978,9 +978,11 @@ impl PageTable { } pub(crate) struct ReadOnlyPageTable<'a> { - pml4: &'a PML4, + pub pml4: &'a PML4, } +use alloc::vec::Vec; + 
impl<'a> ReadOnlyPageTable<'a> { /// Get read-only access to the current page-table. /// @@ -1021,6 +1023,73 @@ impl<'a> ReadOnlyPageTable<'a> { assert_ne!(entry.address(), PAddr::zero()); unsafe { transmute::(paddr_to_kernel_vaddr(entry.address())) } } + + pub fn walk(&self) -> Vec<(VAddr, Frame, MapAction)> { + log::info!("calling walk in PageTable"); + let mut my_walk = Vec::with_capacity(1024); + + // Do a DFS and free all page-table memory allocated below kernel-base, + // don't free the mapped frames -- we return them later through NR + for pml4_idx in 128..PAGE_SIZE_ENTRIES { + if self.pml4[pml4_idx].is_present() { + for pdpt_idx in 0..PAGE_SIZE_ENTRIES { + let pdpt = self.get_pdpt(self.pml4[pml4_idx]); + if pdpt[pdpt_idx].is_present() { + if !pdpt[pdpt_idx].is_page() { + for pd_idx in 0..PAGE_SIZE_ENTRIES { + let pd = self.get_pd(pdpt[pdpt_idx]); + if pd[pd_idx].is_present() { + if !pd[pd_idx].is_page() { + for pt_idx in 0..PAGE_SIZE_ENTRIES { + let pt = self.get_pt(pd[pd_idx]); + if pt[pt_idx].is_present() { + let addr = pt[pt_idx].address(); + let flags = pt[pt_idx].flags(); + let frame = Frame::new(addr, BASE_PAGE_SIZE, 0); + let vaddr_pos: VAddr = VAddr::from( + PML4_SLOT_SIZE * pml4_idx + + HUGE_PAGE_SIZE * pdpt_idx + + LARGE_PAGE_SIZE * pd_idx + + pt_idx * BASE_PAGE_SIZE, + ); + //let vaddr = paddr_to_kernel_vaddr(addr); + //log::info!("4K mapping addr={:?} vaddr={:?}", addr, vaddr); + my_walk.push((vaddr_pos, frame, flags.into())); + } + } + } else { + // is page + let addr = pd[pd_idx].address(); + let flags = pd[pd_idx].flags(); + let frame = Frame::new(addr, LARGE_PAGE_SIZE, 0); + let vaddr_pos: VAddr = VAddr::from( + PML4_SLOT_SIZE * pml4_idx + + HUGE_PAGE_SIZE * pdpt_idx + + LARGE_PAGE_SIZE * pd_idx, + ); + //let vaddr = paddr_to_kernel_vaddr(addr); + //log::info!("2 MB mapping addr={:?} vaddr={:?}", addr, vaddr); + my_walk.push((vaddr_pos, frame, flags.into())); + } + } + } + } else { + // Encountered Page is a 1 GiB mapping, nothing to free + 
let addr = pdpt[pdpt_idx].address(); + let flags = pdpt[pdpt_idx].flags(); + let frame = Frame::new(addr, HUGE_PAGE_SIZE, 0); // TODO: size is wrong + let vaddr_pos: VAddr = + VAddr::from(PML4_SLOT_SIZE * pml4_idx + HUGE_PAGE_SIZE * pdpt_idx); + //let vaddr = paddr_to_kernel_vaddr(addr); + //log::info!("1 GiB mapping addr={:?} vaddr={:?}", addr, vaddr); + my_walk.push((vaddr_pos, frame, flags.into())); + } + } + } + } + } + my_walk + } } impl<'a> AddressSpace for ReadOnlyPageTable<'a> { diff --git a/kernel/src/memory/mod.rs b/kernel/src/memory/mod.rs index dddebacd6..a0bb4ed54 100644 --- a/kernel/src/memory/mod.rs +++ b/kernel/src/memory/mod.rs @@ -761,6 +761,8 @@ unsafe impl GlobalAlloc for KernelAllocator { } unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 { + #[cfg(feature = "rackscale")] + let mut drop = true; try_per_core_mem().map_or_else( || { unreachable!("Trying to reallocate {:p} {:?} without a KCB.", ptr, layout); @@ -788,7 +790,8 @@ unsafe impl GlobalAlloc for KernelAllocator { } else if is_shmem_affinity(affinity) && !is_shmem_addr_with_affinity(ptr as u64, affinity, true) { // TODO(rackscale): should switch to non-shmem affinity for alloc below. // TODO(rackscale): check if shmem is a match for id? - //panic!("Trying to realloc shmem to wrong or non- shmem allocator"); + warn!("Trying to realloc shmem to wrong or non- shmem allocator"); + drop = false; } else if !is_shmem_affinity(affinity) && is_shmem_addr(ptr as u64, false, true) { // TODO(rackscale): should switch to use shmem affinity for alloc below. // TODO(rackscale): check if shmem is a match for id? 
@@ -806,6 +809,12 @@ unsafe impl GlobalAlloc for KernelAllocator { new_ptr, core::cmp::min(layout.size(), new_size), ); + #[cfg(feature = "rackscale")] + if drop { + self.dealloc(ptr, layout); + } + + #[cfg(not(feature = "rackscale"))] self.dealloc(ptr, layout); } new_ptr diff --git a/kernel/src/nr.rs b/kernel/src/nr.rs index ae4eb0b86..7d8f3df19 100644 --- a/kernel/src/nr.rs +++ b/kernel/src/nr.rs @@ -49,7 +49,7 @@ lazy_static! { let nr = Arc::try_new( NodeReplicated::new(num_replicas, |afc: AffinityChange| { - log::info!("Got AffinityChange 1: {:?}", afc); + log::trace!("Got AffinityChange 1: {:?}", afc); let pcm = kcb::per_core_mem(); //log::info!("Got AffinityChange: {:?}", afc); match afc { diff --git a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs index 8c4c9e4a2..ab366a514 100644 --- a/kernel/src/nrproc.rs +++ b/kernel/src/nrproc.rs @@ -33,7 +33,7 @@ pub(crate) fn register_thread_with_process_replicas() { #[cfg(not(feature = "rackscale"))] let node = *crate::environment::NODE_ID; #[cfg(feature = "rackscale")] - let node = 0; //*crate::environment::MACHINE_ID + let node = *crate::environment::MACHINE_ID; // node = 0; debug_assert!(PROCESS_TABLE.len() > node, "Invalid Node ID"); diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index 40e11d702..9f52e2671 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -1011,7 +1011,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { let config = if is_smoke { MemcachedInternalConfig { num_queries: 100_000, - mem_size: 16, + mem_size: 16, //4 * 1024, } } else { MemcachedInternalConfig { @@ -1023,12 +1023,14 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { let mut test = RackscaleRun::new("userspace-smp".to_string(), built); test.controller_match_fn = controller_match_fn; test.transport = transport; - test.shmem_size *= 2; + test.controller_timeout *= 2; //*= 8; + //test.client_timeout *= 8; + test.shmem_size 
= 1024 * 64; test.use_affinity_shmem = cfg!(feature = "affinity-shmem"); test.use_qemu_huge_pages = cfg!(feature = "affinity-shmem"); test.file_name = file_name.to_string(); test.run_dhcpd_for_baseline = true; - test.num_clients = 2; + test.num_clients = 3; test.cores_per_client = 4; test.cmd = format!( r#"init=memcachedbench.bin initargs={} appcmd='--x-benchmark-mem={} --x-benchmark-queries={}'"#, From 84dc6291bea4c88bdf7c0b58e607839897212431 Mon Sep 17 00:00:00 2001 From: Erika Hunhoff Date: Wed, 29 Nov 2023 18:07:30 +0000 Subject: [PATCH 21/32] pushing current debugging state --- kernel/src/arch/x86_64/irq.rs | 27 ++++++++++++++++-------- kernel/src/memory/mod.rs | 3 ++- kernel/tests/s11_rackscale_benchmarks.rs | 5 +++-- kernel/testutils/src/rackscale_runner.rs | 4 +++- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/kernel/src/arch/x86_64/irq.rs b/kernel/src/arch/x86_64/irq.rs index 3fe5c75fc..23db52a5b 100644 --- a/kernel/src/arch/x86_64/irq.rs +++ b/kernel/src/arch/x86_64/irq.rs @@ -533,36 +533,45 @@ unsafe fn timer_handler(_a: &ExceptionArguments) { use crate::arch::process::current_pid; let pid = current_pid().expect("dont have a pid?"); - if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(10) + if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(33) && *REPLICA_STATE == 0 { - info!("got a timer after 10s, remove rid 1"); + warn!("got a timer after 10s, remove rid 1"); let handles = nrproc::NrProcess::::remove_replica(pid, 1).expect("removed"); #[cfg(not(feature = "rackscale"))] super::tlb::shootdown(handles[0].clone()); unsafe { *REPLICA_STATE.as_mut_ptr() = 1 }; } - if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(20) + if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(36) && *REPLICA_STATE == 1 { - info!("got a timer after 20s, add rid 1"); - let handles = nrproc::NrProcess::::add_replica(pid, 1).expect("added"); + warn!("got a timer after 20s, add rid 1"); + 
let handles = + nrproc::NrProcess::::remove_replica(pid, 2).expect("removed"); #[cfg(not(feature = "rackscale"))] super::tlb::shootdown(handles[0].clone()); unsafe { *REPLICA_STATE.as_mut_ptr() = 2 }; } - if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(30) + if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(39) && *REPLICA_STATE == 2 { - info!("got a timer after 30s"); + warn!("got a timer after 30s"); unsafe { *REPLICA_STATE.as_mut_ptr() = 3 }; + + let handles = nrproc::NrProcess::::add_replica(pid, 1).expect("added"); + #[cfg(not(feature = "rackscale"))] + super::tlb::shootdown(handles[0].clone()); } - if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(40) + if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(32) && *REPLICA_STATE == 3 { - info!("got a timer after 40s"); + warn!("got a timer after 40s"); unsafe { *REPLICA_STATE.as_mut_ptr() = 4 }; + + let handles = nrproc::NrProcess::::add_replica(pid, 2).expect("added"); + #[cfg(not(feature = "rackscale"))] + super::tlb::shootdown(handles[0].clone()); } } let kcb = get_kcb(); diff --git a/kernel/src/memory/mod.rs b/kernel/src/memory/mod.rs index a0bb4ed54..5869868c0 100644 --- a/kernel/src/memory/mod.rs +++ b/kernel/src/memory/mod.rs @@ -795,7 +795,8 @@ unsafe impl GlobalAlloc for KernelAllocator { } else if !is_shmem_affinity(affinity) && is_shmem_addr(ptr as u64, false, true) { // TODO(rackscale): should switch to use shmem affinity for alloc below. // TODO(rackscale): check if shmem is a match for id? 
- panic!("Trying to realloc shmem using non-shmem allocator"); + warn!("Trying to realloc shmem using non-shmem allocator"); + drop = false; } } } diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index 9f52e2671..946f71f88 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -897,6 +897,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { .user_feature("rkapps:memcached-bench") .set_rackscale(true) .kernel_feature("pages-4k") + .kernel_feature("dynrep") .release() .build(); //.kernel_feature("dynrep") @@ -1010,7 +1011,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { let config = if is_smoke { MemcachedInternalConfig { - num_queries: 100_000, + num_queries: 100_000_000, mem_size: 16, //4 * 1024, } } else { @@ -1024,7 +1025,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { test.controller_match_fn = controller_match_fn; test.transport = transport; test.controller_timeout *= 2; //*= 8; - //test.client_timeout *= 8; + test.client_timeout *= 2; test.shmem_size = 1024 * 64; test.use_affinity_shmem = cfg!(feature = "affinity-shmem"); test.use_qemu_huge_pages = cfg!(feature = "affinity-shmem"); diff --git a/kernel/testutils/src/rackscale_runner.rs b/kernel/testutils/src/rackscale_runner.rs index ee17ddf8d..c5de20c53 100644 --- a/kernel/testutils/src/rackscale_runner.rs +++ b/kernel/testutils/src/rackscale_runner.rs @@ -397,11 +397,12 @@ impl RackscaleRun { let _ignore = dcm.process.kill(SIGKILL); // If there's been an error, print everything - if controller_ret.is_err() || (&client_rets).into_iter().any(|ret| ret.is_err()) { + if true { //controller_ret.is_err() || (&client_rets).into_iter().any(|ret| ret.is_err()) { let outputs = all_outputs.lock().expect("Failed to get output lock"); for (name, output) in outputs.iter() { log_qemu_out_with_name(None, name.to_string(), output.to_string()); } + /* if controller_ret.is_err() { let dcm_log = 
dcm.exp_eof(); if dcm_log.is_ok() { @@ -410,6 +411,7 @@ impl RackscaleRun { eprintln!("Failed to print DCM log."); } } + */ } for client_ret in client_rets { From 02b71d823dc5b1feb66c57b9f7cb67bec0208864 Mon Sep 17 00:00:00 2001 From: Erika Hunhoff Date: Wed, 29 Nov 2023 21:11:04 +0000 Subject: [PATCH 22/32] Updated parsing for per-thread throughput --- kernel/tests/s11_rackscale_benchmarks.rs | 43 ++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index 946f71f88..28db715c3 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -905,7 +905,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { fn controller_match_fn( proc: &mut PtySession, output: &mut String, - _cores_per_client: usize, + cores_per_client: usize, num_clients: usize, file_name: &str, is_baseline: bool, @@ -948,6 +948,30 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { *output += prev.as_str(); *output += matched.as_str(); + let mut thread_results = Vec::new(); + let mut num_not_finished = num_clients * cores_per_client; + while num_not_finished > 0 { + let (prev, matched) = proc.exp_regex(r#"thread.(\d+).*\r\r"#)?; + *output += prev.as_str(); + *output += matched.as_str(); + + if matched.contains("done") { + println!("> Thread done: {:?}", matched); + num_not_finished -= 1; + } else if matched.contains("executed") { + let matched = matched.replace("thread.", ""); + let tokens = matched.split(" ").collect::>(); + let thread_id = tokens[0].to_string(); + let queries = tokens[2].to_string(); + let time = tokens[5].to_string(); + println!( + "> thread {:?} performed {:?} queries in {:?} us", + thread_id, queries, time + ); + thread_results.push((thread_id, queries, time)); + } + } + // benchmark took 129 seconds let (prev, matched) = proc.exp_regex(r#"benchmark took (\d+) ms"#)?; println!("> {}", matched); @@ -986,7 
+1010,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { .open(file_name) .expect("Can't open file"); if write_headers { - let row = "git_rev,benchmark,nthreads,mem,queries,time,thpt,num_clients,num_replicas\n"; + let row = "git_rev,benchmark,nthreads,mem,queries,time,thpt,num_clients,num_replicas,thead_num\n"; let r = csv_file.write(row.as_bytes()); assert!(r.is_ok()); } @@ -996,14 +1020,27 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); assert!(r.is_ok()); let out = format!( - "memcached,{},{},{},{},{},{},{}", + "memcached,{},{},{},{},{},{},{},aggregate", b_threads, b_mem, b_queries, b_time, b_thpt, actual_num_clients, num_clients ); + let r = csv_file.write(out.as_bytes()); assert!(r.is_ok()); let r = csv_file.write("\n".as_bytes()); assert!(r.is_ok()); + for (thread_id, queries, time) in thread_results { + let out = format!( + "memcached,{},{},{},{},,{},{},{}", + b_threads, b_mem, queries, time, actual_num_clients, num_clients, thread_id + ); + + let r = csv_file.write(out.as_bytes()); + assert!(r.is_ok()); + let r = csv_file.write("\n".as_bytes()); + assert!(r.is_ok()); + } + println!("> {}", output); Ok(()) From 57187f9203c715d1fafb7e070f4175b48c8557dc Mon Sep 17 00:00:00 2001 From: Erika Hunhoff Date: Wed, 29 Nov 2023 21:29:59 +0000 Subject: [PATCH 23/32] Update librettos commit --- usr/rkapps/build.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/usr/rkapps/build.rs b/usr/rkapps/build.rs index 064c22d36..2e3dd2dbd 100644 --- a/usr/rkapps/build.rs +++ b/usr/rkapps/build.rs @@ -138,11 +138,11 @@ fn main() { .unwrap(); println!( - "CHECKOUT 1f36e49a3c5fc529cd80278a1bd73b9941c813e6 {:?}", + "CHECKOUT eece690294fbfed418f43034b5dc77290865f8cf {:?}", out_dir ); Command::new("git") - .args(&["checkout", "1f36e49a3c5fc529cd80278a1bd73b9941c813e6"]) + .args(&["checkout", "eece690294fbfed418f43034b5dc77290865f8cf"]) .current_dir(&Path::new(&out_dir)) 
.status() .unwrap(); From f81763ef19afad030da046bfd0656bc86d003366 Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Wed, 29 Nov 2023 22:11:47 -0800 Subject: [PATCH 24/32] Modifications for dynrep+new bios code. Signed-off-by: Gerd Zellweger --- kernel/src/arch/x86_64/irq.rs | 63 +++++++++++++++------ kernel/src/arch/x86_64/process.rs | 50 +++------------- kernel/src/arch/x86_64/syscall.rs | 26 +++++++-- kernel/src/arch/x86_64/tlb.rs | 11 ++++ kernel/src/arch/x86_64/vspace/page_table.rs | 8 ++- kernel/src/nrproc.rs | 4 +- kernel/src/process.rs | 2 +- lib/node-replication2 | 2 +- lib/vibrio/src/rumprt/crt/mod.rs | 7 +++ 9 files changed, 104 insertions(+), 69 deletions(-) diff --git a/kernel/src/arch/x86_64/irq.rs b/kernel/src/arch/x86_64/irq.rs index 23db52a5b..4318710c6 100644 --- a/kernel/src/arch/x86_64/irq.rs +++ b/kernel/src/arch/x86_64/irq.rs @@ -509,6 +509,14 @@ unsafe fn bkp_handler(a: &ExceptionArguments) { #[thread_local] pub(crate) static REPLICA_STATE: Lazy = Lazy::new(|| 0); + +pub static DYNREP_ENABLED: core::sync::atomic::AtomicBool = core::sync::atomic::AtomicBool::new(false); + +use lazy_static::lazy_static; +lazy_static! { + pub static ref DYNREP_TIME_ANCHOR: rawtime::Instant = rawtime::Instant::now(); +} + /// Handler for the timer exception. 
/// /// We currently use it to periodically make sure that a replica @@ -529,51 +537,74 @@ unsafe fn timer_handler(_a: &ExceptionArguments) { } #[cfg(feature = "dynrep")] - if *crate::environment::MT_ID == 4 { + if *crate::environment::MT_ID == 0 && DYNREP_ENABLED.load(core::sync::atomic::Ordering::SeqCst) { use crate::arch::process::current_pid; let pid = current_pid().expect("dont have a pid?"); - if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(33) + if DYNREP_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(0) && *REPLICA_STATE == 0 { - warn!("got a timer after 10s, remove rid 1"); + warn!("PHASE 1: remove rid 1"); + let handles = nrproc::NrProcess::::remove_replica(pid, 1).expect("removed"); + #[cfg(not(feature = "rackscale"))] super::tlb::shootdown(handles[0].clone()); + #[cfg(feature = "rackscale")] + super::tlb::remote_shootdown(handles); + unsafe { *REPLICA_STATE.as_mut_ptr() = 1 }; } - if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(36) + if DYNREP_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(3) && *REPLICA_STATE == 1 { - warn!("got a timer after 20s, add rid 1"); + warn!("PHASE 2: remove rid 2"); + let handles = - nrproc::NrProcess::::remove_replica(pid, 2).expect("removed"); + nrproc::NrProcess::::remove_replica(pid, 2).expect("removed"); + #[cfg(not(feature = "rackscale"))] super::tlb::shootdown(handles[0].clone()); + #[cfg(feature = "rackscale")] + super::tlb::remote_shootdown(handles); + unsafe { *REPLICA_STATE.as_mut_ptr() = 2 }; } - if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(39) + if DYNREP_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(6) && *REPLICA_STATE == 2 { - warn!("got a timer after 30s"); - unsafe { *REPLICA_STATE.as_mut_ptr() = 3 }; + warn!("PHASE 3: add rid 1"); let handles = nrproc::NrProcess::::add_replica(pid, 1).expect("added"); + #[cfg(not(feature = "rackscale"))] super::tlb::shootdown(handles[0].clone()); + #[cfg(feature = "rackscale")] + 
super::tlb::remote_shootdown(handles); + + unsafe { *REPLICA_STATE.as_mut_ptr() = 3 }; + } - if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(32) + if DYNREP_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(9) && *REPLICA_STATE == 3 { - warn!("got a timer after 40s"); - unsafe { *REPLICA_STATE.as_mut_ptr() = 4 }; + warn!("PHASE 4: add rid 2"); let handles = nrproc::NrProcess::::add_replica(pid, 2).expect("added"); + #[cfg(not(feature = "rackscale"))] super::tlb::shootdown(handles[0].clone()); + #[cfg(feature = "rackscale")] + super::tlb::remote_shootdown(handles); + + unsafe { *REPLICA_STATE.as_mut_ptr() = 4 }; } } + else { + //info!("dynrep not enabled MT_ID={} DYNREP_ENABLED.load(core::sync::atomic::Ordering::SeqCst)={}", *crate::environment::MT_ID, DYNREP_ENABLED.load(core::sync::atomic::Ordering::SeqCst)); + } + let kcb = get_kcb(); if super::process::has_executor() { @@ -798,10 +829,10 @@ pub extern "C" fn handle_generic_exception(a: ExceptionArguments) -> ! { // Return immediately TLB_TIME.update(|t| t + x86::time::rdtsc() - start); - let mut pborrow = super::process::CURRENT_EXECUTOR.borrow_mut(); - let p = pborrow.as_ref().unwrap(); - p.maybe_switch_vspace(); - drop(pborrow); + //let mut pborrow = super::process::CURRENT_EXECUTOR.borrow_mut(); + //let p = pborrow.as_ref().unwrap(); + //p.maybe_switch_vspace(); + //drop(pborrow); kcb_iret_handle(kcb).resume() } else { diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs index e3b3b5a40..7d0f66a9b 100644 --- a/kernel/src/arch/x86_64/process.rs +++ b/kernel/src/arch/x86_64/process.rs @@ -76,7 +76,7 @@ lazy_static! { pub(crate) static ref PROCESS_TABLE: ArrayVec>>>, MAX_PROCESSES> = { use crate::memory::shmem_affinity::mid_to_shmem_affinity; use crate::arch::kcb::per_core_mem; - use crate::environment::NUM_MACHINES; + //use crate::environment::NUM_MACHINES; if !crate::CMDLINE .get() @@ -107,7 +107,7 @@ lazy_static! { // Want at least one replica... 
let num_replicas = - NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes() * (*NUM_MACHINES))).unwrap(); + NonZeroUsize::new(3).unwrap(); let mut processes = ArrayVec::new(); for _pid in 0..MAX_PROCESSES { @@ -191,6 +191,7 @@ lazy_static! { } #[cfg(not(feature = "rackscale"))] +#[allow(unused_variables)] fn create_process_table( ) -> ArrayVec>>>, MAX_PROCESSES> { // Want at least one replica... @@ -213,6 +214,7 @@ fn create_process_table( match afc { AffinityChange::Replica(r) => { let affinity = { pcm.physical_memory.borrow().affinity }; + #[cfg(feature = "rackscale")] pcm.set_mem_affinity( crate::memory::shmem_affinity::mid_to_shmem_affinity(r), ) @@ -972,9 +974,7 @@ impl Executor for Ring3Executor { ); // THIS IS THE PROBLEM - log::info!("Before maybe switch vspace 2"); self.maybe_switch_vspace(); - log::info!("After maybe switch vspace 2"); let entry_point = unsafe { (*self.vcpu_kernel()).resume_with_upcall }; log::info!("Entry point is: {:?}", entry_point); @@ -1035,48 +1035,16 @@ impl Executor for Ring3Executor { let replica_pml4 = NrProcess::::ptroot(self.pid).expect("Can't read pml4"); unsafe { let current_pml4 = PAddr::from(controlregs::cr3()); - /* - let pml4_entry = paddr_to_kernel_vaddr(current_pml4).as_ptr::(); - let pml4_slice = core::slice::from_raw_parts(pml4_entry, 512); - let current_ropt = ReadOnlyPageTable { - pml4: pml4_slice.try_into().unwrap(), - }; - log::info!("Printing current read only page table"); - let current_walk = current_ropt.walk(); - log::info!("current len = {:?}", current_walk.len()); - - let pml4_entry_rep = paddr_to_kernel_vaddr(replica_pml4).as_ptr::(); - let pml4_slice_rep = core::slice::from_raw_parts(pml4_entry_rep, 512); - let rep_ropt = ReadOnlyPageTable { - pml4: pml4_slice_rep.try_into().unwrap(), - }; - log::info!("Printing replica read only page table"); - let rep_walk = rep_ropt.walk(); - - //assert!(current_walk == rep_walk); - let max = core::cmp::min(current_walk.len(), rep_walk.len()); - 
log::info!("current len = {:?} rep len = {:?}", current_walk.len(), rep_walk.len()); - for i in 0..max { - if current_walk[i] != rep_walk[i] { - panic!("index {:?} current={:#x} replica={:#x}", i, current_walk[i], rep_walk[i]); - } - } - */ - - /* - for i in 0..512 { - if pml4_slice[i] != pml4_slice_rep[i] { - log::info!("index={:?} current={:?} replica={:?}", i, pml4_slice[i], pml4_slice_rep[i]); - } - } - */ - if current_pml4 != replica_pml4 { - info!( + debug!( "Switching from 0x{:x} to 0x{:x}", current_pml4, replica_pml4 ); controlregs::cr3_write(replica_pml4.into()); + debug!("switched"); + } + else { + debug!("not switched, the same"); } } } diff --git a/kernel/src/arch/x86_64/syscall.rs b/kernel/src/arch/x86_64/syscall.rs index d73cd398f..0137b0610 100644 --- a/kernel/src/arch/x86_64/syscall.rs +++ b/kernel/src/arch/x86_64/syscall.rs @@ -294,18 +294,34 @@ impl ProcessDispatch for T { fn set_replicas(&self, add: u64, rid: u64) -> crate::error::KResult<(u64, u64)> { let pid = current_pid()?; + + if rid == 99 { + log::info!("call from memcached"); + lazy_static::initialize(&super::irq::DYNREP_TIME_ANCHOR); + super::irq::DYNREP_ENABLED.store(true, core::sync::atomic::Ordering::SeqCst); + info!("set_replicas: MT_ID={} DYNREP_ENABLED={} DYNREP_TIME_ANCHOR={}", + *crate::environment::MT_ID, + super::irq::DYNREP_ENABLED.load(core::sync::atomic::Ordering::SeqCst), + super::irq::DYNREP_TIME_ANCHOR.elapsed().as_nanos(), + ); + + return Ok((0, 0)); + } + let handles = if add > 0 { NrProcess::::add_replica(pid, rid as usize).expect("add_replica") } else { NrProcess::::remove_replica(pid, rid as usize).expect("remove_replica") }; - #[cfg(feature = "rackscale")] - super::tlb::remote_shootdown(handles); + if handles.len() > 0 { + #[cfg(feature = "rackscale")] + super::tlb::remote_shootdown(handles); - // There will only be one handle in non-rackscale build - #[cfg(not(feature = "rackscale"))] - super::tlb::shootdown(handles[0].clone()); + // There will only be one handle in 
non-rackscale build + #[cfg(not(feature = "rackscale"))] + super::tlb::shootdown(handles[0].clone()); + } Ok((0, 0)) } diff --git a/kernel/src/arch/x86_64/tlb.rs b/kernel/src/arch/x86_64/tlb.rs index 3a6e40ff9..fd5595be1 100644 --- a/kernel/src/arch/x86_64/tlb.rs +++ b/kernel/src/arch/x86_64/tlb.rs @@ -172,6 +172,10 @@ impl Shootdown { if self.vregion.start == 0u64 && self.vregion.end == 0u64 { log::info!("got special unmap for 0..0, skipping TLB flush"); + use crate::process::Executor; + let pborrow = super::process::CURRENT_EXECUTOR.borrow_mut(); + let p = pborrow.as_ref().unwrap(); + p.maybe_switch_vspace(); return; } @@ -237,6 +241,13 @@ pub(crate) fn remote_dequeue(mid: kpi::system::MachineId) { // Process locally, then mark as complete shootdown(h); s.acknowledge(); + + //use crate::process::Executor; + //let pborrow = super::process::CURRENT_EXECUTOR.borrow_mut(); + //let p = pborrow.as_ref().unwrap(); + //p.maybe_switch_vspace(); + //drop(pborrow); + } None => return, } diff --git a/kernel/src/arch/x86_64/vspace/page_table.rs b/kernel/src/arch/x86_64/vspace/page_table.rs index d05de6d12..3b2680402 100644 --- a/kernel/src/arch/x86_64/vspace/page_table.rs +++ b/kernel/src/arch/x86_64/vspace/page_table.rs @@ -37,6 +37,8 @@ pub(crate) struct PageTable { impl Clone for PageTable { fn clone(&self) -> Self { + let start = rawtime::Instant::now(); + fn alloc_frame() -> Frame { let frame_ptr = unsafe { let ptr = alloc::alloc::alloc_zeroed(PT_LAYOUT); @@ -71,7 +73,7 @@ impl Clone for PageTable { // Do a DFS and find all mapped entries and replicate them in the new `pt` for pml4_idx in 0..PAGE_SIZE_ENTRIES { - if pml4_idx < pml4_index(KERNEL_BASE.into()) && self.pml4[pml4_idx].is_present() { + if self.pml4[pml4_idx].is_present() { cloned_pt.pml4[pml4_idx] = new_pdpt(); for pdpt_idx in 0..PAGE_SIZE_ENTRIES { @@ -115,7 +117,7 @@ impl Clone for PageTable { } } } - + log::debug!("PageTable::clone() completed in {:?}. 
{:#x}", start.elapsed(), cloned_pt.pml4_address()); cloned_pt } } @@ -123,7 +125,7 @@ impl Clone for PageTable { impl Drop for PageTable { #[allow(unreachable_code)] fn drop(&mut self) { - log::info!("calling drop in PageTable, skipping for now"); + log::debug!("calling drop in PageTable, skipping for now"); return; use alloc::alloc::dealloc; diff --git a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs index ab366a514..b9b2a291a 100644 --- a/kernel/src/nrproc.rs +++ b/kernel/src/nrproc.rs @@ -33,7 +33,7 @@ pub(crate) fn register_thread_with_process_replicas() { #[cfg(not(feature = "rackscale"))] let node = *crate::environment::NODE_ID; #[cfg(feature = "rackscale")] - let node = *crate::environment::MACHINE_ID; // node = 0; + let node = *crate::environment::MACHINE_ID - 1; // node = 0; debug_assert!(PROCESS_TABLE.len() > node, "Invalid Node ID"); @@ -170,7 +170,7 @@ impl NrProcess

{ .write(*crate::environment::MT_ID) .add_replica(rid) .expect("add_replica failed"); - log::info!("added_replica {pid} {rid}"); + log::debug!("added_replica {pid} {rid}"); handle } diff --git a/kernel/src/process.rs b/kernel/src/process.rs index 0693832e8..6dca6eee0 100644 --- a/kernel/src/process.rs +++ b/kernel/src/process.rs @@ -49,7 +49,7 @@ pub(crate) type Pid = usize; pub(crate) type Eid = usize; /// How many (concurrent) processes the systems supports. -pub(crate) const MAX_PROCESSES: usize = 12; +pub(crate) const MAX_PROCESSES: usize = 1; /// How many registered "named" frames a process can have. pub(crate) const MAX_FRAMES_PER_PROCESS: usize = MAX_CORES; diff --git a/lib/node-replication2 b/lib/node-replication2 index eba8f64b5..23d793890 160000 --- a/lib/node-replication2 +++ b/lib/node-replication2 @@ -1 +1 @@ -Subproject commit eba8f64b55cbde8e510565a382465f77472c8238 +Subproject commit 23d79389079659a202abc03a4d9753130acce22d diff --git a/lib/vibrio/src/rumprt/crt/mod.rs b/lib/vibrio/src/rumprt/crt/mod.rs index bb004d9c1..6036a632e 100644 --- a/lib/vibrio/src/rumprt/crt/mod.rs +++ b/lib/vibrio/src/rumprt/crt/mod.rs @@ -114,6 +114,13 @@ static mut INIT_INFO: InitInfo = InitInfo { ], }; + +#[no_mangle] +pub unsafe extern "C" fn rs_start_dynrep_protocol() { + log::info!("start_dynrep_protocol"); + crate::syscalls::Process::set_replicas(true, 0x99).expect("start dynrep failed"); +} + /// Sets up ps strings. pub unsafe fn netbsd_userlevel_init() { extern "C" { From 4ae9f3fcff7120999d05837560b6eab85fd9eb0c Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Wed, 29 Nov 2023 22:23:40 -0800 Subject: [PATCH 25/32] Update memcached build. 
Signed-off-by: Gerd Zellweger --- kernel/tests/s11_rackscale_benchmarks.rs | 4 ++-- usr/rkapps/build.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index 28db715c3..52bf82eb6 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -1063,13 +1063,13 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { test.transport = transport; test.controller_timeout *= 2; //*= 8; test.client_timeout *= 2; - test.shmem_size = 1024 * 64; + test.shmem_size = 1024 * 2; test.use_affinity_shmem = cfg!(feature = "affinity-shmem"); test.use_qemu_huge_pages = cfg!(feature = "affinity-shmem"); test.file_name = file_name.to_string(); test.run_dhcpd_for_baseline = true; test.num_clients = 3; - test.cores_per_client = 4; + test.cores_per_client = 1; test.cmd = format!( r#"init=memcachedbench.bin initargs={} appcmd='--x-benchmark-mem={} --x-benchmark-queries={}'"#, test.num_clients * test.cores_per_client, diff --git a/usr/rkapps/build.rs b/usr/rkapps/build.rs index 2e3dd2dbd..ecd269b1e 100644 --- a/usr/rkapps/build.rs +++ b/usr/rkapps/build.rs @@ -138,11 +138,11 @@ fn main() { .unwrap(); println!( - "CHECKOUT eece690294fbfed418f43034b5dc77290865f8cf {:?}", + "CHECKOUT d999a1f02b6fe0797f27ea109c028e6ae278012f {:?}", out_dir ); Command::new("git") - .args(&["checkout", "eece690294fbfed418f43034b5dc77290865f8cf"]) + .args(&["checkout", "d999a1f02b6fe0797f27ea109c028e6ae278012f"]) .current_dir(&Path::new(&out_dir)) .status() .unwrap(); From 7c139d24159a5995d1925643364568142fcda642 Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Wed, 29 Nov 2023 22:41:30 -0800 Subject: [PATCH 26/32] remove depth limit on clone. 
Signed-off-by: Gerd Zellweger --- usr/rkapps/build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usr/rkapps/build.rs b/usr/rkapps/build.rs index ecd269b1e..755768c94 100644 --- a/usr/rkapps/build.rs +++ b/usr/rkapps/build.rs @@ -133,7 +133,7 @@ fn main() { println!("CLONE {:?}", out_dir); let url = "https://github.com/gz/librettos-packages.git"; Command::new("git") - .args(&["clone", "--depth=1", url, out_dir.as_str()]) + .args(&["clone", url, out_dir.as_str()]) .status() .unwrap(); From c4923ec2750000c5533df6d00933c9d76141610d Mon Sep 17 00:00:00 2001 From: Erika Hunhoff Date: Thu, 30 Nov 2023 07:10:30 +0000 Subject: [PATCH 27/32] Some config for dynrep and rump thread pinning hack for memcached --- kernel/tests/s11_rackscale_benchmarks.rs | 30 +++++++++++++++++------- lib/vibrio/src/rumprt/prt/mod.rs | 20 +++++++++++++++- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index 52bf82eb6..5586ea15a 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -1010,7 +1010,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { .open(file_name) .expect("Can't open file"); if write_headers { - let row = "git_rev,benchmark,nthreads,mem,queries,time,thpt,num_clients,num_replicas,thead_num\n"; + let row = "git_rev,benchmark,nthreads,mem,queries,time,thpt,num_clients,num_replicas,thread_num\n"; let r = csv_file.write(row.as_bytes()); assert!(r.is_ok()); } @@ -1020,8 +1020,15 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); assert!(r.is_ok()); let out = format!( - "memcached,{},{},{},{},{},{},{},aggregate", - b_threads, b_mem, b_queries, b_time, b_thpt, actual_num_clients, num_clients + "memcached,{},{},{},{},{},{},{},{}", + b_threads, + b_mem, + b_queries, + b_time, + b_thpt, + actual_num_clients, + num_clients, + "aggregate" 
); let r = csv_file.write(out.as_bytes()); @@ -1030,9 +1037,12 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { assert!(r.is_ok()); for (thread_id, queries, time) in thread_results { + let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes()); + assert!(r.is_ok()); + let out = format!( - "memcached,{},{},{},{},,{},{},{}", - b_threads, b_mem, queries, time, actual_num_clients, num_clients, thread_id + "memcached,{},{},{},{},{},{},{},{}", + b_threads, b_mem, queries, time, " ", actual_num_clients, num_clients, thread_id ); let r = csv_file.write(out.as_bytes()); @@ -1050,11 +1060,15 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { MemcachedInternalConfig { num_queries: 100_000_000, mem_size: 16, //4 * 1024, + //num_queries: 1_000_000_000, + //mem_size: 512, } } else { MemcachedInternalConfig { - num_queries: 100_000, // TODO(rackscale): should be 100_000_000, - mem_size: 16, // TODO(rackscale): should be 32_000, + num_queries: 100_000_000, + mem_size: 16, //4 * 1024, + //num_queries: 1_000_000_000, // 1_000_000_000, // TODO(rackscale): should be 100_000_000, + //mem_size: 512, // TODO(rackscale): should be 32_000, } }; @@ -1069,7 +1083,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { test.file_name = file_name.to_string(); test.run_dhcpd_for_baseline = true; test.num_clients = 3; - test.cores_per_client = 1; + test.cores_per_client = 1; // 2 test.cmd = format!( r#"init=memcachedbench.bin initargs={} appcmd='--x-benchmark-mem={} --x-benchmark-queries={}'"#, test.num_clients * test.cores_per_client, diff --git a/lib/vibrio/src/rumprt/prt/mod.rs b/lib/vibrio/src/rumprt/prt/mod.rs index 5582e8e23..685e520a0 100644 --- a/lib/vibrio/src/rumprt/prt/mod.rs +++ b/lib/vibrio/src/rumprt/prt/mod.rs @@ -191,7 +191,16 @@ pub unsafe extern "C" fn rumprun_makelwp( rump_pub_lwproc_switch(curlwp); let coreid = (rlid as usize) % AVAILABLE_CORES.load(Ordering::Relaxed); - let gtid = crate::rumprt::CPUIDX_TO_GTID.lock()[coreid]; + let 
hacky_coreid = match rlid as usize { + 2 => 0, + 11 => 1, + 12 => 2, + 13 => 3, + 14 => 4, + 15 => 5, + _ => coreid, + }; + let gtid = crate::rumprt::CPUIDX_TO_GTID.lock()[hacky_coreid]; let tid = Environment::thread().spawn_with_args( stack, Some(rumprun_makelwp_tramp), @@ -200,6 +209,15 @@ pub unsafe extern "C" fn rumprun_makelwp( None, tls_private, ); + trace!( + "rlid={:?}, available_cores={:?} rump_core_id={:?} gtid={:?}, tid={:?}", + rlid, + AVAILABLE_CORES.load(Ordering::Relaxed), + coreid, + gtid, + tid + ); + debug!( "rumprun_makelwp spawned {:?} on core {} (gtid={:?})", tid, coreid, gtid From 5748e0c959765973212f2973e3f44fb98ec9b693 Mon Sep 17 00:00:00 2001 From: Erika Hunhoff <54562339+hunhoffe@users.noreply.github.com> Date: Thu, 30 Nov 2023 11:00:33 -0700 Subject: [PATCH 28/32] change dynrep intervals to 5s --- kernel/src/arch/x86_64/irq.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/src/arch/x86_64/irq.rs b/kernel/src/arch/x86_64/irq.rs index 4318710c6..4adb1386e 100644 --- a/kernel/src/arch/x86_64/irq.rs +++ b/kernel/src/arch/x86_64/irq.rs @@ -556,7 +556,7 @@ unsafe fn timer_handler(_a: &ExceptionArguments) { unsafe { *REPLICA_STATE.as_mut_ptr() = 1 }; } - if DYNREP_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(3) + if DYNREP_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(5) && *REPLICA_STATE == 1 { warn!("PHASE 2: remove rid 2"); @@ -571,7 +571,7 @@ unsafe fn timer_handler(_a: &ExceptionArguments) { unsafe { *REPLICA_STATE.as_mut_ptr() = 2 }; } - if DYNREP_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(6) + if DYNREP_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(10) && *REPLICA_STATE == 2 { warn!("PHASE 3: add rid 1"); @@ -586,7 +586,7 @@ unsafe fn timer_handler(_a: &ExceptionArguments) { unsafe { *REPLICA_STATE.as_mut_ptr() = 3 }; } - if DYNREP_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(9) + if DYNREP_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(15) && *REPLICA_STATE == 
3 { warn!("PHASE 4: add rid 2"); From ac0965f5d48d70c3523502fffca4467a676d8d79 Mon Sep 17 00:00:00 2001 From: Reto Achermann Date: Thu, 30 Nov 2023 10:55:57 -0800 Subject: [PATCH 29/32] working 64GB config Signed-off-by: Reto Achermann --- kernel/tests/s11_rackscale_benchmarks.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs index 5586ea15a..db0291fe6 100644 --- a/kernel/tests/s11_rackscale_benchmarks.rs +++ b/kernel/tests/s11_rackscale_benchmarks.rs @@ -1059,14 +1059,14 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { let config = if is_smoke { MemcachedInternalConfig { num_queries: 100_000_000, - mem_size: 16, //4 * 1024, + mem_size: 64*1024, //4 * 1024, //num_queries: 1_000_000_000, //mem_size: 512, } } else { MemcachedInternalConfig { num_queries: 100_000_000, - mem_size: 16, //4 * 1024, + mem_size: 64*1024, //4 * 1024, //num_queries: 1_000_000_000, // 1_000_000_000, // TODO(rackscale): should be 100_000_000, //mem_size: 512, // TODO(rackscale): should be 32_000, } @@ -1075,14 +1075,15 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() { let mut test = RackscaleRun::new("userspace-smp".to_string(), built); test.controller_match_fn = controller_match_fn; test.transport = transport; - test.controller_timeout *= 2; //*= 8; - test.client_timeout *= 2; - test.shmem_size = 1024 * 2; + test.controller_timeout *= 100; //*= 8; TODO: make this dependent on the memory size + test.client_timeout *= 100; + test.shmem_size = std::cmp::max(1024 * 2, 2*config.mem_size); test.use_affinity_shmem = cfg!(feature = "affinity-shmem"); test.use_qemu_huge_pages = cfg!(feature = "affinity-shmem"); test.file_name = file_name.to_string(); test.run_dhcpd_for_baseline = true; test.num_clients = 3; + test.memory = 2*4096; test.cores_per_client = 1; // 2 test.cmd = format!( r#"init=memcachedbench.bin initargs={} appcmd='--x-benchmark-mem={} 
--x-benchmark-queries={}'"#, From 9dbf884e15ac56f940b6cc281de084bdc2bc2aff Mon Sep 17 00:00:00 2001 From: zmckevitt Date: Tue, 5 Dec 2023 22:26:38 -0500 Subject: [PATCH 30/32] Added metrics for page allocation/deallocation to page table replication --- kernel/src/arch/x86_64/vspace/page_table.rs | 50 +++++++++++++++++++-- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/kernel/src/arch/x86_64/vspace/page_table.rs b/kernel/src/arch/x86_64/vspace/page_table.rs index 3b2680402..259e7c928 100644 --- a/kernel/src/arch/x86_64/vspace/page_table.rs +++ b/kernel/src/arch/x86_64/vspace/page_table.rs @@ -15,6 +15,9 @@ use crate::error::KError; use crate::memory::vspace::*; use crate::memory::{kernel_vaddr_to_paddr, paddr_to_kernel_vaddr, Frame, PAddr, VAddr}; +use lazy_static::lazy_static; +use spin::Mutex; + /// Describes a potential modification operation on existing page tables. pub(super) const PT_LAYOUT: Layout = unsafe { Layout::from_size_align_unchecked(BASE_PAGE_SIZE, BASE_PAGE_SIZE) }; @@ -30,6 +33,12 @@ enum Modify { Unmap, } +// Stats for dynamic replication +lazy_static! { + pub static ref ALLOCS: Mutex = Mutex::new(0); + pub static ref DEALLOCS: Mutex = Mutex::new(0); +} + /// The actual page-table. We allocate the PML4 upfront. 
pub(crate) struct PageTable { pub pml4: Pin>, @@ -39,6 +48,13 @@ impl Clone for PageTable { fn clone(&self) -> Self { let start = rawtime::Instant::now(); + *ALLOCS.lock() += 1; + log::info!( + "PageTable::Allocations: {} (pml4 addr: {:?})", + *ALLOCS.lock(), + self.pml4_address() + ); + fn alloc_frame() -> Frame { let frame_ptr = unsafe { let ptr = alloc::alloc::alloc_zeroed(PT_LAYOUT); @@ -70,11 +86,16 @@ impl Clone for PageTable { } let mut cloned_pt = PageTable::new().expect("Can't clone PT"); - + let mut p_allocs = 0; // Do a DFS and find all mapped entries and replicate them in the new `pt` for pml4_idx in 0..PAGE_SIZE_ENTRIES { + + let reached_kernel = pml4_idx >= pml4_index(KERNEL_BASE.into()); if self.pml4[pml4_idx].is_present() { cloned_pt.pml4[pml4_idx] = new_pdpt(); + if !reached_kernel { + p_allocs += 1; + } for pdpt_idx in 0..PAGE_SIZE_ENTRIES { let pdpt = self.get_pdpt(self.pml4[pml4_idx]); @@ -83,6 +104,9 @@ impl Clone for PageTable { if pdpt[pdpt_idx].is_present() { if !pdpt[pdpt_idx].is_page() { cloned_pdpt[pdpt_idx] = new_pd(); + if !reached_kernel { + p_allocs += 1; + } let cloned_pdpt_entry = cloned_pdpt[pdpt_idx]; drop(cloned_pdpt); @@ -93,6 +117,9 @@ impl Clone for PageTable { if pd[pd_idx].is_present() { if !pd[pd_idx].is_page() { cloned_pd[pd_idx] = new_pt(); + if !reached_kernel { + p_allocs += 1; + } let cloned_pd_entry = cloned_pd[pd_idx]; drop(cloned_pd); @@ -117,7 +144,12 @@ impl Clone for PageTable { } } } - log::debug!("PageTable::clone() completed in {:?}. {:#x}", start.elapsed(), cloned_pt.pml4_address()); + log::info!("PageTable::Page Allocations: {}", p_allocs); + log::debug!( + "PageTable::clone() completed in {:?}. 
{:#x}", + start.elapsed(), + cloned_pt.pml4_address() + ); cloned_pt } } @@ -126,8 +158,14 @@ impl Drop for PageTable { #[allow(unreachable_code)] fn drop(&mut self) { log::debug!("calling drop in PageTable, skipping for now"); - return; - + *DEALLOCS.lock() += 1; + log::info!( + "PageTable::Deallocations: {} (pml4 addr: {:?})", + *DEALLOCS.lock(), + self.pml4_address() + ); + // return; + let mut p_deallocs = 0; use alloc::alloc::dealloc; // Do a DFS and free all page-table memory allocated below kernel-base, // don't free the mapped frames -- we return them later through NR @@ -149,6 +187,7 @@ impl Drop for PageTable { let addr = pd[pd_idx].address(); let vaddr = paddr_to_kernel_vaddr(addr); unsafe { dealloc(vaddr.as_mut_ptr(), PT_LAYOUT) }; + p_deallocs += 1; } } else { // Encountered a 2 MiB mapping, nothing to free @@ -158,6 +197,7 @@ impl Drop for PageTable { let addr = pdpt[pdpt_idx].address(); let vaddr = paddr_to_kernel_vaddr(addr); unsafe { dealloc(vaddr.as_mut_ptr(), PT_LAYOUT) }; + p_deallocs += 1; } else { // Encountered Page is a 1 GiB mapping, nothing to free } @@ -168,9 +208,11 @@ impl Drop for PageTable { let addr = self.pml4[pml4_idx].address(); let vaddr = paddr_to_kernel_vaddr(addr); unsafe { dealloc(vaddr.as_mut_ptr(), PT_LAYOUT) }; + p_deallocs += 1; self.pml4[pml4_idx] = PML4Entry(0x0); } } + log::info!("PageTable::Page Deallocations: {}", p_deallocs); } } From cb513936e8701f1729867382310f4df4f23f45d9 Mon Sep 17 00:00:00 2001 From: zmckevitt Date: Tue, 5 Dec 2023 22:29:23 -0500 Subject: [PATCH 31/32] Fixed formatting for page table metrics --- kernel/src/arch/x86_64/vspace/page_table.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/src/arch/x86_64/vspace/page_table.rs b/kernel/src/arch/x86_64/vspace/page_table.rs index 259e7c928..fccae3fb1 100644 --- a/kernel/src/arch/x86_64/vspace/page_table.rs +++ b/kernel/src/arch/x86_64/vspace/page_table.rs @@ -89,7 +89,6 @@ impl Clone for PageTable { let mut p_allocs = 0; // Do a DFS and find 
all mapped entries and replicate them in the new `pt` for pml4_idx in 0..PAGE_SIZE_ENTRIES { - let reached_kernel = pml4_idx >= pml4_index(KERNEL_BASE.into()); if self.pml4[pml4_idx].is_present() { cloned_pt.pml4[pml4_idx] = new_pdpt(); From 68bb6ac939632951f9366c104b148fe301475cda Mon Sep 17 00:00:00 2001 From: Erika Hunhoff Date: Wed, 6 Dec 2023 19:15:49 -0700 Subject: [PATCH 32/32] update memcached --- usr/rkapps/build.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/usr/rkapps/build.rs b/usr/rkapps/build.rs index 755768c94..baa777371 100644 --- a/usr/rkapps/build.rs +++ b/usr/rkapps/build.rs @@ -138,11 +138,11 @@ fn main() { .unwrap(); println!( - "CHECKOUT d999a1f02b6fe0797f27ea109c028e6ae278012f {:?}", + "CHECKOUT 5c42c159f781fa62a269dd99fcb8ded6389049e0 {:?}", out_dir ); Command::new("git") - .args(&["checkout", "d999a1f02b6fe0797f27ea109c028e6ae278012f"]) + .args(&["checkout", "5c42c159f781fa62a269dd99fcb8ded6389049e0"]) .current_dir(&Path::new(&out_dir)) .status() .unwrap();