From 628c56d7f0371202d047496ad23b5eacf1a96fe4 Mon Sep 17 00:00:00 2001
From: Gerd Zellweger <mail@gerdzellweger.com>
Date: Sun, 6 Aug 2023 22:33:39 -0700
Subject: [PATCH 01/32] WiP on dynamic NR.

Signed-off-by: Gerd Zellweger <mail@gerdzellweger.com>
---
 Cargo.lock                                    | 1078 +++++++++++++++--
 Cargo.toml                                    |    1 +
 kernel/Cargo.toml                             |    3 +-
 kernel/run.py                                 |    3 +-
 kernel/src/arch/unix/kcb.rs                   |    2 +-
 kernel/src/arch/unix/mod.rs                   |    2 +-
 kernel/src/arch/unix/process.rs               |    2 +-
 kernel/src/arch/x86_64/coreboot.rs            |   29 +-
 kernel/src/arch/x86_64/mod.rs                 |   41 +-
 .../x86_64/rackscale/get_shmem_structure.rs   |    2 +-
 kernel/src/environment.rs                     |    2 +-
 kernel/src/integration_tests.rs               |    2 +-
 kernel/src/nr.rs                              |   12 +-
 13 files changed, 1048 insertions(+), 131 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 1b11d08f0..050df3015 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -36,6 +36,12 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
 
+[[package]]
+name = "ahash"
+version = "0.3.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217"
+
 [[package]]
 name = "ahash"
 version = "0.7.6"
@@ -44,7 +50,7 @@ checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
 dependencies = [
  "getrandom 0.2.7",
  "once_cell",
- "version_check",
+ "version_check 0.9.4",
 ]
 
 [[package]]
@@ -65,6 +71,15 @@ dependencies = [
  "as-slice",
 ]
 
+[[package]]
+name = "ansi_term"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
+dependencies = [
+ "winapi 0.3.9",
+]
+
 [[package]]
 name = "apic"
 version = "0.0.1"
@@ -104,7 +119,7 @@ checksum = "0609c78bd572f4edc74310dfb63a01f5609d53fa8b4dd7c4d98aef3b3e8d72d1"
 dependencies = [
  "proc-macro-hack",
  "quote",
- "syn",
+ "syn 1.0.98",
 ]
 
 [[package]]
@@ -125,6 +140,17 @@ dependencies = [
  "stable_deref_trait",
 ]
 
+[[package]]
+name = "async-trait"
+version = "0.1.72"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc6dde6e4ed435a4c1ee4e73592f5ba9da2151af10076cc04858746af9352d09"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.28",
+]
+
 [[package]]
 name = "atomic-polyfill"
 version = "0.1.8"
@@ -212,18 +238,58 @@ version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f8fe8f5a8a398345e52358e18ff07cc17a568fbca5c6f73873d3a62056309603"
 
-[[package]]
-name = "base64ct"
-version = "1.6.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
-
 [[package]]
 name = "beef"
 version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
 
+[[package]]
+name = "bench_utils"
+version = "0.1.0"
+dependencies = [
+ "arr_macro",
+ "async-trait",
+ "core_affinity",
+ "crossbeam-utils 0.8.10",
+ "csv",
+ "hwloc2",
+ "lazy_static",
+ "log",
+ "nix",
+ "nr2",
+ "num_cpus",
+ "parking_lot 0.12.1",
+ "rand 0.8.5",
+ "serde",
+ "static_assertions",
+ "tokio",
+ "urcu-sys",
+]
+
+[[package]]
+name = "bindgen"
+version = "0.51.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebd71393f1ec0509b553aa012b9b58e81dadbdff7130bd3b8cba576e69b32f75"
+dependencies = [
+ "bitflags",
+ "cexpr",
+ "cfg-if 0.1.10",
+ "clang-sys",
+ "clap",
+ "env_logger 0.6.2",
+ "lazy_static",
+ "log",
+ "peeking_take_while",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash",
+ "shlex",
+ "which 3.1.1",
+]
+
 [[package]]
 name = "bit-set"
 version = "0.5.2"
@@ -257,15 +323,6 @@ version = "1.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
 
-[[package]]
-name = "block-buffer"
-version = "0.10.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
-dependencies = [
- "generic-array 0.14.6",
-]
-
 [[package]]
 name = "bootloader"
 version = "0.0.1"
@@ -306,12 +363,27 @@ version = "1.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
 
+[[package]]
+name = "bytes"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be"
+
 [[package]]
 name = "cc"
 version = "1.0.73"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
 
+[[package]]
+name = "cexpr"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fce5b5fb86b0c57c20c834c1b412fd09c77c8a59b9473f86272709e78874cd1d"
+dependencies = [
+ "nom",
+]
+
 [[package]]
 name = "cfg-if"
 version = "0.1.10"
@@ -331,7 +403,43 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ff41a3c2c1e39921b9003de14bf0439c7b63a9039637c291e1a64925d8ddfa45"
 dependencies = [
  "owning_ref",
- "parking_lot",
+ "parking_lot 0.4.8",
+]
+
+[[package]]
+name = "clang-sys"
+version = "0.28.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "81de550971c976f176130da4b2978d3b524eaa0fd9ac31f3ceb5ae1231fb4853"
+dependencies = [
+ "glob",
+ "libc 0.2.126",
+ "libloading",
+]
+
+[[package]]
+name = "clap"
+version = "2.34.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c"
+dependencies = [
+ "ansi_term",
+ "atty",
+ "bitflags",
+ "strsim",
+ "textwrap",
+ "unicode-width",
+ "vec_map",
+ "yaml-rust",
+]
+
+[[package]]
+name = "cloudabi"
+version = "0.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
+dependencies = [
+ "bitflags",
 ]
 
 [[package]]
@@ -341,7 +449,7 @@ dependencies = [
  "arr_macro",
  "chashmap",
  "crossbeam-queue",
- "crossbeam-utils",
+ "crossbeam-utils 0.8.10",
  "env_logger 0.8.4",
  "log",
  "static_assertions",
@@ -356,6 +464,18 @@ dependencies = [
  "memchr",
 ]
 
+[[package]]
+name = "core_affinity"
+version = "0.5.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f8a03115cc34fb0d7c321dd154a3914b3ca082ccc5c11d91bf7117dbbe7171f"
+dependencies = [
+ "kernel32-sys",
+ "libc 0.2.126",
+ "num_cpus",
+ "winapi 0.2.8",
+]
+
 [[package]]
 name = "cortex-m"
 version = "0.6.7"
@@ -393,6 +513,33 @@ dependencies = [
  "riscv",
 ]
 
+[[package]]
+name = "crossbeam-epoch"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "058ed274caafc1f60c4997b5fc07bf7dc7cca454af7c6e81edffe5f33f70dace"
+dependencies = [
+ "autocfg",
+ "cfg-if 0.1.10",
+ "crossbeam-utils 0.7.2",
+ "lazy_static",
+ "maybe-uninit",
+ "memoffset 0.5.6",
+ "scopeguard",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.15"
+source = "git+https://github.com/crossbeam-rs/crossbeam#cd412be043b247505f2d73f9e9e41fc89557d500"
+dependencies = [
+ "autocfg",
+ "cfg-if 1.0.0",
+ "crossbeam-utils 0.8.16",
+ "memoffset 0.9.0",
+ "scopeguard",
+]
+
 [[package]]
 name = "crossbeam-queue"
 version = "0.3.5"
@@ -400,7 +547,29 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1f25d8400f4a7a5778f0e4e52384a48cbd9b5c495d110786187fc750075277a2"
 dependencies = [
  "cfg-if 1.0.0",
- "crossbeam-utils",
+ "crossbeam-utils 0.8.10",
+]
+
+[[package]]
+name = "crossbeam-skiplist"
+version = "0.1.1"
+source = "git+https://github.com/crossbeam-rs/crossbeam#cd412be043b247505f2d73f9e9e41fc89557d500"
+dependencies = [
+ "cfg-if 1.0.0",
+ "crossbeam-epoch 0.9.15",
+ "crossbeam-utils 0.8.16",
+ "scopeguard",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8"
+dependencies = [
+ "autocfg",
+ "cfg-if 0.1.10",
+ "lazy_static",
 ]
 
 [[package]]
@@ -414,13 +583,11 @@ dependencies = [
 ]
 
 [[package]]
-name = "crypto-common"
-version = "0.1.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
+name = "crossbeam-utils"
+version = "0.8.16"
+source = "git+https://github.com/crossbeam-rs/crossbeam#cd412be043b247505f2d73f9e9e41fc89557d500"
 dependencies = [
- "generic-array 0.14.6",
- "typenum",
+ "cfg-if 1.0.0",
 ]
 
 [[package]]
@@ -462,7 +629,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f877be4f7c9f246b183111634f75baa039715e3f46ce860677d3b19a69fb229c"
 dependencies = [
  "quote",
- "syn",
+ "syn 1.0.98",
 ]
 
 [[package]]
@@ -478,13 +645,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4f8a51dd197fa6ba5b4dc98a990a43cc13693c23eb0089ebb0fcc1f04152bca6"
 
 [[package]]
-name = "digest"
-version = "0.10.7"
+name = "custom_error_core"
+version = "1.8.0"
+source = "git+https://github.com/gz/custom_error.git#be6af8aa1ad227b2686c3fdfd94a0a249767bf6f"
+
+[[package]]
+name = "dashmap"
+version = "4.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
+checksum = "e77a43b28d0668df09411cb0bc9a8c2adc40f9a048afe863e05fd43251e8e39c"
 dependencies = [
- "block-buffer",
- "crypto-common",
+ "cfg-if 1.0.0",
+ "num_cpus",
 ]
 
 [[package]]
@@ -495,7 +667,7 @@ checksum = "3bf95dc3f046b9da4f2d51833c0d3547d8564ef6910f5c1ed130306a75b92886"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 1.0.98",
 ]
 
 [[package]]
@@ -558,6 +730,19 @@ dependencies = [
  "termcolor",
 ]
 
+[[package]]
+name = "env_logger"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aafcde04e90a5226a6443b7aabdb016ba2f8307c847d524724bd9b346dd1a2d3"
+dependencies = [
+ "atty",
+ "humantime 1.3.0",
+ "log",
+ "regex",
+ "termcolor",
+]
+
 [[package]]
 name = "env_logger"
 version = "0.8.4"
@@ -605,6 +790,12 @@ dependencies = [
  "libc 0.2.126",
 ]
 
+[[package]]
+name = "error-chain"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9435d864e017c3c6afeac1654189b06cdb491cf2ff73dbf0d73b0f292f42ff8"
+
 [[package]]
 name = "error-chain"
 version = "0.12.4"
@@ -612,7 +803,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc"
 dependencies = [
  "backtrace",
- "version_check",
+ "version_check 0.9.4",
+]
+
+[[package]]
+name = "evmap"
+version = "8.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "15ddfe939e67347afdefc312443bf3c2c61ac79153243669ae3a594e56ab2225"
+dependencies = [
+ "smallvec 1.11.0",
 ]
 
 [[package]]
@@ -632,6 +832,18 @@ dependencies = [
  "instant",
 ]
 
+[[package]]
+name = "flurry"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8c0a35f7b50e99185a2825541946252f669f3c3ca77801357cd682a1b356bb3e"
+dependencies = [
+ "ahash 0.3.8",
+ "crossbeam-epoch 0.8.2",
+ "num_cpus",
+ "parking_lot 0.10.2",
+]
+
 [[package]]
 name = "fnv"
 version = "1.0.7"
@@ -653,6 +865,95 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
 
+[[package]]
+name = "futures"
+version = "0.3.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40"
+dependencies = [
+ "futures-channel",
+ "futures-core",
+ "futures-executor",
+ "futures-io",
+ "futures-sink",
+ "futures-task",
+ "futures-util",
+]
+
+[[package]]
+name = "futures-channel"
+version = "0.3.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2"
+dependencies = [
+ "futures-core",
+ "futures-sink",
+]
+
+[[package]]
+name = "futures-core"
+version = "0.3.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c"
+
+[[package]]
+name = "futures-executor"
+version = "0.3.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0"
+dependencies = [
+ "futures-core",
+ "futures-task",
+ "futures-util",
+]
+
+[[package]]
+name = "futures-io"
+version = "0.3.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964"
+
+[[package]]
+name = "futures-macro"
+version = "0.3.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.28",
+]
+
+[[package]]
+name = "futures-sink"
+version = "0.3.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e"
+
+[[package]]
+name = "futures-task"
+version = "0.3.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65"
+
+[[package]]
+name = "futures-util"
+version = "0.3.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533"
+dependencies = [
+ "futures-channel",
+ "futures-core",
+ "futures-io",
+ "futures-macro",
+ "futures-sink",
+ "futures-task",
+ "memchr",
+ "pin-project-lite",
+ "pin-utils",
+ "slab",
+]
+
 [[package]]
 name = "gdbstub"
 version = "0.5.0"
@@ -675,6 +976,19 @@ dependencies = [
  "num-traits",
 ]
 
+[[package]]
+name = "generator"
+version = "0.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5cc16584ff22b460a382b7feec54b23d2908d858152e5739a120b949293bd74e"
+dependencies = [
+ "cc",
+ "libc 0.2.126",
+ "log",
+ "rustversion",
+ "windows",
+]
+
 [[package]]
 name = "generic-array"
 version = "0.12.4"
@@ -700,7 +1014,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9"
 dependencies = [
  "typenum",
- "version_check",
+ "version_check 0.9.4",
 ]
 
 [[package]]
@@ -743,6 +1057,12 @@ dependencies = [
  "stable_deref_trait",
 ]
 
+[[package]]
+name = "glob"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
+
 [[package]]
 name = "half"
 version = "1.8.2"
@@ -764,7 +1084,7 @@ version = "0.11.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
 dependencies = [
- "ahash",
+ "ahash 0.7.6",
 ]
 
 [[package]]
@@ -773,7 +1093,7 @@ version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
 dependencies = [
- "ahash",
+ "ahash 0.7.6",
 ]
 
 [[package]]
@@ -839,14 +1159,12 @@ name = "init"
 version = "0.1.0"
 dependencies = [
  "arrayvec",
- "base64ct",
  "cstr_core",
  "hashbrown 0.11.2",
  "kpi",
  "lazy_static",
  "lineup",
  "log",
- "md-5",
  "num-traits",
  "proptest 1.0.0 (git+https://github.com/gz/proptest.git?branch=x86-asm)",
  "rawtime",
@@ -911,7 +1229,7 @@ dependencies = [
  "core2",
  "env_logger 0.9.0",
  "log",
- "memoffset",
+ "memoffset 0.6.5",
  "serde",
  "serde_cbor",
  "static_assertions",
@@ -945,6 +1263,16 @@ version = "0.2.126"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
 
+[[package]]
+name = "libloading"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2b111a074963af1d37a139918ac6d49ad1d0d5e47f72fd55388619691a7d753"
+dependencies = [
+ "cc",
+ "winapi 0.3.9",
+]
+
 [[package]]
 name = "libm"
 version = "0.2.2"
@@ -958,7 +1286,7 @@ dependencies = [
  "arr_macro",
  "backtracer_core",
  "crossbeam-queue",
- "crossbeam-utils",
+ "crossbeam-utils 0.8.10",
  "either",
  "env_logger 0.9.0",
  "fringe",
@@ -976,6 +1304,15 @@ version = "0.0.46"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d4d2456c373231a208ad294c33dc5bff30051eafd954cd4caae83a712b12854d"
 
+[[package]]
+name = "lock_api"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c4da24a77a3d8a6d4862d95f72e6fdb9c09a643ecdb402d754004a557f2bec75"
+dependencies = [
+ "scopeguard",
+]
+
 [[package]]
 name = "lock_api"
 version = "0.4.7"
@@ -1015,7 +1352,20 @@ dependencies = [
  "proc-macro2",
  "quote",
  "regex-syntax",
- "syn",
+ "syn 1.0.98",
+]
+
+[[package]]
+name = "loom"
+version = "0.5.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff50ecb28bb86013e935fb6683ab1f6d3a20016f123c76fd4c27470076ac30f5"
+dependencies = [
+ "cfg-if 1.0.0",
+ "generator",
+ "scoped-tls",
+ "tracing",
+ "tracing-subscriber",
 ]
 
 [[package]]
@@ -1030,6 +1380,15 @@ version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0ca88d725a0a943b096803bd34e73a4437208b6077654cc4ecb2947a5f91618d"
 
+[[package]]
+name = "matchers"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
+dependencies = [
+ "regex-automata",
+]
+
 [[package]]
 name = "matches"
 version = "0.1.9"
@@ -1042,16 +1401,6 @@ version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00"
 
-[[package]]
-name = "md-5"
-version = "0.10.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
-dependencies = [
- "cfg-if 1.0.0",
- "digest",
-]
-
 [[package]]
 name = "memchr"
 version = "2.5.0"
@@ -1069,22 +1418,63 @@ dependencies = [
 
 [[package]]
 name = "memoffset"
-version = "0.6.5"
+version = "0.5.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce"
+checksum = "043175f069eda7b85febe4a74abbaeff828d9f8b448515d3151a14a3542811aa"
 dependencies = [
  "autocfg",
 ]
 
 [[package]]
-name = "miniz_oxide"
-version = "0.5.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
+name = "memoffset"
+version = "0.6.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "memoffset"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "metadeps"
+version = "1.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73b122901b3a675fac8cecf68dcb2f0d3036193bc861d1ac0e1c337f7d5254c2"
+dependencies = [
+ "error-chain 0.10.0",
+ "pkg-config",
+ "toml",
+]
+
+[[package]]
+name = "miniz_oxide"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6f5c75688da582b8ffc1f1799e9db273f32133c49e048f614d22ec3256773ccc"
 dependencies = [
  "adler",
 ]
 
+[[package]]
+name = "mio"
+version = "0.8.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2"
+dependencies = [
+ "libc 0.2.126",
+ "log",
+ "wasi 0.11.0+wasi-snapshot-preview1",
+ "windows-sys 0.48.0",
+]
+
 [[package]]
 name = "mmap"
 version = "0.1.1"
@@ -1119,7 +1509,7 @@ dependencies = [
  "bitflags",
  "cfg-if 1.0.0",
  "libc 0.2.126",
- "memoffset",
+ "memoffset 0.6.5",
 ]
 
 [[package]]
@@ -1128,12 +1518,69 @@ version = "0.1.0"
 dependencies = [
  "arr_macro",
  "chashmap",
- "crossbeam-utils",
+ "crossbeam-utils 0.8.10",
  "log",
  "rand 0.7.3",
  "static_assertions",
 ]
 
+[[package]]
+name = "nom"
+version = "4.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6"
+dependencies = [
+ "memchr",
+ "version_check 0.1.5",
+]
+
+[[package]]
+name = "nr2"
+version = "0.2.0"
+dependencies = [
+ "arr_macro",
+ "arrayvec",
+ "bench_utils",
+ "chashmap",
+ "clap",
+ "crossbeam-queue",
+ "crossbeam-skiplist",
+ "crossbeam-utils 0.8.10",
+ "dashmap",
+ "env_logger 0.9.0",
+ "evmap",
+ "flurry",
+ "futures",
+ "hashbrown 0.11.2",
+ "log",
+ "loom",
+ "nrfs",
+ "parking_lot 0.12.1",
+ "rand 0.8.5",
+ "static_assertions",
+ "tokio",
+ "urcu-sys",
+ "x86 0.51.0",
+ "zipf",
+]
+
+[[package]]
+name = "nrfs"
+version = "0.0.1"
+source = "git+https://github.com/ankit-iitb/nrfs.git?branch=master#2b688ce16e762c79f1ba6809954de79ad5ba10a1"
+dependencies = [
+ "arr_macro",
+ "bitflags",
+ "crossbeam-utils 0.8.10",
+ "custom_error_core",
+ "hashbrown 0.12.3",
+ "hwloc2",
+ "log",
+ "spin 0.9.8",
+ "static_assertions",
+ "x86 0.49.0",
+]
+
 [[package]]
 name = "nrk"
 version = "0.0.4"
@@ -1150,7 +1597,7 @@ dependencies = [
  "cnr",
  "core2",
  "crossbeam-queue",
- "crossbeam-utils",
+ "crossbeam-utils 0.8.10",
  "cstr_core",
  "csv",
  "ctor",
@@ -1172,8 +1619,9 @@ dependencies = [
  "log",
  "logos",
  "memfile",
- "memoffset",
+ "memoffset 0.6.5",
  "node-replication",
+ "nr2",
  "once_cell",
  "proptest 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand 0.8.5",
@@ -1189,7 +1637,7 @@ dependencies = [
  "testutils",
  "uefi",
  "vmxnet3",
- "which",
+ "which 4.2.5",
  "x86 0.52.0",
 ]
 
@@ -1276,7 +1724,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "149d8f5b97f3c1133e3cfcd8886449959e856b557ff281e292b733d7c69e005e"
 dependencies = [
  "owning_ref",
- "parking_lot_core",
+ "parking_lot_core 0.2.14",
+]
+
+[[package]]
+name = "parking_lot"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3a704eb390aafdc107b0e392f56a82b668e3a71366993b5340f5833fd62505e"
+dependencies = [
+ "lock_api 0.3.4",
+ "parking_lot_core 0.7.3",
+]
+
+[[package]]
+name = "parking_lot"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
+dependencies = [
+ "lock_api 0.4.7",
+ "parking_lot_core 0.9.8",
 ]
 
 [[package]]
@@ -1287,10 +1755,37 @@ checksum = "4db1a8ccf734a7bce794cc19b3df06ed87ab2f3907036b693c68f56b4d4537fa"
 dependencies = [
  "libc 0.2.126",
  "rand 0.4.6",
- "smallvec",
+ "smallvec 0.6.14",
  "winapi 0.3.9",
 ]
 
+[[package]]
+name = "parking_lot_core"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b93f386bb233083c799e6e642a9d73db98c24a5deeb95ffc85bf281255dffc98"
+dependencies = [
+ "cfg-if 0.1.10",
+ "cloudabi",
+ "libc 0.2.126",
+ "redox_syscall 0.1.57",
+ "smallvec 1.11.0",
+ "winapi 0.3.9",
+]
+
+[[package]]
+name = "parking_lot_core"
+version = "0.9.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447"
+dependencies = [
+ "cfg-if 1.0.0",
+ "libc 0.2.126",
+ "redox_syscall 0.3.5",
+ "smallvec 1.11.0",
+ "windows-targets 0.48.1",
+]
+
 [[package]]
 name = "paste"
 version = "1.0.7"
@@ -1306,6 +1801,12 @@ dependencies = [
  "tracing",
 ]
 
+[[package]]
+name = "peeking_take_while"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
+
 [[package]]
 name = "phf"
 version = "0.10.1"
@@ -1350,6 +1851,12 @@ version = "0.2.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116"
 
+[[package]]
+name = "pin-utils"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
+
 [[package]]
 name = "pkg-config"
 version = "0.3.25"
@@ -1382,9 +1889,9 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.40"
+version = "1.0.66"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7"
+checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
 dependencies = [
  "unicode-ident",
 ]
@@ -1450,9 +1957,9 @@ checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
 
 [[package]]
 name = "quote"
-version = "1.0.20"
+version = "1.0.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804"
+checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965"
 dependencies = [
  "proc-macro2",
 ]
@@ -1605,6 +2112,12 @@ dependencies = [
  "rand_core 0.3.1",
 ]
 
+[[package]]
+name = "redox_syscall"
+version = "0.1.57"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
+
 [[package]]
 name = "redox_syscall"
 version = "0.2.13"
@@ -1614,6 +2127,15 @@ dependencies = [
  "bitflags",
 ]
 
+[[package]]
+name = "redox_syscall"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29"
+dependencies = [
+ "bitflags",
+]
+
 [[package]]
 name = "regex"
 version = "1.5.6"
@@ -1630,6 +2152,9 @@ name = "regex-automata"
 version = "0.1.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
+dependencies = [
+ "regex-syntax",
+]
 
 [[package]]
 name = "regex-syntax"
@@ -1651,7 +2176,7 @@ name = "rexpect"
 version = "0.4.0"
 source = "git+https://github.com/gz/rexpect.git?branch=eof-nix#de1352b82c52a9466a5c3b417abe02356c48744c"
 dependencies = [
- "error-chain",
+ "error-chain 0.12.4",
  "nix",
  "regex",
  "tempfile",
@@ -1717,6 +2242,12 @@ version = "0.1.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342"
 
+[[package]]
+name = "rustc-hash"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
+
 [[package]]
 name = "rustc_version"
 version = "0.2.3"
@@ -1746,9 +2277,15 @@ dependencies = [
  "io-lifetimes",
  "libc 0.2.126",
  "linux-raw-sys",
- "windows-sys",
+ "windows-sys 0.36.1",
 ]
 
+[[package]]
+name = "rustversion"
+version = "1.0.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
+
 [[package]]
 name = "rusty-fork"
 version = "0.3.0"
@@ -1767,6 +2304,12 @@ version = "1.0.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695"
 
+[[package]]
+name = "scoped-tls"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294"
+
 [[package]]
 name = "scopeguard"
 version = "1.1.0"
@@ -1821,7 +2364,31 @@ checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 1.0.98",
+]
+
+[[package]]
+name = "sharded-slab"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31"
+dependencies = [
+ "lazy_static",
+]
+
+[[package]]
+name = "shlex"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2"
+
+[[package]]
+name = "signal-hook-registry"
+version = "1.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1"
+dependencies = [
+ "libc 0.2.126",
 ]
 
 [[package]]
@@ -1830,6 +2397,15 @@ version = "0.3.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
 
+[[package]]
+name = "slab"
+version = "0.4.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d"
+dependencies = [
+ "autocfg",
+]
+
 [[package]]
 name = "slabmalloc"
 version = "0.10.0"
@@ -1848,6 +2424,12 @@ dependencies = [
  "maybe-uninit",
 ]
 
+[[package]]
+name = "smallvec"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9"
+
 [[package]]
 name = "smoltcp"
 version = "0.7.5"
@@ -1872,6 +2454,16 @@ dependencies = [
  "managed 0.8.0",
 ]
 
+[[package]]
+name = "socket2"
+version = "0.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd"
+dependencies = [
+ "libc 0.2.126",
+ "winapi 0.3.9",
+]
+
 [[package]]
 name = "spin"
 version = "0.5.2"
@@ -1884,7 +2476,7 @@ version = "0.9.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
 dependencies = [
- "lock_api",
+ "lock_api 0.4.7",
 ]
 
 [[package]]
@@ -1899,6 +2491,12 @@ version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
 
+[[package]]
+name = "strsim"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
+
 [[package]]
 name = "syn"
 version = "1.0.98"
@@ -1910,6 +2508,17 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "syn"
+version = "2.0.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
 [[package]]
 name = "tempdir"
 version = "0.3.7"
@@ -1929,7 +2538,7 @@ dependencies = [
  "cfg-if 1.0.0",
  "fastrand",
  "libc 0.2.126",
- "redox_syscall",
+ "redox_syscall 0.2.13",
  "remove_dir_all",
  "winapi 0.3.9",
 ]
@@ -1963,6 +2572,62 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "textwrap"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
+dependencies = [
+ "unicode-width",
+]
+
+[[package]]
+name = "thread_local"
+version = "1.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152"
+dependencies = [
+ "cfg-if 1.0.0",
+ "once_cell",
+]
+
+[[package]]
+name = "tokio"
+version = "1.26.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "03201d01c3c27a29c8a5cee5b55a93ddae1ccf6f08f65365c2c918f8c1b76f64"
+dependencies = [
+ "autocfg",
+ "bytes",
+ "libc 0.2.126",
+ "memchr",
+ "mio",
+ "num_cpus",
+ "parking_lot 0.12.1",
+ "pin-project-lite",
+ "signal-hook-registry",
+ "socket2",
+ "tokio-macros",
+ "windows-sys 0.45.0",
+]
+
+[[package]]
+name = "tokio-macros"
+version = "1.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d266c00fde287f55d3f1c3e96c500c362a2b8c695076ec180f27918820bc6df8"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.98",
+]
+
+[[package]]
+name = "toml"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "736b60249cb25337bc196faa43ee12c705e426f3d55c214d73a4e7be06f92cb4"
+
 [[package]]
 name = "tracing"
 version = "0.1.35"
@@ -1983,7 +2648,7 @@ checksum = "cc6b8ad3567499f98a1db7a752b07a7c8c7c7c34c332ec00effb2b0027974b7c"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 1.0.98",
 ]
 
 [[package]]
@@ -1993,6 +2658,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7b7358be39f2f274f322d2aaed611acc57f382e8eb1e5b48cb9ae30933495ce7"
 dependencies = [
  "once_cell",
+ "valuable",
+]
+
+[[package]]
+name = "tracing-log"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "78ddad33d2d10b1ed7eb9d1f518a5674713876e97e5bb9b7345a7984fbb4f922"
+dependencies = [
+ "lazy_static",
+ "log",
+ "tracing-core",
+]
+
+[[package]]
+name = "tracing-subscriber"
+version = "0.3.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "60db860322da191b40952ad9affe65ea23e7dd6a5c442c2c42865810c6ab8e6b"
+dependencies = [
+ "ansi_term",
+ "matchers",
+ "once_cell",
+ "regex",
+ "sharded-slab",
+ "smallvec 1.11.0",
+ "thread_local",
+ "tracing",
+ "tracing-core",
+ "tracing-log",
 ]
 
 [[package]]
@@ -2030,7 +2725,7 @@ checksum = "a9271b66bf83671563773e54b178f1022ac2dab87dc197f80be51885a5e1a2f4"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 1.0.98",
 ]
 
 [[package]]
@@ -2050,12 +2745,46 @@ version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c"
 
+[[package]]
+name = "unicode-width"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
+
+[[package]]
+name = "urcu-sys"
+version = "0.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fff1711e4a29f9bad323402d0cde3f5977f9fd131bb937625665a480547871b5"
+dependencies = [
+ "bindgen",
+ "metadeps",
+]
+
+[[package]]
+name = "valuable"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
+
 [[package]]
 name = "vcell"
 version = "0.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "77439c1b53d2303b20d9459b1ade71a83c716e3f9c34f3228c00e6f185d6c002"
 
+[[package]]
+name = "vec_map"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
+
+[[package]]
+name = "version_check"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd"
+
 [[package]]
 name = "version_check"
 version = "0.9.4"
@@ -2068,7 +2797,7 @@ version = "0.1.0"
 dependencies = [
  "arrayvec",
  "bitflags",
- "crossbeam-utils",
+ "crossbeam-utils 0.8.10",
  "cstr_core",
  "hashbrown 0.11.2",
  "kpi",
@@ -2134,6 +2863,15 @@ version = "0.11.0+wasi-snapshot-preview1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
 
+[[package]]
+name = "which"
+version = "3.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d011071ae14a2f6671d0b74080ae0cd8ebf3a6f8c9589a2cd45f23126fe29724"
+dependencies = [
+ "libc 0.2.126",
+]
+
 [[package]]
 name = "which"
 version = "4.2.5"
@@ -2188,49 +2926,190 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
 
+[[package]]
+name = "windows"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f"
+dependencies = [
+ "windows-targets 0.48.1",
+]
+
 [[package]]
 name = "windows-sys"
 version = "0.36.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2"
 dependencies = [
- "windows_aarch64_msvc",
- "windows_i686_gnu",
- "windows_i686_msvc",
- "windows_x86_64_gnu",
- "windows_x86_64_msvc",
+ "windows_aarch64_msvc 0.36.1",
+ "windows_i686_gnu 0.36.1",
+ "windows_i686_msvc 0.36.1",
+ "windows_x86_64_gnu 0.36.1",
+ "windows_x86_64_msvc 0.36.1",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.45.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
+dependencies = [
+ "windows-targets 0.42.2",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
+dependencies = [
+ "windows-targets 0.48.1",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
+dependencies = [
+ "windows_aarch64_gnullvm 0.42.2",
+ "windows_aarch64_msvc 0.42.2",
+ "windows_i686_gnu 0.42.2",
+ "windows_i686_msvc 0.42.2",
+ "windows_x86_64_gnu 0.42.2",
+ "windows_x86_64_gnullvm 0.42.2",
+ "windows_x86_64_msvc 0.42.2",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.48.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f"
+dependencies = [
+ "windows_aarch64_gnullvm 0.48.0",
+ "windows_aarch64_msvc 0.48.0",
+ "windows_i686_gnu 0.48.0",
+ "windows_i686_msvc 0.48.0",
+ "windows_x86_64_gnu 0.48.0",
+ "windows_x86_64_gnullvm 0.48.0",
+ "windows_x86_64_msvc 0.48.0",
 ]
 
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc"
+
 [[package]]
 name = "windows_aarch64_msvc"
 version = "0.36.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47"
 
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3"
+
 [[package]]
 name = "windows_i686_gnu"
 version = "0.36.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6"
 
+[[package]]
+name = "windows_i686_gnu"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241"
+
 [[package]]
 name = "windows_i686_msvc"
 version = "0.36.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024"
 
+[[package]]
+name = "windows_i686_msvc"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00"
+
 [[package]]
 name = "windows_x86_64_gnu"
 version = "0.36.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1"
 
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953"
+
 [[package]]
 name = "windows_x86_64_msvc"
 version = "0.36.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"
 
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
+
 [[package]]
 name = "x86"
 version = "0.47.0"
@@ -2242,6 +3121,28 @@ dependencies = [
  "raw-cpuid",
 ]
 
+[[package]]
+name = "x86"
+version = "0.49.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae0df8874628b15fdefccbd5c6b06766ec9ec2d6cd30d6d2f82245343358c50a"
+dependencies = [
+ "bit_field",
+ "bitflags",
+ "raw-cpuid",
+]
+
+[[package]]
+name = "x86"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fa7477e4af519a58818c1f0ab4b149d0ce3304e625be8fe56c8484cac50b0532"
+dependencies = [
+ "bit_field",
+ "bitflags",
+ "raw-cpuid",
+]
+
 [[package]]
 name = "x86"
 version = "0.52.0"
@@ -2262,8 +3163,23 @@ dependencies = [
  "zero",
 ]
 
+[[package]]
+name = "yaml-rust"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e66366e18dc58b46801afbf2ca7661a9f59cc8c5962c29892b6039b4f86fa992"
+
 [[package]]
 name = "zero"
 version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5f1bc8a6b2005884962297587045002d8cfb8dcec9db332f4ca216ddc5de82c5"
+
+[[package]]
+name = "zipf"
+version = "7.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "835688a7a1b5d2dfaeb5b7e1b4cfb979e7095a70cd1c72fe083f4904ef3e995e"
+dependencies = [
+ "rand 0.8.5",
+]
diff --git a/Cargo.toml b/Cargo.toml
index b2974a5f9..2c6c0ece2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,6 +10,7 @@ members = [
     "lib/lineup",
     "lib/node-replication/cnr",
     "lib/node-replication/nr",
+    "lib/node-replication2/node-replication",
     "lib/rpc",
     "lib/vibrio",
     "lib/vmxnet3",
diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml
index d91aeae3c..7656474c0 100644
--- a/kernel/Cargo.toml
+++ b/kernel/Cargo.toml
@@ -15,7 +15,8 @@ resolver = "2"
 [dependencies]
 # Our own dependencies:
 apic = { path = "../lib/apic/" }
-node-replication = { path = "../lib/node-replication/nr" } #"0.1.1"
+node-replication = { path = "../lib/node-replication/nr" }
+nr2 = { path = "../lib/node-replication2/node-replication" }
 cnr = { path = "../lib/node-replication/cnr" }
 kpi = { path = "../lib/kpi" }
 rpc = { path = "../lib/rpc", optional = true }
diff --git a/kernel/run.py b/kernel/run.py
index a1fa5d4b5..cde5a351c 100644
--- a/kernel/run.py
+++ b/kernel/run.py
@@ -769,8 +769,7 @@ def configure_dcm_scheduler(args):
         # Download jar if necessary
         if not os.path.exists(dcm_path):
             subprocess.run("wget https://github.com/hunhoffe/nrk-dcm-scheduler/releases/download/release-{}/{} -P {}".format(
-                DCM_SCHEDULER_VERSION, dcm_jar, jar_dir), shell=True, check=True, timeout=10)
-
+                DCM_SCHEDULER_VERSION, dcm_jar, jar_dir), shell=True, check=True, timeout=120)
     # Create consistent symlink location for the DCM scheduler jar
     if os.path.exists(symlink_jar_path) or os.path.islink(symlink_jar_path):
         os.unlink(symlink_jar_path)
diff --git a/kernel/src/arch/unix/kcb.rs b/kernel/src/arch/unix/kcb.rs
index d9ef577e3..bc2f0cd30 100644
--- a/kernel/src/arch/unix/kcb.rs
+++ b/kernel/src/arch/unix/kcb.rs
@@ -7,7 +7,7 @@ use alloc::sync::Arc;
 use core::any::Any;
 
 use arrayvec::ArrayVec;
-use node_replication::{Replica, ReplicaToken};
+use nr2::nr::{Replica, ReplicaToken};
 
 use crate::error::KError;
 use crate::memory::mcache::FrameCacheEarly;
diff --git a/kernel/src/arch/unix/mod.rs b/kernel/src/arch/unix/mod.rs
index db26933ba..50b34401b 100644
--- a/kernel/src/arch/unix/mod.rs
+++ b/kernel/src/arch/unix/mod.rs
@@ -10,7 +10,7 @@ use cnr::Replica as MlnrReplica;
 use ctor::ctor;
 use fallible_collections::TryClone;
 use log::{debug, info};
-use node_replication::{Log, Replica};
+use nr2::nr::{Log, Replica};
 use x86::current::paging::HUGE_PAGE_SIZE;
 
 use crate::fs::cnrfs::MlnrKernelNode;
diff --git a/kernel/src/arch/unix/process.rs b/kernel/src/arch/unix/process.rs
index 089c80c4f..7ab2829f7 100644
--- a/kernel/src/arch/unix/process.rs
+++ b/kernel/src/arch/unix/process.rs
@@ -15,7 +15,7 @@ use arrayvec::ArrayVec;
 use kpi::process::FrameId;
 use lazy_static::lazy_static;
 
-use node_replication::{Dispatch, Log, Replica};
+use nr2::nr::{Dispatch, Log, Replica};
 
 use crate::arch::kcb::get_kcb;
 use crate::error::{KError, KResult};
diff --git a/kernel/src/arch/x86_64/coreboot.rs b/kernel/src/arch/x86_64/coreboot.rs
index f7e4b2a5e..e41245618 100644
--- a/kernel/src/arch/x86_64/coreboot.rs
+++ b/kernel/src/arch/x86_64/coreboot.rs
@@ -8,6 +8,7 @@
 
 use alloc::sync::Arc;
 use alloc::vec::Vec;
+use core::num::NonZeroUsize;
 use core::sync::atomic::{AtomicBool, Ordering};
 
 use apic::ApicDriver;
@@ -17,7 +18,7 @@ use fallible_collections::FallibleVecGlobal;
 use fallible_collections::TryClone;
 use log::debug;
 use log::trace;
-use node_replication::{Log, Replica};
+use nr2::nr::{NodeReplicated};
 use x86::apic::ApicId;
 use x86::current::paging::PAddr;
 
@@ -29,7 +30,6 @@ use crate::memory::global::GlobalMemory;
 use crate::memory::vspace::MapAction;
 use crate::memory::Frame;
 use crate::nr::KernelNode;
-use crate::nr::Op;
 use crate::round_up;
 use crate::stack::OwnedStack;
 use crate::stack::Stack;
@@ -57,8 +57,7 @@ pub(crate) struct AppCoreArgs {
     pub(super) global_pmem: &'static GlobalMemory,
     pub(super) thread: atopology::ThreadId,
     pub(super) node: atopology::NodeId,
-    pub(super) _log: Arc<Log<'static, Op>>,
-    pub(super) replica: Arc<Replica<'static, KernelNode>>,
+    pub(super) replica: Arc<NodeReplicated<KernelNode>>,
     pub(super) fs_replica: Option<Arc<MlnrReplica<'static, MlnrKernelNode>>>,
 }
 
@@ -333,8 +332,7 @@ pub(crate) unsafe fn initialize<A>(
 ///  - Initialized topology
 ///  - Local APIC driver
 pub(super) fn boot_app_cores(
-    log: Arc<Log<'static, Op>>,
-    bsp_replica: Arc<Replica<'static, KernelNode>>,
+    kernel_node: Arc<NodeReplicated<KernelNode>>,
     fs_logs: Vec<Arc<MlnrLog<'static, Modify>>>,
     fs_replica: Option<Arc<MlnrReplica<'static, MlnrKernelNode>>>,
 ) {
@@ -347,28 +345,23 @@ pub(super) fn boot_app_cores(
 
     // Let's go with one replica per NUMA node for now:
     let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes());
-
-    let mut replicas: Vec<Arc<Replica<'static, KernelNode>>> =
-        Vec::try_with_capacity(numa_nodes).expect("Not enough memory to initialize system");
+    let numa_nodes = NonZeroUsize::new(numa_nodes).expect("At least one NUMA node");
+         
     let mut fs_replicas: Vec<Arc<MlnrReplica<'static, MlnrKernelNode>>> =
-        Vec::try_with_capacity(numa_nodes).expect("Not enough memory to initialize system");
+        Vec::try_with_capacity(numa_nodes.get()).expect("Not enough memory to initialize system");
 
     // Push the replica for node 0
-    debug_assert!(replicas.capacity() >= 1, "No re-allocation.");
-    replicas.push(bsp_replica);
     if let Some(node_0_fs_replica) = fs_replica {
         debug_assert!(fs_replicas.capacity() >= 1, "No re-allocation.");
         fs_replicas.push(node_0_fs_replica);
     }
 
     let pcm = kcb::per_core_mem();
+    let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes());
+
     for node in 1..numa_nodes {
         pcm.set_mem_affinity(node as atopology::NodeId)
             .expect("Can't set affinity");
-
-        debug_assert!(replicas.capacity() > node, "No re-allocation.");
-        replicas.push(Replica::<'static, KernelNode>::new(&log));
-
         if fs_replicas.len() > 0 {
             debug_assert!(fs_replicas.capacity() > node, "No re-allocation.");
             fs_replicas.push(MlnrReplica::new(
@@ -434,8 +427,7 @@ pub(super) fn boot_app_cores(
             global_memory,
             global_pmem,
             thread: thread.id,
-            _log: log.clone(),
-            replica: replicas[node as usize]
+            replica: kernel_node
                 .try_clone()
                 .expect("Not enough memory to initialize system"),
             fs_replica: thread_fs_replica,
@@ -474,5 +466,4 @@ pub(super) fn boot_app_cores(
         pcm.set_mem_affinity(0).expect("Can't set affinity");
     }
 
-    core::mem::forget(replicas);
 }
diff --git a/kernel/src/arch/x86_64/mod.rs b/kernel/src/arch/x86_64/mod.rs
index 8d8ff2290..e557331b0 100644
--- a/kernel/src/arch/x86_64/mod.rs
+++ b/kernel/src/arch/x86_64/mod.rs
@@ -23,6 +23,7 @@ use alloc::sync::Arc;
 use core::mem::transmute;
 use core::sync::atomic::AtomicBool;
 use core::sync::atomic::Ordering;
+use core::num::NonZeroUsize;
 
 #[cfg(feature = "rackscale")]
 use crate::nr::NR_LOG;
@@ -31,10 +32,8 @@ use cnr::Replica as MlnrReplica;
 use fallible_collections::TryClone;
 use klogger::sprint;
 use log::{debug, error, info};
-use node_replication::Replica;
+use nr2::nr::{AffinityChange, NodeReplicated};
 use x86::{controlregs, cpuid};
-#[cfg(not(feature = "rackscale"))]
-use {crate::nr::Op, node_replication::Log};
 
 use crate::cmdline::CommandLineArguments;
 use crate::fs::cnrfs::MlnrKernelNode;
@@ -194,7 +193,7 @@ pub(crate) fn start_app_core(args: Arc<AppCoreArgs>, initialized: &AtomicBool) {
     serial::init();
 
     {
-        let local_ridx = args.replica.register().unwrap();
+        let local_ridx = args.replica.register(args.node).unwrap();
         crate::nr::NR_REPLICA.call_once(|| (args.replica.clone(), local_ridx));
 
         #[cfg(feature = "rackscale")]
@@ -244,9 +243,6 @@ pub(crate) fn start_app_core(args: Arc<AppCoreArgs>, initialized: &AtomicBool) {
 #[start]
 #[no_mangle]
 fn _start(argc: isize, _argv: *const *const u8) -> isize {
-    #[cfg(not(feature = "rackscale"))]
-    use crate::memory::LARGE_PAGE_SIZE;
-
     // Very early init:
     sprint!("\r\n");
     sprint!("NRK booting on x86_64...\r\n");
@@ -453,16 +449,27 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {
     // Set-up interrupt routing drivers (I/O APIC controllers)
     irq::ioapic_initialize();
 
-    // Create the global operation log and first replica and store it (needs
-    // TLS)
+
+    // Let's go with one replica per NUMA node for now:
+    let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes());
+    let numa_nodes = NonZeroUsize::new(numa_nodes).expect("At least one NUMA node");
+
     #[cfg(not(feature = "rackscale"))]
-    let (log, bsp_replica) = {
-        let log: Arc<Log<Op>> = Arc::try_new(Log::<Op>::new(LARGE_PAGE_SIZE))
-            .expect("Not enough memory to initialize system");
-        let bsp_replica = Replica::<KernelNode>::new(&log);
-        let local_ridx = bsp_replica.register().unwrap();
-        crate::nr::NR_REPLICA.call_once(|| (bsp_replica.clone(), local_ridx));
-        (log, bsp_replica)
+    let kernel_node = {
+        // Create the global operation log and first replica and store it (needs
+        // TLS)
+        let kernel_node: Arc<NodeReplicated<KernelNode>> = Arc::try_new(NodeReplicated::new(numa_nodes, |afc: AffinityChange| { 
+            let pcm = kcb::per_core_mem();
+            match afc {
+                AffinityChange::Replica(r) => pcm.set_mem_affinity(r).expect("Can't set affinity"),
+                AffinityChange::Revert(orig) => pcm.set_mem_affinity(orig).expect("Can't set affinity"),
+            }
+            return 0; // xxx
+            }).expect("Not enough memory to initialize system")).expect("Not enough memory to initialize system");
+
+        let local_ridx = kernel_node.register(0).unwrap();
+        crate::nr::NR_REPLICA.call_once(|| (kernel_node.clone(), local_ridx));
+        kernel_node
     };
 
     // Starting to initialize file-system
@@ -540,7 +547,7 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {
     }
 
     // Bring up the rest of the system (needs topology, APIC, and global memory)
-    coreboot::boot_app_cores(log.clone(), bsp_replica, fs_logs, fs_replica);
+    coreboot::boot_app_cores(kernel_node, fs_logs, fs_replica);
 
     // Done with initialization, now we go in
     // the arch-independent part:
diff --git a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs
index 3c2a5f997..42aefaed5 100644
--- a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs
+++ b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs
@@ -9,7 +9,7 @@ use core2::io::Write;
 
 use atopology::NodeId;
 use crossbeam_queue::ArrayQueue;
-use node_replication::{Dispatch, Log};
+use nr2::nr::{Dispatch, Log};
 use rpc::rpc::*;
 
 use super::client_state::CLIENT_STATE;
diff --git a/kernel/src/environment.rs b/kernel/src/environment.rs
index d13bc50a7..449be8ed4 100644
--- a/kernel/src/environment.rs
+++ b/kernel/src/environment.rs
@@ -58,7 +58,7 @@ pub fn init_topology() {
         "We don't support as many replicas as we have NUMA nodes."
     );
     assert!(
-        node_replication::MAX_REPLICAS_PER_LOG >= nodes,
+        nr2::nr::MAX_REPLICAS_PER_LOG >= nodes,
         "We don't support as many replicas as we have NUMA nodes."
     );
     assert!(
diff --git a/kernel/src/integration_tests.rs b/kernel/src/integration_tests.rs
index 7b93fe64a..4b132d461 100644
--- a/kernel/src/integration_tests.rs
+++ b/kernel/src/integration_tests.rs
@@ -422,7 +422,7 @@ fn coreboot_nrlog() {
     use core::sync::atomic::{AtomicBool, Ordering};
     use klogger::sprintln;
     use log::info;
-    use node_replication::Log;
+    use nr2::Log;
 
     let log: Arc<Log<usize>> =
         Arc::try_new(Log::<usize>::new(1024 * 1024 * 1)).expect("Can't Arc this");
diff --git a/kernel/src/nr.rs b/kernel/src/nr.rs
index 7ba2929fa..b5554cc79 100644
--- a/kernel/src/nr.rs
+++ b/kernel/src/nr.rs
@@ -7,7 +7,7 @@ use core::fmt::Debug;
 use alloc::sync::Arc;
 use hashbrown::HashMap;
 use log::{error, trace};
-use node_replication::{Dispatch, Replica, ReplicaToken};
+use nr2::nr::{NodeReplicated, ThreadToken, Dispatch};
 use spin::Once;
 
 #[cfg(feature = "rackscale")]
@@ -19,18 +19,18 @@ use crate::process::{Pid, MAX_PROCESSES};
 
 /// Kernel scheduler / process mgmt. replica
 #[thread_local]
-pub(crate) static NR_REPLICA: Once<(Arc<Replica<'static, KernelNode>>, ReplicaToken)> = Once::new();
+pub(crate) static NR_REPLICA: Once<(Arc<NodeReplicated<KernelNode>>, ThreadToken)> = Once::new();
 
 // Base nr log. The rackscale controller needs to save a reference to this, so it can give
 // clones to client so they can create replicas of their own.
 #[cfg(feature = "rackscale")]
 lazy_static! {
-    pub(crate) static ref NR_LOG: Arc<node_replication::Log<'static, Op>> = {
+    pub(crate) static ref NR_LOG: Arc<nr2::nr::Log<Op>> = {
         if crate::CMDLINE
             .get()
             .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller)
         {
-            use node_replication::Log;
+            use nr2::nr::Log;
             use crate::arch::kcb::per_core_mem;
             use crate::memory::{LARGE_PAGE_SIZE, shmem_affinity::local_shmem_affinity};
 
@@ -47,7 +47,7 @@ lazy_static! {
 
             log
         } else {
-            use node_replication::Log;
+            use nr2::nr::Log;
             use crate::memory::{paddr_to_kernel_vaddr, PAddr};
 
             use crate::arch::rackscale::get_shmem_structure::{rpc_get_shmem_structure, ShmemStructure};
@@ -99,6 +99,8 @@ pub(crate) struct CoreInfo {
     pub entry_point: VAddr,
 }
 
+
+#[derive(Debug, Clone)]
 pub(crate) struct KernelNode {
     process_map: HashMap<Pid, ()>,
     scheduler_map: HashMap<kpi::system::GlobalThreadId, CoreInfo>,

From 9eeb0f6f95e3d31ef381874b97fa5e906e0d1c92 Mon Sep 17 00:00:00 2001
From: Gerd Zellweger <mail@gerdzellweger.com>
Date: Thu, 10 Aug 2023 16:14:33 -0700
Subject: [PATCH 02/32] Add submodule.

Signed-off-by: Gerd Zellweger <mail@gerdzellweger.com>
---
 lib/node-replication2 | 1 +
 1 file changed, 1 insertion(+)
 create mode 160000 lib/node-replication2

diff --git a/lib/node-replication2 b/lib/node-replication2
new file mode 160000
index 000000000..f0a91fdca
--- /dev/null
+++ b/lib/node-replication2
@@ -0,0 +1 @@
+Subproject commit f0a91fdca08adfdd5ad8d11c2e78d7b2d852e512

From eb5c56c637ead37b7c29ad521a360c2e14bbc79e Mon Sep 17 00:00:00 2001
From: Gerd Zellweger <mail@gerdzellweger.com>
Date: Mon, 11 Sep 2023 00:20:05 -0700
Subject: [PATCH 03/32] Migrate process code to new nr library.

Signed-off-by: Gerd Zellweger <mail@gerdzellweger.com>
---
 kernel/src/arch/unix/mod.rs                 |  34 ++++--
 kernel/src/arch/unix/process.rs             |  80 ++++++------
 kernel/src/arch/x86_64/coreboot.rs          |   5 +-
 kernel/src/arch/x86_64/mod.rs               |  28 +++--
 kernel/src/arch/x86_64/process.rs           | 129 +++++++++++---------
 kernel/src/arch/x86_64/vspace/mod.rs        |   8 +-
 kernel/src/arch/x86_64/vspace/page_table.rs |  23 ++--
 kernel/src/integration_tests.rs             |   2 +-
 kernel/src/nr.rs                            |   3 +-
 kernel/src/nrproc.rs                        | 112 +++++------------
 kernel/src/process.rs                       |   2 +-
 11 files changed, 209 insertions(+), 217 deletions(-)

diff --git a/kernel/src/arch/unix/mod.rs b/kernel/src/arch/unix/mod.rs
index 50b34401b..a0d2fedad 100644
--- a/kernel/src/arch/unix/mod.rs
+++ b/kernel/src/arch/unix/mod.rs
@@ -3,6 +3,7 @@
 
 use alloc::boxed::Box;
 use alloc::sync::Arc;
+use core::num::NonZeroUsize;
 use core::sync::atomic::{AtomicBool, Ordering};
 
 use arrayvec::ArrayVec;
@@ -10,7 +11,7 @@ use cnr::Replica as MlnrReplica;
 use ctor::ctor;
 use fallible_collections::TryClone;
 use log::{debug, info};
-use nr2::nr::{Log, Replica};
+use nr2::nr::{AffinityChange, Log, NodeReplicated, Replica};
 use x86::current::paging::HUGE_PAGE_SIZE;
 
 use crate::fs::cnrfs::MlnrKernelNode;
@@ -102,14 +103,33 @@ fn init_setup() {
     unsafe { kcb::PER_CORE_MEMORY.set_global_mem(global_memory_static) };
     debug!("Memory allocation should work at this point...");
 
-    let log: Arc<Log<Op>> = Arc::try_new(Log::<Op>::new(LARGE_PAGE_SIZE))
+    let kernel_node = {
+        // Create the global operation log and first replica and store it (needs
+        // TLS)
+        let kernel_node: Arc<NodeReplicated<KernelNode>> = Arc::try_new(
+            NodeReplicated::new(NonZeroUsize::new(1).unwrap(), |afc: AffinityChange| {
+                return 0; // xxx
+            })
+            .expect("Not enough memory to initialize system"),
+        )
         .expect("Not enough memory to initialize system");
-    let bsp_replica = Replica::<KernelNode>::new(&log);
-    let local_ridx = bsp_replica
-        .register()
-        .expect("Failed to register with Replica.");
-    crate::nr::NR_REPLICA.call_once(|| (bsp_replica.clone(), local_ridx));
 
+        let local_ridx = kernel_node.register(0).unwrap();
+        crate::nr::NR_REPLICA.call_once(|| (kernel_node.clone(), local_ridx));
+        kernel_node
+    };
+
+    /*
+
+       let log: Arc<Log<Op>> = Arc::try_new(Log::<Op>::new(LARGE_PAGE_SIZE))
+           .expect("Not enough memory to initialize system");
+       let bsp_replica = Replica::<KernelNode>::new(&log);
+       let local_ridx = bsp_replica
+           .register()
+           .expect("Failed to register with Replica.");
+       crate::nr::NR_REPLICA.call_once(|| (bsp_replica.clone(), local_ridx));
+
+    */
     // Starting to initialize file-system
     let fs_logs = crate::fs::cnrfs::allocate_logs();
     // Construct the first replica
diff --git a/kernel/src/arch/unix/process.rs b/kernel/src/arch/unix/process.rs
index 7ab2829f7..2117d844f 100644
--- a/kernel/src/arch/unix/process.rs
+++ b/kernel/src/arch/unix/process.rs
@@ -8,14 +8,16 @@ use alloc::vec::Vec;
 use bootloader_shared::Module;
 use core::alloc::Allocator;
 use core::cell::RefCell;
+use core::num::NonZeroUsize;
 use core::ops::{Deref, DerefMut};
+use core::sync::atomic::{AtomicUsize, Ordering};
 use x86::current::paging::PAddr;
 
 use arrayvec::ArrayVec;
 use kpi::process::FrameId;
 use lazy_static::lazy_static;
 
-use nr2::nr::{Dispatch, Log, Replica};
+use nr2::nr::{AffinityChange, Dispatch, NodeReplicated, ThreadToken};
 
 use crate::arch::kcb::get_kcb;
 use crate::error::{KError, KResult};
@@ -63,41 +65,19 @@ pub(crate) fn swap_current_executor(_current_executor: Box<UnixThread>) -> Optio
 }
 
 lazy_static! {
-    pub(crate) static ref PROCESS_TABLE: ArrayVec<ArrayVec<Arc<Replica<'static, NrProcess<UnixProcess>>>, MAX_PROCESSES>, MAX_NUMA_NODES> = {
+    pub(crate) static ref PROCESS_TABLE: ArrayVec<Arc<NodeReplicated<NrProcess<UnixProcess>>>, MAX_PROCESSES> = {
+        debug_assert_eq!(*crate::environment::NODE_ID, 0, "Expect initialization to happen on node 0.");
         // Want at least one replica...
-        let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes());
-
-        let mut numa_cache = ArrayVec::new();
-        for _n in 0..numa_nodes {
-            let process_replicas = ArrayVec::new();
-            debug_assert!(!numa_cache.is_full(), "Ensured by loop range");
-            numa_cache.push(process_replicas)
-        }
+        let num_replicas = NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).expect("At least one numa node");
 
+        let mut processes = ArrayVec::new();
         for pid in 0..MAX_PROCESSES {
-                let log = Arc::try_new(Log::<<NrProcess<UnixProcess> as Dispatch>::WriteOperation>::new(
-                    LARGE_PAGE_SIZE,
-                )).expect("Can't initialize processes, out of memory.");
-
-            let da = DA::new().expect("Can't initialize process deterministic memory allocator");
-            for node in 0..numa_nodes {
-                let pcm = super::kcb::per_core_mem();
-                assert!(pcm.set_mem_affinity(node as atopology::NodeId).is_ok());
-
-                debug_assert!(!numa_cache[node].is_full(), "Ensured by loop range");
-
-
-                let p = Box::try_new(UnixProcess::new(pid, Box::new(da.clone())).expect("Can't create process during init")).expect("Not enough memory to initialize processes");
-                let nrp = NrProcess::new(p, Box::new(da.clone()));
-
-                numa_cache[node].push(Replica::<NrProcess<UnixProcess>>::with_data(&log, nrp));
-
-                debug_assert_eq!(*crate::environment::NODE_ID, 0, "Expect initialization to happen on node 0.");
-                assert!(pcm.set_mem_affinity(0).is_ok());
-            }
+            processes.push(
+                Arc::try_new(NodeReplicated::<NrProcess<UnixProcess>>::new(num_replicas, |afc: AffinityChange| {
+                    return 0; // TODO(dynrep): Return error code
+                }).expect("Not enough memory to initialize system")).expect("Not enough memory to initialize system"));
         }
-
-        numa_cache
+        processes
     };
 }
 
@@ -108,10 +88,7 @@ impl crate::nrproc::ProcessManager for ArchProcessManagement {
 
     fn process_table(
         &self,
-    ) -> &'static ArrayVec<
-        ArrayVec<Arc<Replica<'static, NrProcess<Self::Process>>>, MAX_PROCESSES>,
-        MAX_NUMA_NODES,
-    > {
+    ) -> &'static ArrayVec<Arc<NodeReplicated<NrProcess<UnixProcess>>>, MAX_PROCESSES> {
         &super::process::PROCESS_TABLE
     }
 }
@@ -127,8 +104,37 @@ pub(crate) struct UnixProcess {
     pub frames: ArrayVec<Option<Frame>, MAX_FRAMES_PER_PROCESS>,
 }
 
+static NEXT_PID: AtomicUsize = AtomicUsize::new(0);
+
+impl Default for NrProcess<UnixProcess> {
+    fn default() -> Self {
+        let next_pid = NEXT_PID.fetch_add(1, Ordering::Relaxed);
+        NrProcess::new(
+            Box::try_new(
+                UnixProcess::new(next_pid as Pid).expect("Failed to set-up process during init"),
+            )
+            .expect("Failed to initialize process during init"),
+        )
+    }
+}
+
+impl Clone for UnixProcess {
+    fn clone(&self) -> Self {
+        unimplemented!("Clone not yet implemented for UnixProcess")
+        /*
+        UnixProcess {
+            pid: self.pid,
+            vspace: self.vspace.clone(),
+            fds: self.fds.clone(),
+            pinfo: self.pinfo.clone(),
+            frames: self.frames.clone(),
+        }
+         */
+    }
+}
+
 impl UnixProcess {
-    fn new(pid: Pid, _allocator: Box<dyn Allocator + Send + Sync>) -> Result<Self, KError> {
+    fn new(pid: Pid) -> Result<Self, KError> {
         Ok(UnixProcess {
             pid,
             vspace: VSpace::new(),
diff --git a/kernel/src/arch/x86_64/coreboot.rs b/kernel/src/arch/x86_64/coreboot.rs
index e41245618..e3cdfa9a4 100644
--- a/kernel/src/arch/x86_64/coreboot.rs
+++ b/kernel/src/arch/x86_64/coreboot.rs
@@ -18,7 +18,7 @@ use fallible_collections::FallibleVecGlobal;
 use fallible_collections::TryClone;
 use log::debug;
 use log::trace;
-use nr2::nr::{NodeReplicated};
+use nr2::nr::NodeReplicated;
 use x86::apic::ApicId;
 use x86::current::paging::PAddr;
 
@@ -346,7 +346,7 @@ pub(super) fn boot_app_cores(
     // Let's go with one replica per NUMA node for now:
     let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes());
     let numa_nodes = NonZeroUsize::new(numa_nodes).expect("At least one NUMA node");
-         
+
     let mut fs_replicas: Vec<Arc<MlnrReplica<'static, MlnrKernelNode>>> =
         Vec::try_with_capacity(numa_nodes.get()).expect("Not enough memory to initialize system");
 
@@ -465,5 +465,4 @@ pub(super) fn boot_app_cores(
         debug!("Core {:?} has started", thread.apic_id());
         pcm.set_mem_affinity(0).expect("Can't set affinity");
     }
-
 }
diff --git a/kernel/src/arch/x86_64/mod.rs b/kernel/src/arch/x86_64/mod.rs
index e557331b0..7e4a7e4ae 100644
--- a/kernel/src/arch/x86_64/mod.rs
+++ b/kernel/src/arch/x86_64/mod.rs
@@ -21,9 +21,9 @@
 
 use alloc::sync::Arc;
 use core::mem::transmute;
+use core::num::NonZeroUsize;
 use core::sync::atomic::AtomicBool;
 use core::sync::atomic::Ordering;
-use core::num::NonZeroUsize;
 
 #[cfg(feature = "rackscale")]
 use crate::nr::NR_LOG;
@@ -449,7 +449,6 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {
     // Set-up interrupt routing drivers (I/O APIC controllers)
     irq::ioapic_initialize();
 
-
     // Let's go with one replica per NUMA node for now:
     let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes());
     let numa_nodes = NonZeroUsize::new(numa_nodes).expect("At least one NUMA node");
@@ -458,14 +457,22 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {
     let kernel_node = {
         // Create the global operation log and first replica and store it (needs
         // TLS)
-        let kernel_node: Arc<NodeReplicated<KernelNode>> = Arc::try_new(NodeReplicated::new(numa_nodes, |afc: AffinityChange| { 
-            let pcm = kcb::per_core_mem();
-            match afc {
-                AffinityChange::Replica(r) => pcm.set_mem_affinity(r).expect("Can't set affinity"),
-                AffinityChange::Revert(orig) => pcm.set_mem_affinity(orig).expect("Can't set affinity"),
-            }
-            return 0; // xxx
-            }).expect("Not enough memory to initialize system")).expect("Not enough memory to initialize system");
+        let kernel_node: Arc<NodeReplicated<KernelNode>> = Arc::try_new(
+            NodeReplicated::new(numa_nodes, |afc: AffinityChange| {
+                let pcm = kcb::per_core_mem();
+                match afc {
+                    AffinityChange::Replica(r) => {
+                        pcm.set_mem_affinity(r).expect("Can't set affinity")
+                    }
+                    AffinityChange::Revert(orig) => {
+                        pcm.set_mem_affinity(orig).expect("Can't set affinity")
+                    }
+                }
+                return 0; // xxx
+            })
+            .expect("Not enough memory to initialize system"),
+        )
+        .expect("Not enough memory to initialize system");
 
         let local_ridx = kernel_node.register(0).unwrap();
         crate::nr::NR_REPLICA.call_once(|| (kernel_node.clone(), local_ridx));
@@ -508,6 +515,7 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {
     crate::pci::init();
 
     // Initialize processes
+    #[cfg(feature = "rackscale")]
     lazy_static::initialize(&process::PROCESS_LOGS);
 
     #[cfg(not(feature = "rackscale"))]
diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs
index 54c76eb01..b830712ea 100644
--- a/kernel/src/arch/x86_64/process.rs
+++ b/kernel/src/arch/x86_64/process.rs
@@ -6,11 +6,11 @@ use alloc::collections::TryReserveError;
 use alloc::string::String;
 use alloc::sync::Arc;
 use alloc::vec::Vec;
-use core::alloc::Allocator;
 use core::arch::asm;
 use core::cell::RefCell;
 use core::cmp::PartialEq;
 use core::iter::Iterator;
+use core::sync::atomic::{AtomicUsize, Ordering};
 use core::{fmt, ptr};
 
 use arrayvec::ArrayVec;
@@ -20,12 +20,13 @@ use kpi::arch::SaveArea;
 use kpi::process::{FrameId, ELF_OFFSET, EXECUTOR_OFFSET};
 use lazy_static::lazy_static;
 use log::{debug, info, trace, warn};
+#[cfg(feature = "rackscale")]
 use node_replication::{Dispatch, Log, Replica};
+use nr2::nr::NodeReplicated;
 use x86::bits64::paging::*;
 use x86::bits64::rflags;
 use x86::{controlregs, Ring};
 
-use crate::arch::kcb::per_core_mem;
 use crate::error::{KError, KResult};
 use crate::fs::{fd::FileDescriptorEntry, MAX_FILES_PER_PROCESS};
 use crate::memory::vspace::{AddressSpace, MapAction};
@@ -69,6 +70,7 @@ pub(crate) fn current_pid() -> KResult<Pid> {
         .pid)
 }
 
+#[cfg(feature = "rackscale")]
 lazy_static! {
     pub(crate) static ref PROCESS_LOGS: Box<
         ArrayVec<
@@ -78,7 +80,6 @@ lazy_static! {
     > = {
 
 
-        #[cfg(feature = "rackscale")]
         if crate::CMDLINE
             .get()
             .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller)
@@ -117,7 +118,6 @@ lazy_static! {
             process_logs
         };
 
-        #[cfg(feature = "rackscale")]
         if crate::CMDLINE
             .get()
             .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller)
@@ -132,61 +132,49 @@ lazy_static! {
 }
 
 lazy_static! {
-    pub(crate) static ref PROCESS_TABLE: ArrayVec<
-        ArrayVec<Arc<Replica<'static, NrProcess<Ring3Process>>>, MAX_PROCESSES>,
-        MAX_NUMA_NODES,
-    > = create_process_table();
+    pub(crate) static ref PROCESS_TABLE: ArrayVec<Arc<NodeReplicated<NrProcess<Ring3Process>>>, MAX_PROCESSES> =
+        create_process_table();
 }
 
 #[cfg(not(feature = "rackscale"))]
-fn create_process_table(
-) -> ArrayVec<ArrayVec<Arc<Replica<'static, NrProcess<Ring3Process>>>, MAX_PROCESSES>, MAX_NUMA_NODES>
-{
-    use crate::memory::detmem::DA;
+fn create_process_table() -> ArrayVec<Arc<NodeReplicated<NrProcess<Ring3Process>>>, MAX_PROCESSES> {
+    use crate::arch::kcb;
+    use core::num::NonZeroUsize;
+    use nr2::nr::AffinityChange;
 
     // Want at least one replica...
-    let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes());
-
-    let mut numa_cache = ArrayVec::new();
-    for _n in 0..numa_nodes {
-        let process_replicas = ArrayVec::new();
-        debug_assert!(!numa_cache.is_full());
-        numa_cache.push(process_replicas)
-    }
-
-    for pid in 0..MAX_PROCESSES {
-        let allocator = DA::new().expect("Can't initialize process deterministic memory allocator");
+    let num_replicas =
+        NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).unwrap();
+    let mut processes = ArrayVec::new();
 
-        for node in 0..numa_nodes {
-            debug_assert!(!numa_cache[node].is_full());
+    for _pid in 0..MAX_PROCESSES {
+        debug_assert_eq!(
+            *crate::environment::NODE_ID,
+            0,
+            "Expect initialization to happen on node 0."
+        );
 
-            let pcm = per_core_mem();
-            pcm.set_mem_affinity(node as atopology::NodeId)
-                .expect("Can't change affinity");
+        let process: Arc<NodeReplicated<NrProcess<Ring3Process>>> = Arc::try_new(
+            NodeReplicated::new(num_replicas, |afc: AffinityChange| {
+                let pcm = kcb::per_core_mem();
+                match afc {
+                    AffinityChange::Replica(r) => {
+                        pcm.set_mem_affinity(r).expect("Can't set affinity")
+                    }
+                    AffinityChange::Revert(orig) => {
+                        pcm.set_mem_affinity(orig).expect("Can't set affinity")
+                    }
+                }
+                return 0; // TODO(dynrep): Return error code
+            })
+            .expect("Not enough memory to initialize system"),
+        )
+        .expect("Not enough memory to initialize system");
 
-            let p = Box::try_new(
-                Ring3Process::new(pid, Box::new(allocator.clone()))
-                    .expect("Can't create process during init"),
-            )
-            .expect("Not enough memory to initialize processes");
-            let nrp = NrProcess::new(p, Box::new(allocator.clone()));
-
-            numa_cache[node].push(Replica::<NrProcess<Ring3Process>>::with_data(
-                &PROCESS_LOGS[pid],
-                nrp,
-            ));
-
-            pcm.set_mem_affinity(0 as atopology::NodeId)
-                .expect("Can't change affinity");
-            debug_assert_eq!(
-                *crate::environment::NODE_ID,
-                0,
-                "Expect initialization to happen on node 0."
-            );
-        }
+        processes.push(process)
     }
 
-    numa_cache
+    processes
 }
 
 #[cfg(feature = "rackscale")]
@@ -266,10 +254,7 @@ impl crate::nrproc::ProcessManager for ArchProcessManagement {
 
     fn process_table(
         &self,
-    ) -> &'static ArrayVec<
-        ArrayVec<Arc<Replica<'static, NrProcess<Self::Process>>>, MAX_PROCESSES>,
-        MAX_NUMA_NODES,
-    > {
+    ) -> &'static ArrayVec<Arc<NodeReplicated<NrProcess<Self::Process>>>, MAX_PROCESSES> {
         &*super::process::PROCESS_TABLE
     }
 }
@@ -1026,8 +1011,42 @@ pub(crate) struct Ring3Process {
     pub read_only_offset: VAddr,
 }
 
+static NEXT_PID: AtomicUsize = AtomicUsize::new(0);
+
+impl Default for NrProcess<Ring3Process> {
+    fn default() -> Self {
+        let next_pid = NEXT_PID.fetch_add(1, Ordering::Relaxed);
+        NrProcess::new(
+            Box::try_new(
+                Ring3Process::new(next_pid as Pid).expect("Failed to set-up process during init"),
+            )
+            .expect("Failed to initialize process during init"),
+        )
+    }
+}
+
+impl Clone for Ring3Process {
+    fn clone(&self) -> Self {
+        unimplemented!("Clone not implemented for Ring3Process")
+        /*Ring3Process {
+            pid: self.pid,
+            current_eid: self.current_eid,
+            vspace: self.vspace.clone(),
+            offset: self.offset,
+            pinfo: self.pinfo.clone(),
+            entry_point: self.entry_point,
+            executor_cache: self.executor_cache.clone(),
+            executor_offset: self.executor_offset,
+            fds: self.fds.clone(),
+            pfm: self.pfm.clone(),
+            writeable_sections: self.writeable_sections.clone(),
+            read_only_offset: self.read_only_offset,
+        }*/
+    }
+}
+
 impl Ring3Process {
-    fn new(pid: Pid, allocator: Box<dyn Allocator + Sync + Send>) -> Result<Self, KError> {
+    fn new(pid: Pid) -> Result<Self, KError> {
         const NONE_EXECUTOR: Option<Vec<Box<Ring3Executor>>> = None;
         #[cfg(not(feature = "rackscale"))]
         let executor_cache: ArrayVec<Option<Vec<Box<Ring3Executor>>>, MAX_NUMA_NODES> =
@@ -1051,7 +1070,7 @@ impl Ring3Process {
             pid: pid,
             current_eid: 0,
             offset: VAddr::from(ELF_OFFSET),
-            vspace: VSpace::new(allocator)?,
+            vspace: VSpace::new()?,
             entry_point: VAddr::from(0usize),
             executor_cache,
             executor_offset: VAddr::from(EXECUTOR_OFFSET),
diff --git a/kernel/src/arch/x86_64/vspace/mod.rs b/kernel/src/arch/x86_64/vspace/mod.rs
index 08c504004..42ea4effb 100644
--- a/kernel/src/arch/x86_64/vspace/mod.rs
+++ b/kernel/src/arch/x86_64/vspace/mod.rs
@@ -2,7 +2,6 @@
 // SPDX-License-Identifier: Apache-2.0 OR MIT
 
 use alloc::boxed::Box;
-use core::alloc::Allocator;
 use core::ops::Bound::*;
 
 use fallible_collections::btree::BTreeMap;
@@ -70,8 +69,7 @@ lazy_static! {
             //   allocated with slabmalloc (maybe we can have a no_drop variant
             //   of PageTable?)
             PageTable {
-                pml4: Box::into_pin(Box::from_raw(pml4_table)),
-                allocator: None,
+                pml4: Box::into_pin(Box::from_raw(pml4_table))
             }
         }
 
@@ -201,10 +199,10 @@ impl Drop for VSpace {
 }
 
 impl VSpace {
-    pub(crate) fn new(allocator: Box<dyn Allocator + Sync + Send>) -> Result<Self, KError> {
+    pub(crate) fn new() -> Result<Self, KError> {
         Ok(VSpace {
             mappings: BTreeMap::new(),
-            page_table: PageTable::new(allocator)?,
+            page_table: PageTable::new()?,
         })
     }
 
diff --git a/kernel/src/arch/x86_64/vspace/page_table.rs b/kernel/src/arch/x86_64/vspace/page_table.rs
index 20ec9ac09..a21e8db17 100644
--- a/kernel/src/arch/x86_64/vspace/page_table.rs
+++ b/kernel/src/arch/x86_64/vspace/page_table.rs
@@ -2,7 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0 OR MIT
 
 use alloc::boxed::Box;
-use core::alloc::{Allocator, Layout};
+use core::alloc::Layout;
 use core::mem::transmute;
 use core::pin::Pin;
 use core::ptr::NonNull;
@@ -33,7 +33,6 @@ enum Modify {
 /// The actual page-table. We allocate the PML4 upfront.
 pub(crate) struct PageTable {
     pub pml4: Pin<Box<PML4>>,
-    pub allocator: Option<Box<dyn Allocator + Send + Sync>>,
 }
 
 impl Drop for PageTable {
@@ -171,14 +170,13 @@ impl PageTable {
     /// Create a new address-space.
     ///
     /// Allocate an initial PML4 table for it.
-    pub(crate) fn new(allocator: Box<dyn Allocator + Send + Sync>) -> Result<PageTable, KError> {
+    pub(crate) fn new() -> Result<PageTable, KError> {
         let pml4 = Box::try_new(
             [PML4Entry::new(PAddr::from(0x0u64), PML4Flags::empty()); PAGE_SIZE_ENTRIES],
         )?;
 
         Ok(PageTable {
             pml4: Box::into_pin(pml4),
-            allocator: Some(allocator),
         })
     }
 
@@ -820,16 +818,13 @@ impl PageTable {
     }
 
     fn alloc_frame(&self) -> Frame {
-        let frame_ptr = self.allocator.as_ref().map_or_else(
-            || unsafe {
-                let ptr = alloc::alloc::alloc(PT_LAYOUT);
-                debug_assert!(!ptr.is_null());
-
-                let nptr = NonNull::new_unchecked(ptr);
-                NonNull::slice_from_raw_parts(nptr, PT_LAYOUT.size())
-            },
-            |allocator| allocator.allocate(PT_LAYOUT).unwrap(),
-        );
+        let frame_ptr = unsafe {
+            let ptr = alloc::alloc::alloc(PT_LAYOUT);
+            debug_assert!(!ptr.is_null());
+
+            let nptr = NonNull::new_unchecked(ptr);
+            NonNull::slice_from_raw_parts(nptr, PT_LAYOUT.size())
+        };
         let vaddr = VAddr::from(frame_ptr.as_ptr() as *const u8 as u64);
         let paddr = crate::arch::memory::kernel_vaddr_to_paddr(vaddr);
         let mut frame = Frame::new(paddr, PT_LAYOUT.size(), 0);
diff --git a/kernel/src/integration_tests.rs b/kernel/src/integration_tests.rs
index 4b132d461..7b93fe64a 100644
--- a/kernel/src/integration_tests.rs
+++ b/kernel/src/integration_tests.rs
@@ -422,7 +422,7 @@ fn coreboot_nrlog() {
     use core::sync::atomic::{AtomicBool, Ordering};
     use klogger::sprintln;
     use log::info;
-    use nr2::Log;
+    use node_replication::Log;
 
     let log: Arc<Log<usize>> =
         Arc::try_new(Log::<usize>::new(1024 * 1024 * 1)).expect("Can't Arc this");
diff --git a/kernel/src/nr.rs b/kernel/src/nr.rs
index b5554cc79..09de748c5 100644
--- a/kernel/src/nr.rs
+++ b/kernel/src/nr.rs
@@ -7,7 +7,7 @@ use core::fmt::Debug;
 use alloc::sync::Arc;
 use hashbrown::HashMap;
 use log::{error, trace};
-use nr2::nr::{NodeReplicated, ThreadToken, Dispatch};
+use nr2::nr::{Dispatch, NodeReplicated, ThreadToken};
 use spin::Once;
 
 #[cfg(feature = "rackscale")]
@@ -99,7 +99,6 @@ pub(crate) struct CoreInfo {
     pub entry_point: VAddr,
 }
 
-
 #[derive(Debug, Clone)]
 pub(crate) struct KernelNode {
     process_map: HashMap<Pid, ()>,
diff --git a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs
index a021fc002..61e2502de 100644
--- a/kernel/src/nrproc.rs
+++ b/kernel/src/nrproc.rs
@@ -4,7 +4,6 @@
 use crate::prelude::*;
 use alloc::sync::Arc;
 use alloc::vec::Vec;
-use core::alloc::Allocator;
 use core::mem::MaybeUninit;
 use fallible_collections::FallibleVecGlobal;
 
@@ -12,11 +11,12 @@ use arrayvec::ArrayVec;
 use fallible_collections::vec::FallibleVec;
 use kpi::process::{FrameId, ProcessInfo};
 use kpi::MemType;
-use node_replication::{Dispatch, Replica, ReplicaToken};
+use nr2::nr::{Dispatch, NodeReplicated, ThreadToken};
 use spin::Once;
 
 use crate::arch::process::PROCESS_TABLE;
 use crate::arch::MAX_NUMA_NODES;
+use crate::arch::Module;
 use crate::error::{KError, KResult};
 use crate::memory::vspace::{AddressSpace, MapAction, TlbFlushHandle};
 use crate::memory::{Frame, PAddr, VAddr};
@@ -26,7 +26,7 @@ use crate::process::{
 
 /// The tokens per core to access the process replicas.
 #[thread_local]
-pub(crate) static PROCESS_TOKEN: Once<ArrayVec<ReplicaToken, { MAX_PROCESSES }>> = Once::new();
+pub(crate) static PROCESS_TOKEN: Once<ArrayVec<ThreadToken, { MAX_PROCESSES }>> = Once::new();
 
 /// Initializes `PROCESS_TOKEN`.
 ///
@@ -38,9 +38,9 @@ pub(crate) fn register_thread_with_process_replicas() {
     PROCESS_TOKEN.call_once(|| {
         let mut tokens = ArrayVec::new();
         for pid in 0..MAX_PROCESSES {
-            debug_assert!(PROCESS_TABLE[node].len() > pid, "Invalid PID");
+            debug_assert!(PROCESS_TABLE.len() > pid, "Invalid PID");
 
-            let token = PROCESS_TABLE[node][pid].register();
+            let token = PROCESS_TABLE[pid].register(node);
             tokens.push(token.expect("Need to be able to register"));
         }
 
@@ -115,11 +115,7 @@ pub(crate) enum ProcessResult<E: Executor> {
 
 /// Advances the replica of all the processes on the current NUMA node.
 pub(crate) fn advance_all() {
-    let node = *crate::environment::NODE_ID;
-
-    for pid in 0..MAX_PROCESSES {
-        PROCESS_TABLE[node][pid].sync(PROCESS_TOKEN.get().unwrap()[pid]);
-    }
+    unreachable!("shouldn't be called anymore dynrep");
 }
 
 pub(crate) trait ProcessManager {
@@ -128,25 +124,20 @@ pub(crate) trait ProcessManager {
     #[allow(clippy::type_complexity)] // fix this once `associated_type_defaults` works
     fn process_table(
         &self,
-    ) -> &'static ArrayVec<
-        ArrayVec<Arc<Replica<'static, NrProcess<Self::Process>>>, MAX_PROCESSES>,
-        MAX_NUMA_NODES,
-    >;
+    ) -> &'static ArrayVec<Arc<NodeReplicated<NrProcess<Self::Process>>>, MAX_PROCESSES>;
 }
 
 /// A node-replicated process.
-pub(crate) struct NrProcess<P: Process, M: Allocator + Clone = alloc::alloc::Global> {
+#[derive(Clone)]
+pub(crate) struct NrProcess<P: Process> {
     /// A list of all cores where the current process is running.
-    active_cores: Vec<(kpi::system::GlobalThreadId, Eid), M>,
+    active_cores: Vec<(kpi::system::GlobalThreadId, Eid)>,
     /// The process struct itself.
     process: Box<P>,
 }
 
 impl<P: Process> NrProcess<P> {
-    pub(crate) fn new(
-        process: Box<P>,
-        _allocator: Box<dyn Allocator + Send + Sync>,
-    ) -> NrProcess<P> {
+    pub(crate) fn new(process: Box<P>) -> NrProcess<P> {
         NrProcess {
             active_cores: Vec::new(),
             process,
@@ -161,10 +152,7 @@ impl<P: Process> NrProcess<P> {
         writeable_sections: Vec<Frame>,
     ) -> Result<(), KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-
-        let node = *crate::environment::NODE_ID;
-
-        let response = PROCESS_TABLE[node][pid].execute_mut(
+        let response = PROCESS_TABLE[pid].execute_mut(
             ProcessOpMut::Load(pid, module_name, writeable_sections),
             PROCESS_TOKEN.get().unwrap()[pid],
         );
@@ -178,10 +166,7 @@ impl<P: Process> NrProcess<P> {
     pub(crate) fn resolve(pid: Pid, base: VAddr) -> Result<(u64, u64), KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
         debug_assert!(base.as_u64() < kpi::KERNEL_BASE, "Invalid base");
-
-        let node = *crate::environment::NODE_ID;
-
-        let response = PROCESS_TABLE[node][pid].execute(
+        let response = PROCESS_TABLE[pid].execute(
             ProcessOp::MemResolve(base),
             PROCESS_TOKEN.get().unwrap()[pid],
         );
@@ -194,10 +179,7 @@ impl<P: Process> NrProcess<P> {
 
     pub(crate) fn synchronize(pid: Pid) {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-
-        let node = *crate::environment::NODE_ID;
-
-        PROCESS_TABLE[node][pid].sync(PROCESS_TOKEN.get().unwrap()[pid]);
+        PROCESS_TABLE[pid].sync(PROCESS_TOKEN.get().unwrap()[pid]);
     }
 
     pub(crate) fn map_device_frame(
@@ -206,10 +188,7 @@ impl<P: Process> NrProcess<P> {
         action: MapAction,
     ) -> Result<(u64, u64), KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-
-        let node = *crate::environment::NODE_ID;
-
-        let response = PROCESS_TABLE[node][pid].execute_mut(
+        let response = PROCESS_TABLE[pid].execute_mut(
             ProcessOpMut::MemMapDevice(frame, action),
             PROCESS_TOKEN.get().unwrap()[pid],
         );
@@ -222,10 +201,7 @@ impl<P: Process> NrProcess<P> {
 
     pub(crate) fn unmap(pid: Pid, base: VAddr) -> Result<Vec<TlbFlushHandle>, KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-
-        let node = *crate::environment::NODE_ID;
-
-        let response = PROCESS_TABLE[node][pid].execute_mut(
+        let response = PROCESS_TABLE[pid].execute_mut(
             ProcessOpMut::MemUnmap(base),
             PROCESS_TOKEN.get().unwrap()[pid],
         );
@@ -243,11 +219,8 @@ impl<P: Process> NrProcess<P> {
         action: MapAction,
     ) -> Result<(PAddr, usize), KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-
-        let node = *crate::environment::NODE_ID;
         //action.multiple_mappings(true);
-
-        let response = PROCESS_TABLE[node][pid].execute_mut(
+        let response = PROCESS_TABLE[pid].execute_mut(
             ProcessOpMut::MemMapFrameId(base, frame_id, action),
             PROCESS_TOKEN.get().unwrap()[pid],
         );
@@ -265,12 +238,9 @@ impl<P: Process> NrProcess<P> {
         action: MapAction,
     ) -> Result<(u64, u64), KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-
-        let node = *crate::environment::NODE_ID;
-
         let mut virtual_offset = 0;
         for frame in frames {
-            let response = PROCESS_TABLE[node][pid].execute_mut(
+            let response = PROCESS_TABLE[pid].execute_mut(
                 ProcessOpMut::MemMapFrame(base + virtual_offset, frame, action),
                 PROCESS_TOKEN.get().unwrap()[pid],
             );
@@ -293,11 +263,8 @@ impl<P: Process> NrProcess<P> {
 
     pub(crate) fn pinfo(pid: Pid) -> Result<ProcessInfo, KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-
-        let node = *crate::environment::NODE_ID;
-
-        let response = PROCESS_TABLE[node][pid]
-            .execute(ProcessOp::ProcessInfo, PROCESS_TOKEN.get().unwrap()[pid]);
+        let response =
+            PROCESS_TABLE[pid].execute(ProcessOp::ProcessInfo, PROCESS_TOKEN.get().unwrap()[pid]);
         match response {
             Ok(ProcessResult::ProcessInfo(pinfo)) => Ok(pinfo),
             Err(e) => Err(e),
@@ -313,7 +280,7 @@ impl<P: Process> NrProcess<P> {
         let gtid = *crate::environment::CORE_ID;
         let node = *crate::environment::NODE_ID;
 
-        let response = pm.process_table()[node][pid].execute_mut(
+        let response = pm.process_table()[pid].execute_mut(
             ProcessOpMut::AssignExecutor(gtid, node),
             PROCESS_TOKEN.get().unwrap()[pid],
         );
@@ -344,10 +311,7 @@ impl<P: Process> NrProcess<P> {
 
     pub(crate) fn allocate_frame_to_process(pid: Pid, frame: Frame) -> Result<FrameId, KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-
-        let node = *crate::environment::NODE_ID;
-
-        let response = PROCESS_TABLE[node][pid].execute_mut(
+        let response = PROCESS_TABLE[pid].execute_mut(
             ProcessOpMut::AllocateFrameToProcess(frame),
             PROCESS_TOKEN.get().unwrap()[pid],
         );
@@ -361,10 +325,7 @@ impl<P: Process> NrProcess<P> {
     pub(crate) fn release_frame_from_process(pid: Pid, fid: FrameId) -> Result<Frame, KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
         debug_assert!(fid < MAX_FRAMES_PER_PROCESS, "Invalid FID");
-
-        let node = *crate::environment::NODE_ID;
-
-        let response = PROCESS_TABLE[node][pid].execute_mut(
+        let response = PROCESS_TABLE[pid].execute_mut(
             ProcessOpMut::ReleaseFrameFromProcess(fid),
             PROCESS_TOKEN.get().unwrap()[pid],
         );
@@ -377,13 +338,10 @@ impl<P: Process> NrProcess<P> {
 
     pub(crate) fn allocate_dispatchers(pid: Pid, frame: Frame) -> Result<usize, KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-
-        let node = *crate::environment::NODE_ID;
-
         #[cfg(feature = "rackscale")]
         let mid = *crate::environment::MACHINE_ID;
 
-        let response = PROCESS_TABLE[node][pid].execute_mut(
+        let response = PROCESS_TABLE[pid].execute_mut(
             #[cfg(not(feature = "rackscale"))]
             ProcessOpMut::DispatcherAllocation(frame),
             #[cfg(feature = "rackscale")]
@@ -399,9 +357,7 @@ impl<P: Process> NrProcess<P> {
     }
 
     pub(crate) fn userslice_to_arc_slice(from: UserSlice) -> Result<Arc<[u8]>, KError> {
-        let node = *crate::environment::NODE_ID;
-
-        let response = PROCESS_TABLE[node][from.pid].execute(
+        let response = PROCESS_TABLE[from.pid].execute(
             ProcessOp::ReadSlice(from),
             PROCESS_TOKEN.get().unwrap()[from.pid],
         );
@@ -413,9 +369,7 @@ impl<P: Process> NrProcess<P> {
     }
 
     pub(crate) fn read_string_from_userspace(from: UserSlice) -> Result<String, KError> {
-        let node = *crate::environment::NODE_ID;
-
-        let response = PROCESS_TABLE[node][from.pid].execute(
+        let response = PROCESS_TABLE[from.pid].execute(
             ProcessOp::ReadString(from),
             PROCESS_TOKEN.get().unwrap()[from.pid],
         );
@@ -427,10 +381,9 @@ impl<P: Process> NrProcess<P> {
     }
 
     pub(crate) fn write_to_userspace(to: &mut UserSlice, kbuf: &[u8]) -> Result<(), KError> {
-        let node = *crate::environment::NODE_ID;
         let pid = to.pid;
 
-        let response = PROCESS_TABLE[node][pid].execute(
+        let response = PROCESS_TABLE[pid].execute(
             ProcessOp::WriteSlice(to, kbuf),
             PROCESS_TOKEN.get().unwrap()[pid],
         );
@@ -446,9 +399,7 @@ impl<P: Process> NrProcess<P> {
         on: UserSlice,
         f: Box<dyn Fn(&mut [u8]) -> KResult<(u64, u64)>>,
     ) -> Result<(u64, u64), KError> {
-        let node = *crate::environment::NODE_ID;
-
-        let response = PROCESS_TABLE[node][on.pid].execute(
+        let response = PROCESS_TABLE[on.pid].execute(
             ProcessOp::ExecSliceMut(on, f),
             PROCESS_TOKEN.get().unwrap()[on.pid],
         );
@@ -463,9 +414,7 @@ impl<P: Process> NrProcess<P> {
         on: &'a UserSlice,
         f: Box<dyn Fn(&'a [u8]) -> KResult<()>>,
     ) -> Result<(), KError> {
-        let node = *crate::environment::NODE_ID;
-
-        let response = PROCESS_TABLE[node][on.pid].execute(
+        let response = PROCESS_TABLE[on.pid].execute(
             ProcessOp::ExecSlice(on, f),
             PROCESS_TOKEN.get().unwrap()[on.pid],
         );
@@ -477,11 +426,10 @@ impl<P: Process> NrProcess<P> {
     }
 }
 
-impl<P, M> Dispatch for NrProcess<P, M>
+impl<P> Dispatch for NrProcess<P>
 where
     P: Process,
     P::E: Copy,
-    M: Allocator + Clone,
 {
     type ReadOperation<'buf> = ProcessOp<'buf>;
     type WriteOperation = ProcessOpMut;
diff --git a/kernel/src/process.rs b/kernel/src/process.rs
index 5d19bd798..77ad93dd9 100644
--- a/kernel/src/process.rs
+++ b/kernel/src/process.rs
@@ -58,7 +58,7 @@ pub(crate) const MAX_FRAMES_PER_PROCESS: usize = MAX_CORES;
 pub(crate) const MAX_WRITEABLE_SECTIONS_PER_PROCESS: usize = 4;
 
 /// Abstract definition of a process.
-pub(crate) trait Process: FrameManagement {
+pub(crate) trait Process: FrameManagement + Clone {
     type E: Executor + Copy + Sync + Send + Debug + PartialEq;
     type A: AddressSpace;
 

From 04baf3105b4f571bb9e1783a886484a0b6de22be Mon Sep 17 00:00:00 2001
From: Gerd Zellweger <mail@gerdzellweger.com>
Date: Sun, 24 Sep 2023 21:11:02 -0700
Subject: [PATCH 04/32] Make tests work with new node-replication code.

Signed-off-by: Gerd Zellweger <mail@gerdzellweger.com>
---
 kernel/src/arch/x86_64/process.rs             | 37 +++++++++++++++++++
 .../src/arch/x86_64/rackscale/controller.rs   |  3 ++
 .../x86_64/rackscale/get_shmem_structure.rs   |  2 +
 kernel/src/arch/x86_64/vspace/page_table.rs   |  2 +-
 kernel/src/arch/x86_64/vspace/test.rs         |  2 +-
 kernel/src/nrproc.rs                          |  5 ---
 kernel/src/scheduler/mod.rs                   |  1 -
 kernel/tests/s04_user_runtime_tests.rs        |  1 +
 kernel/tests/s10_benchmarks.rs                |  8 ++--
 lib/node-replication2                         |  2 +-
 usr/init/src/vmops/mod.rs                     |  2 +-
 11 files changed, 51 insertions(+), 14 deletions(-)

diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs
index b830712ea..4c0074e7d 100644
--- a/kernel/src/arch/x86_64/process.rs
+++ b/kernel/src/arch/x86_64/process.rs
@@ -106,6 +106,43 @@ lazy_static! {
             return process_logs;
         }
 
+        // TODO(dynrep): here we create the Log on the controller for sending it
+        // to the data-kernels this would probably need to create a
+        // NodeReplicated<DataKernel> NodeReplicated<Process> instance
+
+        // NodeReplicated::new(#data-kernels) ->
+        //  - for data_kernel in 0..#data-kernels {
+        //      - change affinity to data_kernel
+        //      - Box::new(bla) [allocator will go to DCM if necessary]
+        //      - change affinity back to controller
+        //  }
+
+        /*
+         == Controller:
+            |afc: AffinityChange| {
+            let pcm = kcb::per_core_mem();
+            match afc {
+                AffinityChange::Replica(r: MachineId) => {
+                    // We want to allocate the logs in controller shared memory
+                    use crate::memory::shmem_affinity::local_shmem_affinity;
+                    let pcm = per_core_mem();
+                    pcm.set_mem_affinity(mid_to_shmem_affinity(r)).expect("Can't change affinity");
+                }
+                AffinityChange::Revert(orig) => {
+                    // We want to allocate the logs in controller shared memory
+                    use crate::memory::shmem_affinity::local_shmem_affinity;
+                    let pcm = per_core_mem();
+                    - pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't change affinity");
+                    OR 
+                    - pcm.set_mem_affinity(orig).expect("Can't change affinity");
+                }
+            }
+            return 0; // TODO(dynrep): Return error code
+
+         == Data kernel
+            - The closure when set on controller probably won't work in data-kernel (diff symbol addresses?)
+            - The binary might be fine because it's identical!
+        */
         let process_logs = {
             let mut process_logs = Box::try_new(ArrayVec::new()).expect("Can't initialize process log vector.");
             for _pid in 0..MAX_PROCESSES {
diff --git a/kernel/src/arch/x86_64/rackscale/controller.rs b/kernel/src/arch/x86_64/rackscale/controller.rs
index 23339f0ef..20def97b5 100644
--- a/kernel/src/arch/x86_64/rackscale/controller.rs
+++ b/kernel/src/arch/x86_64/rackscale/controller.rs
@@ -79,6 +79,9 @@ pub(crate) fn run() {
     {
     }
 
+    // TODO(dynrep): here is the point where we have all the memory regions and
+    // could create NodeReplicated instances for each client.
+
     #[cfg(feature = "test-controller-shmem-alloc")]
     {
         if mid == 1 {
diff --git a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs
index 42aefaed5..f2d8e4a77 100644
--- a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs
+++ b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs
@@ -29,6 +29,8 @@ use crate::process::MAX_PROCESSES;
 #[derive(Debug, Eq, PartialEq, PartialOrd, Clone, Copy)]
 #[repr(u8)]
 pub enum ShmemStructure {
+    // TODO(dynrep): remove NrProcLogs/NrLog; instead add NodeReplicated<Process>
+    // and NodeReplicated<KernelNode> instances that get sent from the controller.
     NrProcLogs = 0,
     NrLog = 1,
     WorkQueues = 2,
diff --git a/kernel/src/arch/x86_64/vspace/page_table.rs b/kernel/src/arch/x86_64/vspace/page_table.rs
index a21e8db17..5ed05cc36 100644
--- a/kernel/src/arch/x86_64/vspace/page_table.rs
+++ b/kernel/src/arch/x86_64/vspace/page_table.rs
@@ -719,7 +719,7 @@ impl PageTable {
                 return Err(KError::AlreadyMapped { base: vbase });
             } else {
                 panic!(
-                    "An existing mapping already covers the 2 MiB range we're trying to map in?"
+                    "An existing mapping already covers the 2 MiB range we're trying to map in? {vbase}"
                 );
             }
         }
diff --git a/kernel/src/arch/x86_64/vspace/test.rs b/kernel/src/arch/x86_64/vspace/test.rs
index ae357dfc0..242117a8a 100644
--- a/kernel/src/arch/x86_64/vspace/test.rs
+++ b/kernel/src/arch/x86_64/vspace/test.rs
@@ -95,7 +95,7 @@ proptest! {
         use TestAction::*;
         use crate::memory::detmem::DA;
 
-        let mut totest = VSpace::new(Box::new(DA::new().expect("Unable to create DA"))).expect("Unable to create vspace");;
+        let mut totest = VSpace::new().expect("Unable to create vspace");;
         let mut model: ModelAddressSpace = Default::default();
 
         for action in ops {
diff --git a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs
index 61e2502de..d573dc2cf 100644
--- a/kernel/src/nrproc.rs
+++ b/kernel/src/nrproc.rs
@@ -113,11 +113,6 @@ pub(crate) enum ProcessResult<E: Executor> {
     ReadString(String),
 }
 
-/// Advances the replica of all the processes on the current NUMA node.
-pub(crate) fn advance_all() {
-    unreachable!("shouldn't be called anymore dynrep");
-}
-
 pub(crate) trait ProcessManager {
     type Process: Process + Sync;
 
diff --git a/kernel/src/scheduler/mod.rs b/kernel/src/scheduler/mod.rs
index cd40ec90a..e55a57893 100644
--- a/kernel/src/scheduler/mod.rs
+++ b/kernel/src/scheduler/mod.rs
@@ -67,7 +67,6 @@ pub(crate) fn schedule() -> ! {
                             // There is no process but we're the "main" thread,
                             // aggressively try and advance the replica
                             let start = rawtime::Instant::now();
-                            crate::nrproc::advance_all();
                             crate::arch::advance_fs_replica();
 
                             if start.elapsed().as_millis() < 1 {
diff --git a/kernel/tests/s04_user_runtime_tests.rs b/kernel/tests/s04_user_runtime_tests.rs
index ba0dc4bdd..e8d749895 100644
--- a/kernel/tests/s04_user_runtime_tests.rs
+++ b/kernel/tests/s04_user_runtime_tests.rs
@@ -31,6 +31,7 @@ fn s04_userspace_multicore() {
         .user_feature("test-scheduler-smp")
         .build();
     let cmdline = RunnerArgs::new_with_build("userspace-smp", &build)
+        .nodes(num_cores / 16)
         .cores(num_cores)
         .memory(4096)
         .timeout(120_000);
diff --git a/kernel/tests/s10_benchmarks.rs b/kernel/tests/s10_benchmarks.rs
index f590ace52..4bff3512d 100644
--- a/kernel/tests/s10_benchmarks.rs
+++ b/kernel/tests/s10_benchmarks.rs
@@ -487,13 +487,13 @@ fn s10_fxmark_benchmark() {
                 if cfg!(feature = "smoke") {
                     cmdline = cmdline.memory(8192);
                 } else {
-                    cmdline = cmdline.memory(core::cmp::max(73728, cores * 2048));
+                    cmdline = cmdline.memory(core::cmp::max(87728, cores * 2048));
                 }
 
                 if cfg!(feature = "smoke") && cores > 2 {
-                    cmdline = cmdline.nodes(2);
+                    cmdline = cmdline.nodes(std::cmp::max(machine.max_cores() / 16, 1));
                 } else {
-                    cmdline = cmdline.nodes(machine.max_numa_nodes());
+                    cmdline = cmdline.nodes(std::cmp::max(machine.max_cores() / 16, machine.max_numa_nodes()));
                 }
 
                 let mut output = String::new();
@@ -834,7 +834,7 @@ fn s10_leveldb_benchmark() {
 }
 
 #[test]
-fn s10_memcached_benchmark_internal() {
+fn s10_xmemcached_benchmark_internal() {
     setup_network(1);
 
     let machine = Machine::determine();
diff --git a/lib/node-replication2 b/lib/node-replication2
index f0a91fdca..23822c47c 160000
--- a/lib/node-replication2
+++ b/lib/node-replication2
@@ -1 +1 @@
-Subproject commit f0a91fdca08adfdd5ad8d11c2e78d7b2d852e512
+Subproject commit 23822c47c8be92eee9f440c93e6a836061793967
diff --git a/usr/init/src/vmops/mod.rs b/usr/init/src/vmops/mod.rs
index 4ad3f4fb6..c802a23ad 100644
--- a/usr/init/src/vmops/mod.rs
+++ b/usr/init/src/vmops/mod.rs
@@ -35,7 +35,7 @@ fn maponly_bencher(cores: usize) {
 
     // see process.rs the heap split up by core from slots 1..128, so we start from there
     let vspace_offset = lineup::tls2::Environment::tid().0 + 1;
-    let mut base: u64 = (128 * PML4_SLOT_SIZE + (PML4_SLOT_SIZE * vspace_offset)) as u64;
+    let mut base: u64 = (220 * PML4_SLOT_SIZE + (PML4_SLOT_SIZE * vspace_offset)) as u64;
     info!("start mapping at {:#x}", base);
 
     #[cfg(feature = "latency")]

From 6353729d9856060a55ae03c06754453b3cd6d4b2 Mon Sep 17 00:00:00 2001
From: Gerd Zellweger <mail@gerdzellweger.com>
Date: Sun, 24 Sep 2023 21:16:46 -0700
Subject: [PATCH 05/32] Update submodule.

Signed-off-by: Gerd Zellweger <mail@gerdzellweger.com>
---
 .gitmodules | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.gitmodules b/.gitmodules
index f18158aec..c550518e3 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -2,3 +2,7 @@
 	path = lib/node-replication
 	branch = mut_scan_vec
 	url = git@github.com:gz/node-replication.git
+[submodule "lib/node-replication2"]
+	path = lib/node-replication2
+	url = git@github.com:gz/node-replication.git
+	branch = nr-dymanic-replication

From d5664111c80d84eb17f719f15ff9f843be6c0960 Mon Sep 17 00:00:00 2001
From: Gerd Zellweger <mail@gerdzellweger.com>
Date: Mon, 25 Sep 2023 01:48:54 -0700
Subject: [PATCH 06/32] Compilable rack-scale code.

Signed-off-by: Gerd Zellweger <mail@gerdzellweger.com>
---
 kernel/src/arch/x86_64/mod.rs                 |  39 +++----
 kernel/src/arch/x86_64/process.rs             | 105 ++++++++++--------
 .../x86_64/rackscale/get_shmem_structure.rs   |  17 ++-
 kernel/src/memory/shmemalloc.rs               |   1 +
 kernel/src/nr.rs                              |  56 +++++-----
 5 files changed, 116 insertions(+), 102 deletions(-)

diff --git a/kernel/src/arch/x86_64/mod.rs b/kernel/src/arch/x86_64/mod.rs
index 7e4a7e4ae..2839519a5 100644
--- a/kernel/src/arch/x86_64/mod.rs
+++ b/kernel/src/arch/x86_64/mod.rs
@@ -21,18 +21,14 @@
 
 use alloc::sync::Arc;
 use core::mem::transmute;
-use core::num::NonZeroUsize;
 use core::sync::atomic::AtomicBool;
 use core::sync::atomic::Ordering;
 
-#[cfg(feature = "rackscale")]
-use crate::nr::NR_LOG;
 pub use bootloader_shared::*;
 use cnr::Replica as MlnrReplica;
 use fallible_collections::TryClone;
 use klogger::sprint;
 use log::{debug, error, info};
-use nr2::nr::{AffinityChange, NodeReplicated};
 use x86::{controlregs, cpuid};
 
 use crate::cmdline::CommandLineArguments;
@@ -40,7 +36,6 @@ use crate::fs::cnrfs::MlnrKernelNode;
 use crate::memory::global::GlobalMemory;
 use crate::memory::mcache;
 use crate::memory::per_core::PerCoreMemory;
-use crate::nr::KernelNode;
 use crate::ExitReason;
 
 use coreboot::AppCoreArgs;
@@ -449,12 +444,16 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {
     // Set-up interrupt routing drivers (I/O APIC controllers)
     irq::ioapic_initialize();
 
-    // Let's go with one replica per NUMA node for now:
-    let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes());
-    let numa_nodes = NonZeroUsize::new(numa_nodes).expect("At least one NUMA node");
-
     #[cfg(not(feature = "rackscale"))]
     let kernel_node = {
+        use core::num::NonZeroUsize;
+        use crate::nr::KernelNode;
+        use nr2::nr::{AffinityChange, NodeReplicated};
+
+        // Let's go with one replica per NUMA node for now:
+        let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes());
+        let numa_nodes = NonZeroUsize::new(numa_nodes).expect("At least one NUMA node");
+
         // Create the global operation log and first replica and store it (needs
         // TLS)
         let kernel_node: Arc<NodeReplicated<KernelNode>> = Arc::try_new(
@@ -516,7 +515,12 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {
 
     // Initialize processes
     #[cfg(feature = "rackscale")]
-    lazy_static::initialize(&process::PROCESS_LOGS);
+    if crate::CMDLINE
+        .get()
+        .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller)
+    {
+        lazy_static::initialize(&process::PROCESS_TABLE);
+    }
 
     #[cfg(not(feature = "rackscale"))]
     {
@@ -525,7 +529,7 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {
     }
 
     #[cfg(feature = "rackscale")]
-    let (log, bsp_replica) = {
+    let kernel_node = {
         if crate::CMDLINE
             .get()
             .map_or(false, |c| c.mode == crate::cmdline::Mode::Client)
@@ -534,15 +538,12 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {
             crate::nrproc::register_thread_with_process_replicas();
         }
 
-        // this calls an RPC on the client, which is why we do this later in initialization than in non-rackscale
-        lazy_static::initialize(&NR_LOG);
+        lazy_static::initialize(&crate::nr::KERNEL_NODE_INSTANCE);
+        let kernel_node = crate::nr::KERNEL_NODE_INSTANCE.clone();
 
-        // For rackscale, only the controller is going to create the base log.
-        // All clients will use this to create replicas.
-        let bsp_replica = Replica::<KernelNode>::new(&NR_LOG);
-        let local_ridx = bsp_replica.register().unwrap();
-        crate::nr::NR_REPLICA.call_once(|| (bsp_replica.clone(), local_ridx));
-        (&NR_LOG.clone(), bsp_replica)
+        let local_ridx = kernel_node.register(0).unwrap();
+        crate::nr::NR_REPLICA.call_once(|| (kernel_node.clone(), local_ridx));
+        kernel_node
     };
 
     #[cfg(feature = "gdb")]
diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs
index 4c0074e7d..b9dbd712f 100644
--- a/kernel/src/arch/x86_64/process.rs
+++ b/kernel/src/arch/x86_64/process.rs
@@ -20,9 +20,9 @@ use kpi::arch::SaveArea;
 use kpi::process::{FrameId, ELF_OFFSET, EXECUTOR_OFFSET};
 use lazy_static::lazy_static;
 use log::{debug, info, trace, warn};
-#[cfg(feature = "rackscale")]
-use node_replication::{Dispatch, Log, Replica};
-use nr2::nr::NodeReplicated;
+use crate::arch::kcb::{self, per_core_mem};
+use core::num::NonZeroUsize;
+use nr2::nr::{NodeReplicated, AffinityChange};
 use x86::bits64::paging::*;
 use x86::bits64::rflags;
 use x86::{controlregs, Ring};
@@ -72,25 +72,16 @@ pub(crate) fn current_pid() -> KResult<Pid> {
 
 #[cfg(feature = "rackscale")]
 lazy_static! {
-    pub(crate) static ref PROCESS_LOGS: Box<
-        ArrayVec<
-            Arc<Log::<'static, <NrProcess<Ring3Process> as Dispatch>::WriteOperation>>,
-            MAX_PROCESSES,
-        >,
-    > = {
-
+    pub(crate) static ref PROCESS_TABLE: ArrayVec<Arc<NodeReplicated<NrProcess<Ring3Process>>>, MAX_PROCESSES> = {
+        use crate::memory::shmem_affinity::mid_to_shmem_affinity;
 
-        if crate::CMDLINE
+        if !crate::CMDLINE
             .get()
             .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller)
         {
-            // We want to allocate the logs in controller shared memory
-            use crate::memory::shmem_affinity::local_shmem_affinity;
-            let pcm = per_core_mem();
-            pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't change affinity");
-        } else {
             // Get location of the logs from the controller, who will have created them in shared memory
-            use crate::arch::rackscale::get_shmem_structure::{rpc_get_shmem_structure, ShmemStructure};
+            
+            /*use crate::arch::rackscale::get_shmem_structure::{rpc_get_shmem_structure, ShmemStructure};
 
             let mut log_ptrs = [0u64; MAX_PROCESSES];
             rpc_get_shmem_structure(ShmemStructure::NrProcLogs, &mut log_ptrs[..]).expect("Failed to get process log pointers");
@@ -103,12 +94,54 @@ lazy_static! {
                 };
                 process_logs.push(local_log_arc);
             }
-            return process_logs;
+            return process_logs;*/
+            unimplemented!("Need to get NodeReplicated from controller")
         }
 
-        // TODO(dynrep): here we create the Log on the controller for sending it
-        // to the data-kernels this would probably need to create a
-        // NodeReplicated<DataKernel> NodeReplicated<Process> instance
+        // We want to allocate the logs in controller shared memory
+        use crate::memory::shmem_affinity::local_shmem_affinity;
+        let pcm = per_core_mem();
+        pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't change affinity");
+        
+        // Want at least one replica...
+        let num_replicas =
+            NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).unwrap();
+        let mut processes = ArrayVec::new();
+
+        for _pid in 0..MAX_PROCESSES {
+            debug_assert_eq!(
+                *crate::environment::NODE_ID,
+                0,
+                "Expect initialization to happen on node 0."
+            );
+
+            let process: Arc<NodeReplicated<NrProcess<Ring3Process>>> = Arc::try_new(
+                NodeReplicated::new(num_replicas, |afc: AffinityChange| {
+                    let pcm = kcb::per_core_mem();
+                    match afc {
+                        AffinityChange::Replica(r) => {
+                            pcm.set_mem_affinity(mid_to_shmem_affinity(r)).expect("Can't change affinity");
+                        }
+                        AffinityChange::Revert(_orig) => {
+                            pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't set affinity")
+                        }
+                    }
+                    return 0; // TODO(dynrep): Return error code
+                })
+                .expect("Not enough memory to initialize system"),
+            )
+            .expect("Not enough memory to initialize system");
+
+            processes.push(process)
+        }
+
+
+        // Reset mem allocator to use per core memory again
+        let pcm = per_core_mem();
+        pcm.set_mem_affinity(0 as atopology::NodeId).expect("Can't change affinity");
+
+        processes
+
 
         // NodeReplicated::new(#data-kernels) ->
         //  - for data_kernel in 0..#data-kernels {
@@ -143,31 +176,10 @@ lazy_static! {
             - The closure when set on controller probably won't work in data-kernel (diff symbol addresses?)
             - The binary might be fine because it's identical!
         */
-        let process_logs = {
-            let mut process_logs = Box::try_new(ArrayVec::new()).expect("Can't initialize process log vector.");
-            for _pid in 0..MAX_PROCESSES {
-                let log = Arc::try_new(
-                    Log::<<NrProcess<Ring3Process> as Dispatch>::WriteOperation>::new(LARGE_PAGE_SIZE),
-                )
-                .expect("Can't initialize process logs, out of memory.");
-                process_logs.push(log);
-            }
-            process_logs
-        };
-
-        if crate::CMDLINE
-            .get()
-            .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller)
-        {
-            // Reset mem allocator to use per core memory again
-            let pcm = per_core_mem();
-            pcm.set_mem_affinity(0 as atopology::NodeId).expect("Can't change affinity");
-        }
-
-        process_logs
     };
 }
 
+#[cfg(not(feature = "rackscale"))]
 lazy_static! {
     pub(crate) static ref PROCESS_TABLE: ArrayVec<Arc<NodeReplicated<NrProcess<Ring3Process>>>, MAX_PROCESSES> =
         create_process_table();
@@ -175,10 +187,6 @@ lazy_static! {
 
 #[cfg(not(feature = "rackscale"))]
 fn create_process_table() -> ArrayVec<Arc<NodeReplicated<NrProcess<Ring3Process>>>, MAX_PROCESSES> {
-    use crate::arch::kcb;
-    use core::num::NonZeroUsize;
-    use nr2::nr::AffinityChange;
-
     // Want at least one replica...
     let num_replicas =
         NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).unwrap();
@@ -214,6 +222,7 @@ fn create_process_table() -> ArrayVec<Arc<NodeReplicated<NrProcess<Ring3Process>
     processes
 }
 
+/*
 #[cfg(feature = "rackscale")]
 fn create_process_table(
 ) -> ArrayVec<ArrayVec<Arc<Replica<'static, NrProcess<Ring3Process>>>, MAX_PROCESSES>, MAX_NUMA_NODES>
@@ -283,7 +292,7 @@ fn create_process_table(
 
     numa_cache
 }
-
+ */
 pub(crate) struct ArchProcessManagement;
 
 impl crate::nrproc::ProcessManager for ArchProcessManagement {
diff --git a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs
index f2d8e4a77..bf3b8f49b 100644
--- a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs
+++ b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs
@@ -9,19 +9,19 @@ use core2::io::Write;
 
 use atopology::NodeId;
 use crossbeam_queue::ArrayQueue;
-use nr2::nr::{Dispatch, Log};
+use nr2::nr::{Dispatch, Log, NodeReplicated};
 use rpc::rpc::*;
 
 use super::client_state::CLIENT_STATE;
 use super::kernelrpc::*;
 use crate::arch::kcb::per_core_mem;
-use crate::arch::process::{Ring3Process, PROCESS_LOGS};
+use crate::arch::process::{Ring3Process, PROCESS_TABLE};
 use crate::arch::tlb::{Shootdown, RACKSCALE_CLIENT_WORKQUEUES};
 use crate::error::{KError, KResult};
 use crate::memory::shmem_affinity::local_shmem_affinity;
 use crate::memory::vspace::TlbFlushHandle;
 use crate::memory::{kernel_vaddr_to_paddr, paddr_to_kernel_vaddr, PAddr, VAddr};
-use crate::nr::{Op, NR_LOG};
+use crate::nr::{Op, KERNEL_NODE_INSTANCE};
 use crate::nrproc::NrProcess;
 use crate::process::MAX_PROCESSES;
 
@@ -127,20 +127,17 @@ pub(crate) fn handle_get_shmem_structure(
         ShmemStructure::NrProcLogs => {
             let mut logs = [0u64; MAX_PROCESSES];
 
-            for i in 0..PROCESS_LOGS.len() {
+            for i in 0..PROCESS_TABLE.len() {
                 // Create a clone in shared memory, and get the raw representation of it
                 // The clone increments the strong counter, and the into_raw consumes this clone of the arc.
-                let client_clone = Arc::into_raw(Arc::clone(&PROCESS_LOGS[i]));
+                let client_clone = Arc::into_raw(Arc::clone(&PROCESS_TABLE[i]));
 
                 // Send the raw pointer to the client clone address. To do this, we'll convert the kernel address
                 // to a physical address, and then change it to a shmem offset by subtracting the shmem base.
                 // TODO(rackscale): try to simplify this, and below?
                 let arc_log_paddr = kernel_vaddr_to_paddr(VAddr::from_u64(
                     (*&client_clone
-                        as *const Log<
-                            'static,
-                            <NrProcess<Ring3Process> as Dispatch>::WriteOperation,
-                        >) as u64,
+                        as *const NodeReplicated<NrProcess<Ring3Process>>) as u64,
                 ));
                 logs[i] = arc_log_paddr.as_u64();
             }
@@ -150,7 +147,7 @@ pub(crate) fn handle_get_shmem_structure(
             hdr.msg_len = core::mem::size_of::<[u64; MAX_PROCESSES]>() as MsgLen;
         }
         ShmemStructure::NrLog => {
-            let log_clone = Arc::into_raw(Arc::clone(&NR_LOG));
+            let log_clone = Arc::into_raw(Arc::clone(&KERNEL_NODE_INSTANCE));
             let log_paddr =
                 kernel_vaddr_to_paddr(VAddr::from_u64((*&log_clone as *const Log<Op>) as u64))
                     .as_u64();
diff --git a/kernel/src/memory/shmemalloc.rs b/kernel/src/memory/shmemalloc.rs
index e1f39d842..2b2c13b5d 100644
--- a/kernel/src/memory/shmemalloc.rs
+++ b/kernel/src/memory/shmemalloc.rs
@@ -20,6 +20,7 @@ pub(crate) struct ShmemAlloc {
 }
 
 impl ShmemAlloc {
+    #[allow(dead_code)]
     pub(crate) fn new(affinity: NodeId) -> ShmemAlloc {
         assert!(
             is_shmem_affinity(affinity)
diff --git a/kernel/src/nr.rs b/kernel/src/nr.rs
index 09de748c5..538ab55e0 100644
--- a/kernel/src/nr.rs
+++ b/kernel/src/nr.rs
@@ -3,12 +3,14 @@
 
 use crate::prelude::*;
 use core::fmt::Debug;
+use core::num::NonZeroUsize;
 
 use alloc::sync::Arc;
 use hashbrown::HashMap;
 use log::{error, trace};
-use nr2::nr::{Dispatch, NodeReplicated, ThreadToken};
+use nr2::nr::{Dispatch, NodeReplicated, ThreadToken, AffinityChange};
 use spin::Once;
+use crate::arch::kcb;
 
 #[cfg(feature = "rackscale")]
 use lazy_static::lazy_static;
@@ -25,39 +27,43 @@ pub(crate) static NR_REPLICA: Once<(Arc<NodeReplicated<KernelNode>>, ThreadToken
 // clones to client so they can create replicas of their own.
 #[cfg(feature = "rackscale")]
 lazy_static! {
-    pub(crate) static ref NR_LOG: Arc<nr2::nr::Log<Op>> = {
+    pub(crate) static ref KERNEL_NODE_INSTANCE: Arc<NodeReplicated<KernelNode>> = {
+        use crate::memory::shmem_affinity::mid_to_shmem_affinity;
+        use crate::memory::shmem_affinity::local_shmem_affinity;
+
         if crate::CMDLINE
             .get()
             .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller)
         {
-            use nr2::nr::Log;
-            use crate::arch::kcb::per_core_mem;
-            use crate::memory::{LARGE_PAGE_SIZE, shmem_affinity::local_shmem_affinity};
-
-            let pcm = per_core_mem();
-            pcm.set_mem_affinity(local_shmem_affinity())
-                .expect("Can't change affinity");
-
-            let log = Arc::try_new(Log::<Op>::new(LARGE_PAGE_SIZE)).expect("Not enough memory to initialize system");
-
-            // Reset mem allocator to use per core memory again
-            let pcm = per_core_mem();
-            pcm.set_mem_affinity(0 as atopology::NodeId)
-                .expect("Can't change affinity");
-
-            log
+            // Want at least one replica...
+            let num_replicas =
+                NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).unwrap();
+            Arc::try_new(
+                NodeReplicated::new(num_replicas, |afc: AffinityChange| {
+                    let pcm = kcb::per_core_mem();
+                    match afc {
+                        AffinityChange::Replica(r) => {
+                            pcm.set_mem_affinity(mid_to_shmem_affinity(r)).expect("Can't change affinity");
+                        }
+                        AffinityChange::Revert(_orig) => {
+                            pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't set affinity")
+                        }
+                    }
+                    return 0; // TODO(dynrep): Return error code
+                })
+                .expect("Not enough memory to initialize system"),
+            )
+            .expect("Not enough memory to initialize system")
         } else {
-            use nr2::nr::Log;
             use crate::memory::{paddr_to_kernel_vaddr, PAddr};
-
             use crate::arch::rackscale::get_shmem_structure::{rpc_get_shmem_structure, ShmemStructure};
 
             // Get location of the nr log from the controller, who will created them in shared memory
-            let mut log_ptrs = [0u64; 1];
-            rpc_get_shmem_structure(ShmemStructure::NrLog, &mut log_ptrs).expect("Failed to get nr log from controller");
-            let log_ptr = paddr_to_kernel_vaddr(PAddr::from(log_ptrs[0]));
-            let local_log_arc = unsafe { Arc::from_raw(log_ptr.as_u64() as *const Log<'static, Op>) };
-            local_log_arc
+            let mut node_replicated_ptrs = [0u64; 1];
+            rpc_get_shmem_structure(ShmemStructure::NrLog, &mut node_replicated_ptrs).expect("Failed to get nr log from controller");
+            let nr_ptr = paddr_to_kernel_vaddr(PAddr::from(node_replicated_ptrs[0]));
+            let nr_instance = unsafe { Arc::from_raw(nr_ptr.as_u64() as *const NodeReplicated<KernelNode>) };
+            nr_instance
         }
     };
 }

From 135246953d5adf5e9d298e4390b29ce26ead15eb Mon Sep 17 00:00:00 2001
From: Gerd Zellweger <mail@gerdzellweger.com>
Date: Sun, 15 Oct 2023 22:33:48 -0700
Subject: [PATCH 07/32] Add syscalls to add/remove replica.

Signed-off-by: Gerd Zellweger <mail@gerdzellweger.com>
---
 kernel/src/arch/unix/process.rs               |  1 +
 kernel/src/arch/unix/syscalls.rs              |  4 ++
 kernel/src/arch/x86_64/mod.rs                 |  1 +
 kernel/src/arch/x86_64/process.rs             | 34 +++++++-------
 .../arch/x86_64/rackscale/get_shmem_frames.rs |  3 +-
 .../x86_64/rackscale/get_shmem_structure.rs   | 18 ++++----
 kernel/src/arch/x86_64/rackscale/syscalls.rs  |  4 ++
 kernel/src/arch/x86_64/syscall.rs             | 12 +++++
 kernel/src/arch/x86_64/vspace/mod.rs          |  9 ++--
 kernel/src/arch/x86_64/vspace/page_table.rs   |  2 +-
 kernel/src/nr.rs                              | 44 +++++++++++++------
 kernel/src/nrproc.rs                          | 18 ++++++++
 kernel/src/syscalls.rs                        |  4 ++
 lib/kpi/src/lib.rs                            |  3 ++
 lib/kpi/src/syscalls/process.rs               | 18 ++++++++
 usr/init/Cargo.toml                           |  1 +
 usr/init/src/init.rs                          |  9 ++++
 17 files changed, 139 insertions(+), 46 deletions(-)

diff --git a/kernel/src/arch/unix/process.rs b/kernel/src/arch/unix/process.rs
index 2117d844f..694e89195 100644
--- a/kernel/src/arch/unix/process.rs
+++ b/kernel/src/arch/unix/process.rs
@@ -18,6 +18,7 @@ use kpi::process::FrameId;
 use lazy_static::lazy_static;
 
 use nr2::nr::{AffinityChange, Dispatch, NodeReplicated, ThreadToken};
+//use nr2::nr::rwlock::RwLock;
 
 use crate::arch::kcb::get_kcb;
 use crate::error::{KError, KResult};
diff --git a/kernel/src/arch/unix/syscalls.rs b/kernel/src/arch/unix/syscalls.rs
index b7a684300..0fee973f1 100644
--- a/kernel/src/arch/unix/syscalls.rs
+++ b/kernel/src/arch/unix/syscalls.rs
@@ -27,6 +27,10 @@ impl SystemDispatch<u64> for UnixSystemCalls {
 }
 
 impl ProcessDispatch<u64> for UnixSystemCalls {
+    fn set_replicas(&self, add: u64, rid: u64) -> KResult<(u64, u64)> {
+        todo!()
+    }
+
     fn log(&self, _buffer_arg: UserSlice) -> KResult<(u64, u64)> {
         todo!()
     }
diff --git a/kernel/src/arch/x86_64/mod.rs b/kernel/src/arch/x86_64/mod.rs
index 2839519a5..b4fdfcfcd 100644
--- a/kernel/src/arch/x86_64/mod.rs
+++ b/kernel/src/arch/x86_64/mod.rs
@@ -542,6 +542,7 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {
         let kernel_node = crate::nr::KERNEL_NODE_INSTANCE.clone();
 
         let local_ridx = kernel_node.register(0).unwrap();
+        log::info!("Kernel node replica idx is {:?}", local_ridx);
         crate::nr::NR_REPLICA.call_once(|| (kernel_node.clone(), local_ridx));
         kernel_node
     };
diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs
index b9dbd712f..aeea07e07 100644
--- a/kernel/src/arch/x86_64/process.rs
+++ b/kernel/src/arch/x86_64/process.rs
@@ -20,7 +20,7 @@ use kpi::arch::SaveArea;
 use kpi::process::{FrameId, ELF_OFFSET, EXECUTOR_OFFSET};
 use lazy_static::lazy_static;
 use log::{debug, info, trace, warn};
-use crate::arch::kcb::{self, per_core_mem};
+use crate::arch::kcb;
 use core::num::NonZeroUsize;
 use nr2::nr::{NodeReplicated, AffinityChange};
 use x86::bits64::paging::*;
@@ -74,28 +74,28 @@ pub(crate) fn current_pid() -> KResult<Pid> {
 lazy_static! {
     pub(crate) static ref PROCESS_TABLE: ArrayVec<Arc<NodeReplicated<NrProcess<Ring3Process>>>, MAX_PROCESSES> = {
         use crate::memory::shmem_affinity::mid_to_shmem_affinity;
+        use crate::arch::kcb::per_core_mem;
 
         if !crate::CMDLINE
             .get()
             .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller)
         {
-            // Get location of the logs from the controller, who will have created them in shared memory
-            
-            /*use crate::arch::rackscale::get_shmem_structure::{rpc_get_shmem_structure, ShmemStructure};
-
-            let mut log_ptrs = [0u64; MAX_PROCESSES];
-            rpc_get_shmem_structure(ShmemStructure::NrProcLogs, &mut log_ptrs[..]).expect("Failed to get process log pointers");
-            let mut process_logs = Box::new(ArrayVec::new());
-            for i in 0..log_ptrs.len() {
-                let log_ptr = paddr_to_kernel_vaddr(PAddr::from(log_ptrs[i]));
-                let local_log_arc = unsafe {
-                    Arc::from_raw(log_ptr.as_u64()
-                        as *const Log<'static, <NrProcess<Ring3Process> as Dispatch>::WriteOperation>)
+            // Get the NodeReplicated instances from the controller, 
+            // who will have created them in shared memory
+            use crate::arch::rackscale::get_shmem_structure::{rpc_get_shmem_structure, ShmemStructure};
+
+            let mut nr_ptrs = [0u64; MAX_PROCESSES];
+            rpc_get_shmem_structure(ShmemStructure::NrProcess, &mut nr_ptrs[..]).expect("Failed to get process log pointers");
+            let mut processes = ArrayVec::new();
+            for i in 0..nr_ptrs.len() {
+                let nrproc_ptr = paddr_to_kernel_vaddr(PAddr::from(nr_ptrs[i]));
+                let nr_process = unsafe {
+                    Arc::from_raw(nrproc_ptr.as_u64()
+                        as *const NodeReplicated<NrProcess<Ring3Process>>)
                 };
-                process_logs.push(local_log_arc);
+                processes.push(nr_process);
             }
-            return process_logs;*/
-            unimplemented!("Need to get NodeReplicated from controller")
+            return processes;
         }
 
         // We want to allocate the logs in controller shared memory
@@ -1671,14 +1671,12 @@ impl FrameManagement for Ring3Process {
 #[cfg(target_os = "none")]
 pub(crate) fn spawn(binary: &'static str) -> Result<Pid, KError> {
     use crate::process::make_process;
-
     let pid = make_process::<Ring3Process>(binary)?;
 
     // Let the controller pick the initial core for the process
     #[cfg(feature = "rackscale")]
     {
         use crate::arch::rackscale::processops::request_core::rpc_request_core;
-
         let (_gtid, _) = rpc_request_core(pid, true, INVALID_EXECUTOR_START.as_u64())
             .expect("Failed to get core for newly spawned process");
     }
diff --git a/kernel/src/arch/x86_64/rackscale/get_shmem_frames.rs b/kernel/src/arch/x86_64/rackscale/get_shmem_frames.rs
index 205d9d803..2bbd1807b 100644
--- a/kernel/src/arch/x86_64/rackscale/get_shmem_frames.rs
+++ b/kernel/src/arch/x86_64/rackscale/get_shmem_frames.rs
@@ -147,9 +147,10 @@ pub(crate) fn handle_get_shmem_frames(
 
             // TODO(error_handling): should handle errors gracefully here, maybe percolate to client?
             let mut manager = &mut SHMEM_MEMSLICE_ALLOCATORS[mid - 1].lock();
-            let frame = manager
+            let mut frame = manager
                 .allocate_large_page()
                 .expect("DCM OK'd allocation, this should succeed");
+            unsafe { frame.zero() };
             assert!(frame.affinity == mid_to_shmem_affinity(mid));
             regions.push(ShmemRegion {
                 base: frame.base.as_u64(),
diff --git a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs
index bf3b8f49b..268e99971 100644
--- a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs
+++ b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs
@@ -21,7 +21,7 @@ use crate::error::{KError, KResult};
 use crate::memory::shmem_affinity::local_shmem_affinity;
 use crate::memory::vspace::TlbFlushHandle;
 use crate::memory::{kernel_vaddr_to_paddr, paddr_to_kernel_vaddr, PAddr, VAddr};
-use crate::nr::{Op, KERNEL_NODE_INSTANCE};
+use crate::nr::{KERNEL_NODE_INSTANCE, KernelNode};
 use crate::nrproc::NrProcess;
 use crate::process::MAX_PROCESSES;
 
@@ -29,9 +29,7 @@ use crate::process::MAX_PROCESSES;
 #[derive(Debug, Eq, PartialEq, PartialOrd, Clone, Copy)]
 #[repr(u8)]
 pub enum ShmemStructure {
-    // TODO(dynrep): remove NrProcLogs/NrLog add NodeReplicated<Process> and
-    // NodeReplicated<KernelNode> instead that gets sent from controller
-    NrProcLogs = 0,
+    NrProcess = 0,
     NrLog = 1,
     WorkQueues = 2,
 }
@@ -50,7 +48,7 @@ pub(crate) fn rpc_get_shmem_structure(
     // Construct result buffer and call RPC
     log::debug!("Calling GetShmemStructure({:?})", shmem_structure);
     let res_size = match shmem_structure {
-        ShmemStructure::NrProcLogs => core::mem::size_of::<[u64; MAX_PROCESSES]>(),
+        ShmemStructure::NrProcess => core::mem::size_of::<[u64; MAX_PROCESSES]>(),
         _ => core::mem::size_of::<[u64; 1]>(),
     };
 
@@ -63,7 +61,7 @@ pub(crate) fn rpc_get_shmem_structure(
     unsafe { encode(&req, &mut (&mut req_data).as_mut()) }
         .expect("Failed to encode shmem structure request");
 
-    // Make buffer max size of MAX_PROCESS (for NrProcLogs), 1 (for NrLog)
+    // Make buffer max size of MAX_PROCESS (for NrProcess), 1 (for NrLog)
     let mut res_data = [0u8; core::mem::size_of::<[u64; MAX_PROCESSES]>()];
     CLIENT_STATE
         .rpc_client
@@ -76,7 +74,7 @@ pub(crate) fn rpc_get_shmem_structure(
         .unwrap();
 
     let decode_result = match shmem_structure {
-        ShmemStructure::NrProcLogs => {
+        ShmemStructure::NrProcess => {
             unsafe { decode::<[u64; MAX_PROCESSES]>(&mut res_data[..res_size]) }
                 .map(|(ret, remaining)| (&ret[..], remaining.len()))
         }
@@ -124,7 +122,7 @@ pub(crate) fn handle_get_shmem_structure(
     };
 
     match shmem_structure {
-        ShmemStructure::NrProcLogs => {
+        ShmemStructure::NrProcess => {
             let mut logs = [0u64; MAX_PROCESSES];
 
             for i in 0..PROCESS_TABLE.len() {
@@ -148,9 +146,11 @@ pub(crate) fn handle_get_shmem_structure(
         }
         ShmemStructure::NrLog => {
             let log_clone = Arc::into_raw(Arc::clone(&KERNEL_NODE_INSTANCE));
+
             let log_paddr =
-                kernel_vaddr_to_paddr(VAddr::from_u64((*&log_clone as *const Log<Op>) as u64))
+                kernel_vaddr_to_paddr(VAddr::from_u64((*&log_clone as *const NodeReplicated<KernelNode>) as u64))
                     .as_u64();
+            log::info!("nr_node addr {:?} &KERNEL_NODE_INSTANCE = {:p}", log_paddr, &KERNEL_NODE_INSTANCE);
 
             // Modify header and write into output buffer
             unsafe { encode(&[log_paddr], &mut payload) }.unwrap();
diff --git a/kernel/src/arch/x86_64/rackscale/syscalls.rs b/kernel/src/arch/x86_64/rackscale/syscalls.rs
index 9f6beec1b..7384bf327 100644
--- a/kernel/src/arch/x86_64/rackscale/syscalls.rs
+++ b/kernel/src/arch/x86_64/rackscale/syscalls.rs
@@ -283,6 +283,10 @@ impl FsDispatch<u64> for Arch86LwkSystemCall {
 }
 
 impl ProcessDispatch<u64> for Arch86LwkSystemCall {
+    fn set_replicas(&self, add: u64, rid: u64) -> KResult<(u64, u64)> {
+        self.local.set_replicas(add, rid)
+    }
+
     fn log(&self, uslice: UserSlice) -> KResult<(u64, u64)> {
         let msg: String = uslice.try_into()?;
         rpc_log(msg).map_err(|e| e.into())
diff --git a/kernel/src/arch/x86_64/syscall.rs b/kernel/src/arch/x86_64/syscall.rs
index 76bb76555..a8c225ce9 100644
--- a/kernel/src/arch/x86_64/syscall.rs
+++ b/kernel/src/arch/x86_64/syscall.rs
@@ -292,6 +292,18 @@ impl<T: Arch86ProcessDispatch> ProcessDispatch<u64> for T {
         Ok((0, 0))
     }
 
+    fn set_replicas(&self, add: u64, rid: u64) -> crate::error::KResult<(u64, u64)> {
+        let pid = current_pid()?;
+        // Non-zero `add` adds replica `rid`, zero removes it; errors go back to the caller.
+        if add > 0 {
+            NrProcess::<Ring3Process>::add_replica(pid, rid as usize)?;
+        } else {
+            NrProcess::<Ring3Process>::remove_replica(pid, rid as usize)?;
+        }
+
+        Ok((0, 0))
+    }
+
     fn exit(&self, code: u64) -> Result<(u64, u64), KError> {
         debug!("Process got exit, we are done for now...");
         // TODO: For now just a dummy version that exits Qemu
diff --git a/kernel/src/arch/x86_64/vspace/mod.rs b/kernel/src/arch/x86_64/vspace/mod.rs
index 42ea4effb..325508e4f 100644
--- a/kernel/src/arch/x86_64/vspace/mod.rs
+++ b/kernel/src/arch/x86_64/vspace/mod.rs
@@ -121,7 +121,6 @@ impl AddressSpace for VSpace {
             // virtual addr should be aligned to page-size
             return Err(KError::InvalidBase);
         }
-
         let tomap_range = base.as_usize()..base.as_usize() + frame.size;
 
         // Check all mapping in that region to see if we can allow this map:
@@ -149,9 +148,8 @@ impl AddressSpace for VSpace {
                 });
             }
         }
-
-        self.mappings
-            .try_insert(base, MappingInfo::new(frame, action))?;
+        //self.mappings
+        //    .try_insert(base, MappingInfo::new(frame, action))?;
         let r = self.page_table.map_frame(base, frame, action);
         r
     }
@@ -200,6 +198,9 @@ impl Drop for VSpace {
 
 impl VSpace {
     pub(crate) fn new() -> Result<Self, KError> {
+        let mut btree = BTreeMap::new();
+        btree.try_insert(VAddr(0x0), MappingInfo::new(Frame::empty(), MapAction::none())).expect("fail");
+
         Ok(VSpace {
             mappings: BTreeMap::new(),
             page_table: PageTable::new()?,
diff --git a/kernel/src/arch/x86_64/vspace/page_table.rs b/kernel/src/arch/x86_64/vspace/page_table.rs
index 5ed05cc36..9dd2fd79f 100644
--- a/kernel/src/arch/x86_64/vspace/page_table.rs
+++ b/kernel/src/arch/x86_64/vspace/page_table.rs
@@ -819,7 +819,7 @@ impl PageTable {
 
     fn alloc_frame(&self) -> Frame {
         let frame_ptr = unsafe {
-            let ptr = alloc::alloc::alloc(PT_LAYOUT);
+            let ptr = alloc::alloc::alloc_zeroed(PT_LAYOUT);
             debug_assert!(!ptr.is_null());
 
             let nptr = NonNull::new_unchecked(ptr);
diff --git a/kernel/src/nr.rs b/kernel/src/nr.rs
index 538ab55e0..3876e62fb 100644
--- a/kernel/src/nr.rs
+++ b/kernel/src/nr.rs
@@ -1,16 +1,14 @@
 // Copyright © 2021 VMware, Inc. All Rights Reserved.
 // SPDX-License-Identifier: Apache-2.0 OR MIT
-
+#![allow(dead_code, warnings)]
 use crate::prelude::*;
 use core::fmt::Debug;
-use core::num::NonZeroUsize;
 
 use alloc::sync::Arc;
 use hashbrown::HashMap;
 use log::{error, trace};
-use nr2::nr::{Dispatch, NodeReplicated, ThreadToken, AffinityChange};
+use nr2::nr::{Dispatch, NodeReplicated, ThreadToken};
 use spin::Once;
-use crate::arch::kcb;
 
 #[cfg(feature = "rackscale")]
 use lazy_static::lazy_static;
@@ -28,8 +26,11 @@ pub(crate) static NR_REPLICA: Once<(Arc<NodeReplicated<KernelNode>>, ThreadToken
 #[cfg(feature = "rackscale")]
 lazy_static! {
     pub(crate) static ref KERNEL_NODE_INSTANCE: Arc<NodeReplicated<KernelNode>> = {
+        use core::num::NonZeroUsize;
+        use nr2::nr::AffinityChange;
         use crate::memory::shmem_affinity::mid_to_shmem_affinity;
         use crate::memory::shmem_affinity::local_shmem_affinity;
+        use crate::arch::kcb;
 
         if crate::CMDLINE
             .get()
@@ -37,10 +38,19 @@ lazy_static! {
         {
             // Want at least one replica...
             let num_replicas =
-                NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).unwrap();
-            Arc::try_new(
+                NonZeroUsize::new(core::cmp::max(1, *crate::environment::NUM_MACHINES-1)).unwrap();
+            log::info!("NodeReplicated<KernelNode> creating with {} replicas", num_replicas);
+
+            let cur_affinity = {
+                let pcm = kcb::per_core_mem();
+                let cur_affinity = pcm.physical_memory.borrow().affinity;
+                let ret = pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't set affinity");
+                cur_affinity
+            };
+
+            let nr  = Arc::try_new(
                 NodeReplicated::new(num_replicas, |afc: AffinityChange| {
-                    let pcm = kcb::per_core_mem();
+                    /*let pcm = kcb::per_core_mem();
                     match afc {
                         AffinityChange::Replica(r) => {
                             pcm.set_mem_affinity(mid_to_shmem_affinity(r)).expect("Can't change affinity");
@@ -48,12 +58,19 @@ lazy_static! {
                         AffinityChange::Revert(_orig) => {
                             pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't set affinity")
                         }
-                    }
+                    }*/
                     return 0; // TODO(dynrep): Return error code
                 })
                 .expect("Not enough memory to initialize system"),
             )
-            .expect("Not enough memory to initialize system")
+            .expect("Not enough memory to initialize system");
+
+            {
+                let pcm = kcb::per_core_mem();
+                pcm.set_mem_affinity(cur_affinity).expect("Can't set affinity");
+            }
+
+            nr
         } else {
             use crate::memory::{paddr_to_kernel_vaddr, PAddr};
             use crate::arch::rackscale::get_shmem_structure::{rpc_get_shmem_structure, ShmemStructure};
@@ -113,10 +130,11 @@ pub(crate) struct KernelNode {
 
 impl Default for KernelNode {
     fn default() -> KernelNode {
-        KernelNode {
-            process_map: HashMap::new(),   // with_capacity(MAX_PROCESSES),
-            scheduler_map: HashMap::new(), // with_capacity(MAX_CORES), or, for rackscale, with_capacity(MAX_CORES * MAX_MACHINES)
-        }
+        let k = KernelNode {
+            process_map: HashMap::with_capacity(MAX_PROCESSES),   // with_capacity(MAX_PROCESSES),
+            scheduler_map: HashMap::with_capacity(24), // with_capacity(MAX_CORES), or, for rackscale, with_capacity(MAX_CORES * MAX_MACHINES)
+        };
+        k
     }
 }
 
diff --git a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs
index d573dc2cf..240cb5573 100644
--- a/kernel/src/nrproc.rs
+++ b/kernel/src/nrproc.rs
@@ -141,6 +141,24 @@ impl<P: Process> NrProcess<P> {
 }
 
 impl<P: Process> NrProcess<P> {
+    pub(crate) fn add_replica(pid: Pid, rid: usize) -> Result<(), KError>{
+        debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
+        let max_nodes = *crate::environment::NUM_MACHINES;
+        debug_assert!(rid < max_nodes, "Invalid Node ID");
+        log::info!("add_replica {pid} {rid}");
+        PROCESS_TABLE[pid].add_replica(rid).expect("add_replica failed");
+        Ok(())
+    }
+
+    pub(crate) fn remove_replica(pid: Pid, rid: usize) -> Result<(), KError>{
+        debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
+        let max_nodes = *crate::environment::NUM_MACHINES;
+        debug_assert!(rid < max_nodes, "Invalid Node ID");
+        log::info!("remove_replica {pid} {rid}");
+        PROCESS_TABLE[pid].remove_replica(rid).expect("remove_replica failed");
+        Ok(())    
+    }
+
     pub(crate) fn load(
         pid: Pid,
         module_name: String,
diff --git a/kernel/src/syscalls.rs b/kernel/src/syscalls.rs
index 6b65fdfc0..f736a160a 100644
--- a/kernel/src/syscalls.rs
+++ b/kernel/src/syscalls.rs
@@ -113,6 +113,7 @@ pub(crate) trait ProcessDispatch<W: Into<u64> + LowerHex + Debug + Copy + Clone>
     fn release_core(&self, cord_id: W) -> KResult<(W, W)>;
     fn allocate_physical(&self, page_size: W, affinity: W) -> KResult<(W, W)>;
     fn release_physical(&self, page_id: W) -> KResult<(W, W)>;
+    fn set_replicas(&self, add: W, replicas: W) -> KResult<(W, W)>;
     fn exit(&self, code: W) -> KResult<(W, W)>;
 }
 
@@ -127,6 +128,7 @@ enum ProcessOperationArgs<W> {
     ReleaseCore(W),
     AllocatePhysical(W, W),
     ReleasePhysical(W),
+    SetReplicas(W, W)
 }
 
 impl<W: Into<u64> + LowerHex + Debug + Copy + Clone> ProcessOperationArgs<W> {
@@ -149,6 +151,7 @@ impl<W: Into<u64> + LowerHex + Debug + Copy + Clone> ProcessOperationArgs<W> {
             ProcessOperation::ReleaseCore => Ok(Self::ReleaseCore(arg2)),
             ProcessOperation::AllocatePhysical => Ok(Self::AllocatePhysical(arg2, arg3)),
             ProcessOperation::ReleasePhysical => Ok(Self::ReleasePhysical(arg2)),
+            ProcessOperation::SetReplicas => Ok(Self::SetReplicas(arg2, arg3)),
             ProcessOperation::SubscribeEvent => {
                 error!("SubscribeEvent is not implemented");
                 Err(KError::InvalidProcessOperation { a: arg1.into() })
@@ -284,6 +287,7 @@ pub(crate) trait SystemCallDispatch<W: Into<u64> + LowerHex + Debug + Copy + Clo
                 self.allocate_physical(page_size, affinity)
             }
             Poa::ReleasePhysical(frame_id) => self.release_physical(frame_id),
+            Poa::SetReplicas(add, replicas) => self.set_replicas(add, replicas),
         }
     }
 
diff --git a/lib/kpi/src/lib.rs b/lib/kpi/src/lib.rs
index a7f99343e..6ebd2000d 100644
--- a/lib/kpi/src/lib.rs
+++ b/lib/kpi/src/lib.rs
@@ -104,6 +104,8 @@ pub enum ProcessOperation {
     AllocatePhysical = 9,
     /// Release a physical memory page from the process.
     ReleasePhysical = 10,
+    /// Set Replicas
+    SetReplicas = 11
 }
 
 impl ProcessOperation {
@@ -120,6 +122,7 @@ impl ProcessOperation {
             8 => Some(Self::ReleaseCore),
             9 => Some(Self::AllocatePhysical),
             10 => Some(Self::ReleasePhysical),
+            11 => Some(Self::SetReplicas),
             _ => None,
         }
     }
diff --git a/lib/kpi/src/syscalls/process.rs b/lib/kpi/src/syscalls/process.rs
index 762f72297..4110a6500 100644
--- a/lib/kpi/src/syscalls/process.rs
+++ b/lib/kpi/src/syscalls/process.rs
@@ -14,6 +14,24 @@ use x86::bits64::paging::VAddr;
 pub struct Process;
 
 impl Process {
+    /// Adds (`add == true`) or removes (`add == false`) replica `replica_idx`;
+    /// a non-zero syscall return value is handed back as a [`SystemCallError`].
+    pub fn set_replicas(add: bool, replica_idx: usize) -> Result<(), SystemCallError> {
+        let status = unsafe {
+            syscall!(
+                SystemCall::Process as u64,
+                ProcessOperation::SetReplicas as u64,
+                add as u64,
+                replica_idx as u64,
+                1
+            )
+        };
+        match status {
+            0 => Ok(()),
+            code => Err(SystemCallError::from(code)),
+        }
+    }
+
     /// Request to run on `core_id` starting at `entry_point`.
     pub fn request_core(core_id: usize, entry_point: VAddr) -> Result<CoreToken, SystemCallError> {
         let (r, gtid, _eid) = unsafe {
diff --git a/usr/init/Cargo.toml b/usr/init/Cargo.toml
index 39572a20b..d97673f89 100644
--- a/usr/init/Cargo.toml
+++ b/usr/init/Cargo.toml
@@ -52,6 +52,7 @@ test-phys-alloc = []
 test-core-alloc = []
 test-rackscale-shootdown = []
 test-concurrent-shootdown = []
+test-dynamic-replication = []
 
 # Simple micro-benchmarks
 bench-vmops = []
diff --git a/usr/init/src/init.rs b/usr/init/src/init.rs
index bf70e1454..2b3a2669e 100644
--- a/usr/init/src/init.rs
+++ b/usr/init/src/init.rs
@@ -987,8 +987,15 @@ pub fn upcall_test() {
     info!("upcall_test OK");
 }
 
+pub fn dynamic_replication_test() {
+    vibrio::syscalls::Process::set_replicas(false, 1).expect("Can't remove replica 1.");
+    vibrio::syscalls::Process::set_replicas(true, 1).expect("Can't add replica 1.");
+    info!("dynamic_replication OK");
+}
+
 #[no_mangle]
 pub extern "C" fn _start() -> ! {
+    sys_println!("_start()");
     unsafe {
         log::set_logger(&vibrio::writer::LOGGER)
             .map(|()| log::set_max_level(Level::Debug.to_level_filter()))
@@ -1062,6 +1069,8 @@ pub extern "C" fn _start() -> ! {
 
     #[cfg(feature = "test-core-alloc")]
     core_alloc_test();
+    
+    dynamic_replication_test();
 
     #[cfg(feature = "test-scheduler")]
     scheduler_test();

From adf90842b1e725c09b5808c0be55010633710fe3 Mon Sep 17 00:00:00 2001
From: Gerd Zellweger <mail@gerdzellweger.com>
Date: Wed, 18 Oct 2023 14:36:49 -0700
Subject: [PATCH 08/32] Wrap NR instance in rwlock.

Signed-off-by: Gerd Zellweger <mail@gerdzellweger.com>
---
 kernel/src/arch/unix/process.rs          |  9 ++---
 kernel/src/arch/x86_64/process.rs        | 25 ++++++++------
 kernel/src/environment.rs                |  3 ++
 kernel/src/nrproc.rs                     | 44 ++++++++++++------------
 kernel/testutils/src/rackscale_runner.rs |  6 ++--
 lib/node-replication2                    |  2 +-
 6 files changed, 48 insertions(+), 41 deletions(-)

diff --git a/kernel/src/arch/unix/process.rs b/kernel/src/arch/unix/process.rs
index 694e89195..206e5b00c 100644
--- a/kernel/src/arch/unix/process.rs
+++ b/kernel/src/arch/unix/process.rs
@@ -6,6 +6,7 @@ use alloc::boxed::Box;
 use alloc::sync::Arc;
 use alloc::vec::Vec;
 use bootloader_shared::Module;
+use nr2::nr::rwlock::RwLock;
 use core::alloc::Allocator;
 use core::cell::RefCell;
 use core::num::NonZeroUsize;
@@ -66,7 +67,7 @@ pub(crate) fn swap_current_executor(_current_executor: Box<UnixThread>) -> Optio
 }
 
 lazy_static! {
-    pub(crate) static ref PROCESS_TABLE: ArrayVec<Arc<NodeReplicated<NrProcess<UnixProcess>>>, MAX_PROCESSES> = {
+    pub(crate) static ref PROCESS_TABLE: ArrayVec<Arc<RwLock<NodeReplicated<NrProcess<UnixProcess>>>>, MAX_PROCESSES> = {
         debug_assert_eq!(*crate::environment::NODE_ID, 0, "Expect initialization to happen on node 0.");
         // Want at least one replica...
         let num_replicas = NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).expect("At least one numa node");
@@ -74,9 +75,9 @@ lazy_static! {
         let mut processes = ArrayVec::new();
         for pid in 0..MAX_PROCESSES {
             processes.push(
-                Arc::try_new(NodeReplicated::<NrProcess<UnixProcess>>::new(num_replicas, |afc: AffinityChange| {
+                Arc::try_new(RwLock::new(NodeReplicated::<NrProcess<UnixProcess>>::new(num_replicas, |afc: AffinityChange| {
                     return 0; // TODO(dynrep): Return error code
-                }).expect("Not enough memory to initialize system")).expect("Not enough memory to initialize system"));
+                }).expect("Not enough memory to initialize system"))).expect("Not enough memory to initialize system"));
         }
         processes
     };
@@ -89,7 +90,7 @@ impl crate::nrproc::ProcessManager for ArchProcessManagement {
 
     fn process_table(
         &self,
-    ) -> &'static ArrayVec<Arc<NodeReplicated<NrProcess<UnixProcess>>>, MAX_PROCESSES> {
+    ) -> &'static ArrayVec<Arc<RwLock<NodeReplicated<NrProcess<UnixProcess>>>>, MAX_PROCESSES> {
         &super::process::PROCESS_TABLE
     }
 }
diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs
index aeea07e07..1e2fc94d2 100644
--- a/kernel/src/arch/x86_64/process.rs
+++ b/kernel/src/arch/x86_64/process.rs
@@ -23,6 +23,7 @@ use log::{debug, info, trace, warn};
 use crate::arch::kcb;
 use core::num::NonZeroUsize;
 use nr2::nr::{NodeReplicated, AffinityChange};
+use nr2::nr::rwlock::RwLock;
 use x86::bits64::paging::*;
 use x86::bits64::rflags;
 use x86::{controlregs, Ring};
@@ -72,7 +73,7 @@ pub(crate) fn current_pid() -> KResult<Pid> {
 
 #[cfg(feature = "rackscale")]
 lazy_static! {
-    pub(crate) static ref PROCESS_TABLE: ArrayVec<Arc<NodeReplicated<NrProcess<Ring3Process>>>, MAX_PROCESSES> = {
+    pub(crate) static ref PROCESS_TABLE: ArrayVec<Arc<RwLock<NodeReplicated<NrProcess<Ring3Process>>>>, MAX_PROCESSES> = {
         use crate::memory::shmem_affinity::mid_to_shmem_affinity;
         use crate::arch::kcb::per_core_mem;
 
@@ -91,7 +92,7 @@ lazy_static! {
                 let nrproc_ptr = paddr_to_kernel_vaddr(PAddr::from(nr_ptrs[i]));
                 let nr_process = unsafe {
                     Arc::from_raw(nrproc_ptr.as_u64()
-                        as *const NodeReplicated<NrProcess<Ring3Process>>)
+                        as *const RwLock<NodeReplicated<NrProcess<Ring3Process>>>)
                 };
                 processes.push(nr_process);
             }
@@ -115,8 +116,8 @@ lazy_static! {
                 "Expect initialization to happen on node 0."
             );
 
-            let process: Arc<NodeReplicated<NrProcess<Ring3Process>>> = Arc::try_new(
-                NodeReplicated::new(num_replicas, |afc: AffinityChange| {
+            let process: Arc<RwLock<NodeReplicated<NrProcess<Ring3Process>>>> = Arc::try_new(
+                RwLock::new(NodeReplicated::new(num_replicas, |afc: AffinityChange| {
                     let pcm = kcb::per_core_mem();
                     match afc {
                         AffinityChange::Replica(r) => {
@@ -129,7 +130,7 @@ lazy_static! {
                     return 0; // TODO(dynrep): Return error code
                 })
                 .expect("Not enough memory to initialize system"),
-            )
+            ))
             .expect("Not enough memory to initialize system");
 
             processes.push(process)
@@ -181,12 +182,12 @@ lazy_static! {
 
 #[cfg(not(feature = "rackscale"))]
 lazy_static! {
-    pub(crate) static ref PROCESS_TABLE: ArrayVec<Arc<NodeReplicated<NrProcess<Ring3Process>>>, MAX_PROCESSES> =
+    pub(crate) static ref PROCESS_TABLE: ArrayVec<Arc<RwLock<NodeReplicated<NrProcess<Ring3Process>>>>, MAX_PROCESSES> =
         create_process_table();
 }
 
 #[cfg(not(feature = "rackscale"))]
-fn create_process_table() -> ArrayVec<Arc<NodeReplicated<NrProcess<Ring3Process>>>, MAX_PROCESSES> {
+fn create_process_table() -> ArrayVec<Arc<RwLock<NodeReplicated<NrProcess<Ring3Process>>>>, MAX_PROCESSES> {
     // Want at least one replica...
     let num_replicas =
         NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).unwrap();
@@ -199,7 +200,7 @@ fn create_process_table() -> ArrayVec<Arc<NodeReplicated<NrProcess<Ring3Process>
             "Expect initialization to happen on node 0."
         );
 
-        let process: Arc<NodeReplicated<NrProcess<Ring3Process>>> = Arc::try_new(
+        let process: Arc<RwLock<NodeReplicated<NrProcess<Ring3Process>>>> = Arc::try_new(RwLock::new(
             NodeReplicated::new(num_replicas, |afc: AffinityChange| {
                 let pcm = kcb::per_core_mem();
                 match afc {
@@ -212,7 +213,7 @@ fn create_process_table() -> ArrayVec<Arc<NodeReplicated<NrProcess<Ring3Process>
                 }
                 return 0; // TODO(dynrep): Return error code
             })
-            .expect("Not enough memory to initialize system"),
+            .expect("Not enough memory to initialize system")),
         )
         .expect("Not enough memory to initialize system");
 
@@ -293,14 +294,16 @@ fn create_process_table(
     numa_cache
 }
  */
-pub(crate) struct ArchProcessManagement;
+
+ pub(crate) struct ArchProcessManagement;
+
 
 impl crate::nrproc::ProcessManager for ArchProcessManagement {
     type Process = Ring3Process;
 
     fn process_table(
         &self,
-    ) -> &'static ArrayVec<Arc<NodeReplicated<NrProcess<Self::Process>>>, MAX_PROCESSES> {
+    ) -> &'static ArrayVec<Arc<RwLock<NodeReplicated<NrProcess<Self::Process>>>>, MAX_PROCESSES> {
         &*super::process::PROCESS_TABLE
     }
 }
diff --git a/kernel/src/environment.rs b/kernel/src/environment.rs
index 449be8ed4..c9ae95be0 100644
--- a/kernel/src/environment.rs
+++ b/kernel/src/environment.rs
@@ -13,6 +13,9 @@ use crate::arch::{MAX_CORES, MAX_MACHINES, MAX_NUMA_NODES};
 pub(crate) static CORE_ID: Lazy<usize> =
     Lazy::new(|| new_gtid(atopology::MACHINE_TOPOLOGY.current_thread().id, *MACHINE_ID));
 
+#[thread_local]
+pub(crate) static MT_ID: Lazy<usize> = Lazy::new(|| kpi::system::mtid_from_gtid(*crate::environment::CORE_ID));
+
 /// The NUMA node id of the current core (hardware thread).
 #[thread_local]
 pub(crate) static NODE_ID: Lazy<usize> = Lazy::new(|| {
diff --git a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs
index 240cb5573..0a6260d5d 100644
--- a/kernel/src/nrproc.rs
+++ b/kernel/src/nrproc.rs
@@ -11,7 +11,7 @@ use arrayvec::ArrayVec;
 use fallible_collections::vec::FallibleVec;
 use kpi::process::{FrameId, ProcessInfo};
 use kpi::MemType;
-use nr2::nr::{Dispatch, NodeReplicated, ThreadToken};
+use nr2::nr::{Dispatch, NodeReplicated, ThreadToken, rwlock::RwLock};
 use spin::Once;
 
 use crate::arch::process::PROCESS_TABLE;
@@ -40,7 +40,7 @@ pub(crate) fn register_thread_with_process_replicas() {
         for pid in 0..MAX_PROCESSES {
             debug_assert!(PROCESS_TABLE.len() > pid, "Invalid PID");
 
-            let token = PROCESS_TABLE[pid].register(node);
+            let token = PROCESS_TABLE[pid].write(*crate::environment::MT_ID).register(node);
             tokens.push(token.expect("Need to be able to register"));
         }
 
@@ -119,7 +119,7 @@ pub(crate) trait ProcessManager {
     #[allow(clippy::type_complexity)] // fix this once `associated_type_defaults` works
     fn process_table(
         &self,
-    ) -> &'static ArrayVec<Arc<NodeReplicated<NrProcess<Self::Process>>>, MAX_PROCESSES>;
+    ) -> &ArrayVec<Arc<RwLock<NodeReplicated<NrProcess<Self::Process>>>>, MAX_PROCESSES>;
 }
 
 /// A node-replicated process.
@@ -146,7 +146,7 @@ impl<P: Process> NrProcess<P> {
         let max_nodes = *crate::environment::NUM_MACHINES;
         debug_assert!(rid < max_nodes, "Invalid Node ID");
         log::info!("add_replica {pid} {rid}");
-        PROCESS_TABLE[pid].add_replica(rid).expect("add_replica failed");
+        PROCESS_TABLE[pid].write(*crate::environment::MT_ID).add_replica(rid).expect("add_replica failed");
         Ok(())
     }
 
@@ -155,7 +155,7 @@ impl<P: Process> NrProcess<P> {
         let max_nodes = *crate::environment::NUM_MACHINES;
         debug_assert!(rid < max_nodes, "Invalid Node ID");
         log::info!("remove_replica {pid} {rid}");
-        PROCESS_TABLE[pid].remove_replica(rid).expect("remove_replica failed");
+        PROCESS_TABLE[pid].write(*crate::environment::MT_ID).remove_replica(rid).expect("remove_replica failed");
         Ok(())    
     }
 
@@ -165,7 +165,7 @@ impl<P: Process> NrProcess<P> {
         writeable_sections: Vec<Frame>,
     ) -> Result<(), KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-        let response = PROCESS_TABLE[pid].execute_mut(
+        let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut(
             ProcessOpMut::Load(pid, module_name, writeable_sections),
             PROCESS_TOKEN.get().unwrap()[pid],
         );
@@ -179,7 +179,7 @@ impl<P: Process> NrProcess<P> {
     pub(crate) fn resolve(pid: Pid, base: VAddr) -> Result<(u64, u64), KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
         debug_assert!(base.as_u64() < kpi::KERNEL_BASE, "Invalid base");
-        let response = PROCESS_TABLE[pid].execute(
+        let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute(
             ProcessOp::MemResolve(base),
             PROCESS_TOKEN.get().unwrap()[pid],
         );
@@ -192,7 +192,7 @@ impl<P: Process> NrProcess<P> {
 
     pub(crate) fn synchronize(pid: Pid) {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-        PROCESS_TABLE[pid].sync(PROCESS_TOKEN.get().unwrap()[pid]);
+        PROCESS_TABLE[pid].read(*crate::environment::MT_ID).sync(PROCESS_TOKEN.get().unwrap()[pid]);
     }
 
     pub(crate) fn map_device_frame(
@@ -201,7 +201,7 @@ impl<P: Process> NrProcess<P> {
         action: MapAction,
     ) -> Result<(u64, u64), KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-        let response = PROCESS_TABLE[pid].execute_mut(
+        let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut(
             ProcessOpMut::MemMapDevice(frame, action),
             PROCESS_TOKEN.get().unwrap()[pid],
         );
@@ -214,7 +214,7 @@ impl<P: Process> NrProcess<P> {
 
     pub(crate) fn unmap(pid: Pid, base: VAddr) -> Result<Vec<TlbFlushHandle>, KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-        let response = PROCESS_TABLE[pid].execute_mut(
+        let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut(
             ProcessOpMut::MemUnmap(base),
             PROCESS_TOKEN.get().unwrap()[pid],
         );
@@ -233,7 +233,7 @@ impl<P: Process> NrProcess<P> {
     ) -> Result<(PAddr, usize), KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
         //action.multiple_mappings(true);
-        let response = PROCESS_TABLE[pid].execute_mut(
+        let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut(
             ProcessOpMut::MemMapFrameId(base, frame_id, action),
             PROCESS_TOKEN.get().unwrap()[pid],
         );
@@ -253,7 +253,7 @@ impl<P: Process> NrProcess<P> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
         let mut virtual_offset = 0;
         for frame in frames {
-            let response = PROCESS_TABLE[pid].execute_mut(
+            let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut(
                 ProcessOpMut::MemMapFrame(base + virtual_offset, frame, action),
                 PROCESS_TOKEN.get().unwrap()[pid],
             );
@@ -277,7 +277,7 @@ impl<P: Process> NrProcess<P> {
     pub(crate) fn pinfo(pid: Pid) -> Result<ProcessInfo, KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
         let response =
-            PROCESS_TABLE[pid].execute(ProcessOp::ProcessInfo, PROCESS_TOKEN.get().unwrap()[pid]);
+            PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute(ProcessOp::ProcessInfo, PROCESS_TOKEN.get().unwrap()[pid]);
         match response {
             Ok(ProcessResult::ProcessInfo(pinfo)) => Ok(pinfo),
             Err(e) => Err(e),
@@ -293,7 +293,7 @@ impl<P: Process> NrProcess<P> {
         let gtid = *crate::environment::CORE_ID;
         let node = *crate::environment::NODE_ID;
 
-        let response = pm.process_table()[pid].execute_mut(
+        let response = pm.process_table()[pid].read(*crate::environment::MT_ID).execute_mut(
             ProcessOpMut::AssignExecutor(gtid, node),
             PROCESS_TOKEN.get().unwrap()[pid],
         );
@@ -324,7 +324,7 @@ impl<P: Process> NrProcess<P> {
 
     pub(crate) fn allocate_frame_to_process(pid: Pid, frame: Frame) -> Result<FrameId, KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-        let response = PROCESS_TABLE[pid].execute_mut(
+        let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut(
             ProcessOpMut::AllocateFrameToProcess(frame),
             PROCESS_TOKEN.get().unwrap()[pid],
         );
@@ -338,7 +338,7 @@ impl<P: Process> NrProcess<P> {
     pub(crate) fn release_frame_from_process(pid: Pid, fid: FrameId) -> Result<Frame, KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
         debug_assert!(fid < MAX_FRAMES_PER_PROCESS, "Invalid FID");
-        let response = PROCESS_TABLE[pid].execute_mut(
+        let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut(
             ProcessOpMut::ReleaseFrameFromProcess(fid),
             PROCESS_TOKEN.get().unwrap()[pid],
         );
@@ -354,7 +354,7 @@ impl<P: Process> NrProcess<P> {
         #[cfg(feature = "rackscale")]
         let mid = *crate::environment::MACHINE_ID;
 
-        let response = PROCESS_TABLE[pid].execute_mut(
+        let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut(
             #[cfg(not(feature = "rackscale"))]
             ProcessOpMut::DispatcherAllocation(frame),
             #[cfg(feature = "rackscale")]
@@ -370,7 +370,7 @@ impl<P: Process> NrProcess<P> {
     }
 
     pub(crate) fn userslice_to_arc_slice(from: UserSlice) -> Result<Arc<[u8]>, KError> {
-        let response = PROCESS_TABLE[from.pid].execute(
+        let response = PROCESS_TABLE[from.pid].read(*crate::environment::MT_ID).execute(
             ProcessOp::ReadSlice(from),
             PROCESS_TOKEN.get().unwrap()[from.pid],
         );
@@ -382,7 +382,7 @@ impl<P: Process> NrProcess<P> {
     }
 
     pub(crate) fn read_string_from_userspace(from: UserSlice) -> Result<String, KError> {
-        let response = PROCESS_TABLE[from.pid].execute(
+        let response = PROCESS_TABLE[from.pid].read(*crate::environment::MT_ID).execute(
             ProcessOp::ReadString(from),
             PROCESS_TOKEN.get().unwrap()[from.pid],
         );
@@ -396,7 +396,7 @@ impl<P: Process> NrProcess<P> {
     pub(crate) fn write_to_userspace(to: &mut UserSlice, kbuf: &[u8]) -> Result<(), KError> {
         let pid = to.pid;
 
-        let response = PROCESS_TABLE[pid].execute(
+        let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute(
             ProcessOp::WriteSlice(to, kbuf),
             PROCESS_TOKEN.get().unwrap()[pid],
         );
@@ -412,7 +412,7 @@ impl<P: Process> NrProcess<P> {
         on: UserSlice,
         f: Box<dyn Fn(&mut [u8]) -> KResult<(u64, u64)>>,
     ) -> Result<(u64, u64), KError> {
-        let response = PROCESS_TABLE[on.pid].execute(
+        let response = PROCESS_TABLE[on.pid].read(*crate::environment::MT_ID).execute(
             ProcessOp::ExecSliceMut(on, f),
             PROCESS_TOKEN.get().unwrap()[on.pid],
         );
@@ -427,7 +427,7 @@ impl<P: Process> NrProcess<P> {
         on: &'a UserSlice,
         f: Box<dyn Fn(&'a [u8]) -> KResult<()>>,
     ) -> Result<(), KError> {
-        let response = PROCESS_TABLE[on.pid].execute(
+        let response = PROCESS_TABLE[on.pid].read(*crate::environment::MT_ID).execute(
             ProcessOp::ExecSlice(on, f),
             PROCESS_TOKEN.get().unwrap()[on.pid],
         );
diff --git a/kernel/testutils/src/rackscale_runner.rs b/kernel/testutils/src/rackscale_runner.rs
index cda022e43..892443c48 100644
--- a/kernel/testutils/src/rackscale_runner.rs
+++ b/kernel/testutils/src/rackscale_runner.rs
@@ -512,8 +512,8 @@ impl<T: Clone + Send + 'static> RackscaleBench<T> {
 
         // Find max cores, max numa, and max cores per node
         let machine = Machine::determine();
-        let max_cores = if is_smoke { 2 } else { machine.max_cores() };
-        let max_numa = machine.max_numa_nodes();
+        let max_cores = if is_smoke { 8 } else { machine.max_cores() };
+        let max_numa =  if is_smoke { 4 } else { machine.max_numa_nodes() };
         let total_cores_per_node = core::cmp::max(1, max_cores / max_numa);
 
         // Do initial network configuration
@@ -524,7 +524,7 @@ impl<T: Clone + Send + 'static> RackscaleBench<T> {
             setup_network(num_clients + 1);
         }
 
-        let mut total_cores = 1;
+        let mut total_cores = 4;
         while total_cores < max_cores {
             // Round up to get the number of clients
             let new_num_clients = (total_cores + (total_cores_per_node - 1)) / total_cores_per_node;
diff --git a/lib/node-replication2 b/lib/node-replication2
index 23822c47c..23df3df94 160000
--- a/lib/node-replication2
+++ b/lib/node-replication2
@@ -1 +1 @@
-Subproject commit 23822c47c8be92eee9f440c93e6a836061793967
+Subproject commit 23df3df94d60ad64abc078c62fbbb229f7f01dff

From 5b7d359a3b96ac6ae235adda1ca9d30216d0bcb9 Mon Sep 17 00:00:00 2001
From: Gerd Zellweger <mail@gerdzellweger.com>
Date: Mon, 6 Nov 2023 00:58:41 -0800
Subject: [PATCH 09/32] Wip.

Signed-off-by: Gerd Zellweger <mail@gerdzellweger.com>
---
 kernel/src/arch/unix/vspace.rs              |   4 +
 kernel/src/arch/x86_64/irq.rs               |  51 +++++++++-
 kernel/src/arch/x86_64/process.rs           |  77 +++++----------
 kernel/src/arch/x86_64/syscall.rs           |  16 ++-
 kernel/src/arch/x86_64/tlb.rs               |  10 ++
 kernel/src/arch/x86_64/vspace/mod.rs        |   8 +-
 kernel/src/arch/x86_64/vspace/page_table.rs | 102 +++++++++++++++++++-
 kernel/src/environment.rs                   |  13 ++-
 kernel/src/fs/fd.rs                         |  10 ++
 kernel/src/memory/vspace.rs                 |   5 +-
 kernel/src/nrproc.rs                        |  90 +++++++++++------
 kernel/src/process.rs                       |  22 ++---
 kernel/tests/s04_user_runtime_tests.rs      |  24 +++++
 kernel/tests/s06_rackscale_tests.rs         |   3 +
 kernel/tests/s10_benchmarks.rs              |   2 +-
 kernel/testutils/src/rackscale_runner.rs    |   4 +-
 usr/init/src/init.rs                        |  15 ++-
 17 files changed, 346 insertions(+), 110 deletions(-)

diff --git a/kernel/src/arch/unix/vspace.rs b/kernel/src/arch/unix/vspace.rs
index 989539228..4695a3f07 100644
--- a/kernel/src/arch/unix/vspace.rs
+++ b/kernel/src/arch/unix/vspace.rs
@@ -69,6 +69,10 @@ impl VSpace {
 }
 
 impl AddressSpace for VSpace {
+    fn root(&self) -> PAddr {
+        PAddr::zero()
+    }
+    
     fn map_frame(&mut self, base: VAddr, frame: Frame, action: MapAction) -> Result<(), KError> {
         let ma = MappingInfo::new(frame, action);
         self.mappings.insert(ma.vrange(base), ma);
diff --git a/kernel/src/arch/x86_64/irq.rs b/kernel/src/arch/x86_64/irq.rs
index c332dc112..de43bc7da 100644
--- a/kernel/src/arch/x86_64/irq.rs
+++ b/kernel/src/arch/x86_64/irq.rs
@@ -34,6 +34,7 @@
 #![allow(warnings)] // TODO(fix) the unaligned accesses...
 
 use alloc::boxed::Box;
+use core::borrow::BorrowMut;
 use core::cell::{Cell, RefCell};
 use core::fmt;
 
@@ -47,7 +48,9 @@ use x86::segmentation::{
     BuildDescriptor, DescriptorBuilder, GateDescriptorBuilder, SegmentSelector,
 };
 use x86::{dtables, Ring};
+use spin::Lazy;
 
+use crate::arch::process::CURRENT_EXECUTOR;
 use crate::memory::vspace::MapAction;
 use crate::memory::Frame;
 use crate::panic::{backtrace, backtrace_from};
@@ -503,6 +506,9 @@ unsafe fn bkp_handler(a: &ExceptionArguments) {
     }
 }
 
+#[thread_local]
+pub(crate) static REPLICA_STATE: Lazy<usize> = Lazy::new(|| 0);
+
 /// Handler for the timer exception.
 ///
 /// We currently use it to periodically make sure that a replica
@@ -518,10 +524,39 @@ unsafe fn timer_handler(_a: &ExceptionArguments) {
 
     // Periodically advance replica state, then resume immediately
     nr::KernelNode::synchronize().expect("Synchronized failed?");
-    let kcb = get_kcb();
     for pid in 0..crate::process::MAX_PROCESSES {
         nrproc::NrProcess::<Ring3Process>::synchronize(pid);
     }
+    
+    if *crate::environment::MT_ID == 0 {
+        use crate::arch::process::current_pid;
+        let pid = current_pid().expect("dont have a pid?");
+
+        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(10) && *REPLICA_STATE == 0 {
+            info!("got a timer after 10s, remove rid 1");
+            let handles = nrproc::NrProcess::<Ring3Process>::remove_replica(pid, 1).expect("removed");
+            #[cfg(not(feature = "rackscale"))]
+            super::tlb::shootdown(handles[0].clone());
+            unsafe { *REPLICA_STATE.as_mut_ptr() = 1 };
+        }
+        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(20) && *REPLICA_STATE == 1 {
+            info!("got a timer after 20s, add rid 1");
+            let handles = nrproc::NrProcess::<Ring3Process>::add_replica(pid, 1).expect("added");
+            #[cfg(not(feature = "rackscale"))]
+            super::tlb::shootdown(handles[0].clone());
+            unsafe { *REPLICA_STATE.as_mut_ptr() = 2 };
+        }
+        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(30) && *REPLICA_STATE == 2 {
+            info!("got a timer after 30s");
+            unsafe { *REPLICA_STATE.as_mut_ptr() = 3 };
+        }
+        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(40) && *REPLICA_STATE == 3 {
+            info!("got a timer after 40s");
+            unsafe { *REPLICA_STATE.as_mut_ptr() = 4 };
+        }
+
+    }
+    let kcb = get_kcb();
 
     if super::process::has_executor() {
         // TODO(process-mgmt): Ensures that we still periodically
@@ -738,13 +773,20 @@ pub extern "C" fn handle_generic_exception(a: ExceptionArguments) -> ! {
             gdb_serial_handler(&a);
         } else if a.vector == TLB_WORK_PENDING.into() {
             let kcb = get_kcb();
-            trace!("got an interrupt {:?}", core_id);
+            info!("got an interrupt {:?}", core_id);
             super::tlb::dequeue(core_id);
 
             if super::process::has_executor() {
                 // Return immediately
                 TLB_TIME.update(|t| t + x86::time::rdtsc() - start);
+
+                let mut pborrow = super::process::CURRENT_EXECUTOR.borrow_mut();
+                let p = pborrow.as_ref().unwrap();
+                p.maybe_switch_vspace();
+                drop(pborrow);
+
                 kcb_iret_handle(kcb).resume()
+
             } else {
                 // Go to scheduler instead
                 crate::scheduler::schedule()
@@ -755,6 +797,11 @@ pub extern "C" fn handle_generic_exception(a: ExceptionArguments) -> ! {
 
             let kcb = get_kcb();
             if super::process::has_executor() {
+                
+                //let mut pborrow = super::process::CURRENT_EXECUTOR.borrow_mut();
+                //let p = pborrow.as_ref().unwrap();
+                //p.maybe_switch_vspace();
+
                 kcb_iret_handle(kcb).resume()
             } else {
                 loop {
diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs
index 1e2fc94d2..456b7033d 100644
--- a/kernel/src/arch/x86_64/process.rs
+++ b/kernel/src/arch/x86_64/process.rs
@@ -1015,10 +1015,14 @@ impl Executor for Ring3Executor {
     }
 
+    /// Loads the root returned by `NrProcess::ptroot` for this executor's
+    /// process into CR3, unless CR3 already holds that address.
     fn maybe_switch_vspace(&self) {
+        let replica_pml4 = NrProcess::<Ring3Process>::ptroot(self.pid).expect("Can't read pml4");
         unsafe {
             let current_pml4 = PAddr::from(controlregs::cr3());
-            if current_pml4 != self.pml4 {
-                trace!("Switching to 0x{:x}", self.pml4);
-                controlregs::cr3_write(self.pml4.into());
+            if current_pml4 != replica_pml4 {
+                info!("Switching from 0x{:x} to 0x{:x}", current_pml4, replica_pml4);
+                // Write the root we compared against and logged, not the cached `self.pml4`.
+                controlregs::cr3_write(replica_pml4.into());
             }
         }
@@ -1076,8 +1077,7 @@ impl Default for NrProcess<Ring3Process> {
 
 impl Clone for Ring3Process {
     fn clone(&self) -> Self {
-        unimplemented!("Clone not implemented for Ring3Process")
-        /*Ring3Process {
+        Ring3Process {
             pid: self.pid,
             current_eid: self.current_eid,
             vspace: self.vspace.clone(),
@@ -1090,7 +1090,7 @@ impl Clone for Ring3Process {
             pfm: self.pfm.clone(),
             writeable_sections: self.writeable_sections.clone(),
             read_only_offset: self.read_only_offset,
-        }*/
+        }
     }
 }
 
@@ -1188,7 +1188,8 @@ impl elfloader::ElfLoader for Ring3Process {
             };
 
             info!(
-                "ELF Allocate: {:#x} -- {:#x} align to {:#x} with flags {:?} ({:?})",
+                "{}: ELF Allocate: {:#x} -- {:#x} align to {:#x} with flags {:?} ({:?})",
+                *crate::environment::MT_ID,
                 page_base,
                 page_base + size_page,
                 align_to,
@@ -1498,57 +1499,25 @@ impl Process for Ring3Process {
     fn allocate_executors(
         &mut self,
         memory: Frame,
-
-        #[cfg(feature = "rackscale")] mid: kpi::system::MachineId,
     ) -> Result<usize, KError> {
         let executor_space_requirement = Ring3Executor::EXECUTOR_SPACE_REQUIREMENT;
         let executors_to_create = memory.size() / executor_space_requirement;
 
         // Only map to kernel space for local (valid) frames
-        #[cfg(feature = "rackscale")]
-        if mid == *crate::environment::MACHINE_ID {
-            KernelAllocator::try_refill_tcache(20, 0, MemType::Mem).expect("Refill didn't work");
-            self.vspace
-                .map_frame(
-                    self.executor_offset,
-                    memory,
-                    MapAction::user() | MapAction::write(),
-                )
-                .expect("Can't map user-space executor memory.");
-            log::debug!(
-                "executor space base expanded {:#x} size: {} end {:#x}",
-                self.executor_offset,
-                memory.size(),
-                self.executor_offset + memory.size()
-            );
-        } else {
-            log::debug!(
-                "skipping executor space vspace mapping for mid={:?} on mid={:?} {:#x} size: {} end {:#x}",
-                mid,
-                *crate::environment::MACHINE_ID,
-                self.executor_offset,
-                memory.size(),
-                self.executor_offset + memory.size()
-            );
-        }
-
-        #[cfg(not(feature = "rackscale"))]
-        {
-            KernelAllocator::try_refill_tcache(20, 0, MemType::Mem).expect("Refill didn't work");
-            self.vspace
-                .map_frame(
-                    self.executor_offset,
-                    memory,
-                    MapAction::user() | MapAction::write(),
-                )
-                .expect("Can't map user-space executor memory.");
-            log::debug!(
-                "executor space base expanded {:#x} size: {} end {:#x}",
+        KernelAllocator::try_refill_tcache(20, 0, MemType::Mem).expect("Refill didn't work");
+        self.vspace
+            .map_frame(
                 self.executor_offset,
-                memory.size(),
-                self.executor_offset + memory.size()
-            );
-        }
+                memory,
+                MapAction::user() | MapAction::write(),
+            )
+            .expect("Can't map user-space executor memory.");
+        log::debug!(
+            "executor space base expanded {:#x} size: {} end {:#x}",
+            self.executor_offset,
+            memory.size(),
+            self.executor_offset + memory.size()
+        );
 
         let executor_space = executor_space_requirement * executors_to_create;
         let prange = memory.base..memory.base + executor_space;
@@ -1581,11 +1550,11 @@ impl Process for Ring3Process {
                 memory.affinity,
             ))?;
 
-            #[cfg(not(feature = "rackscale"))]
             let index = memory.affinity as usize;
 
-            #[cfg(feature = "rackscale")]
-            let index = self.get_executor_index(memory.affinity, mid);
+            //TODO: xxx
+            //#[cfg(feature = "rackscale")]
+            //let index = self.get_executor_index(memory.affinity, mid);
 
             // TODO(error-handling): Needs to properly unwind on alloc errors
             // (e.g., have something that frees vcpu mem etc. on drop())
diff --git a/kernel/src/arch/x86_64/syscall.rs b/kernel/src/arch/x86_64/syscall.rs
index a8c225ce9..f95793cd5 100644
--- a/kernel/src/arch/x86_64/syscall.rs
+++ b/kernel/src/arch/x86_64/syscall.rs
@@ -294,12 +294,20 @@ impl<T: Arch86ProcessDispatch> ProcessDispatch<u64> for T {
 
     fn set_replicas(&self, add: u64, rid: u64) -> crate::error::KResult<(u64, u64)> {
         let pid = current_pid()?;
-        if add > 0 {
-            NrProcess::<Ring3Process>::add_replica(pid, rid as usize).expect("add_replica");
+        let handles = if add > 0 {
+            NrProcess::<Ring3Process>::add_replica(pid, rid as usize).expect("add_replica")
         }
         else {
-            NrProcess::<Ring3Process>::remove_replica(pid, rid as usize).expect("remove_replica");
-        }
+            NrProcess::<Ring3Process>::remove_replica(pid, rid as usize).expect("remove_replica")
+        };
+
+        #[cfg(feature = "rackscale")]
+        super::tlb::remote_shootdown(handles);
+
+        // There will only be one handle in non-rackscale build
+        #[cfg(not(feature = "rackscale"))]
+        super::tlb::shootdown(handles[0].clone());
+
 
         Ok((0,0))
     }
diff --git a/kernel/src/arch/x86_64/tlb.rs b/kernel/src/arch/x86_64/tlb.rs
index 9fb4f3413..657ffe835 100644
--- a/kernel/src/arch/x86_64/tlb.rs
+++ b/kernel/src/arch/x86_64/tlb.rs
@@ -159,6 +159,11 @@ impl Shootdown {
         // before this function completes:
         self.acknowledge();
 
+        if self.vregion.start == 0u64 && self.vregion.end == 0u64 {
+            log::info!("got special unmap for 0..0, skipping TLB flush");
+            return;
+        }
+
         let it = self.vregion.clone().step_by(BASE_PAGE_SIZE);
         if it.count() > 20 {
             trace!("flush the entire TLB");
@@ -397,6 +402,11 @@ pub(crate) fn shootdown(handle: TlbFlushHandle) {
     let shootdown = Shootdown::new(range);
     shootdown.process();
 
+    //use crate::process::Executor;
+    //let pborrow = super::process::CURRENT_EXECUTOR.borrow_mut();
+    //let p = pborrow.as_ref().unwrap();
+    //p.maybe_switch_vspace();
+
     // Wait synchronously on cores to complete
     while !shootdowns.is_empty() {
         // Make progress on our work while we wait for others
diff --git a/kernel/src/arch/x86_64/vspace/mod.rs b/kernel/src/arch/x86_64/vspace/mod.rs
index 325508e4f..e28d1cf7c 100644
--- a/kernel/src/arch/x86_64/vspace/mod.rs
+++ b/kernel/src/arch/x86_64/vspace/mod.rs
@@ -92,7 +92,7 @@ lazy_static! {
 /// - This clearly needs a better solution. See also the part where we patch
 ///   this into the process page-table.
 pub(crate) unsafe fn init_large_objects_pml4() {
-    log::info!("init_large_objects_pml4()");
+    //log::info!("init_large_objects_pml4()");
     let mut vspace = INITIAL_VSPACE.lock();
     let frame_ptr = alloc::alloc::alloc(PT_LAYOUT);
 
@@ -103,12 +103,17 @@ pub(crate) unsafe fn init_large_objects_pml4() {
     (*vspace.pml4)[132] = PML4Entry::new(frame.base, PML4Flags::P | PML4Flags::RW);
 }
 
+#[derive(Clone)]
 pub(crate) struct VSpace {
     pub mappings: BTreeMap<VAddr, MappingInfo>,
     pub page_table: PageTable,
 }
 
 impl AddressSpace for VSpace {
+    fn root(&self) -> PAddr {
+        self.pml4_address()
+    }
+    
     fn map_frame(&mut self, base: VAddr, frame: Frame, action: MapAction) -> Result<(), KError> {
         if frame.size() == 0 {
             return Err(KError::InvalidFrame);
@@ -148,6 +153,7 @@ impl AddressSpace for VSpace {
                 });
             }
         }
+        //log::info!("VSpace::map_frame base={:x} frame={:?} action={:?}", base.as_usize(), frame, action);
         //self.mappings
         //    .try_insert(base, MappingInfo::new(frame, action))?;
         let r = self.page_table.map_frame(base, frame, action);
diff --git a/kernel/src/arch/x86_64/vspace/page_table.rs b/kernel/src/arch/x86_64/vspace/page_table.rs
index 9dd2fd79f..1429a6c02 100644
--- a/kernel/src/arch/x86_64/vspace/page_table.rs
+++ b/kernel/src/arch/x86_64/vspace/page_table.rs
@@ -35,10 +35,108 @@ pub(crate) struct PageTable {
     pub pml4: Pin<Box<PML4>>,
 }
 
+/// Clones the page-table hierarchy below `KERNEL_BASE`.
+///
+/// Every present PML4 slot below the kernel base gets freshly allocated
+/// PDPT/PD/PT tables in the clone, while leaf entries (1 GiB, 2 MiB and
+/// 4 KiB mappings) are copied as-is from the original tables.
+impl Clone for PageTable {
+    fn clone(&self) -> Self {
+        /// Allocates one zeroed, `PT_LAYOUT`-sized frame for a new table.
+        fn alloc_frame() -> Frame {
+            let frame_ptr = unsafe {
+                let ptr = alloc::alloc::alloc_zeroed(PT_LAYOUT);
+                debug_assert!(!ptr.is_null());
+
+                let nptr = NonNull::new_unchecked(ptr);
+                NonNull::slice_from_raw_parts(nptr, PT_LAYOUT.size())
+            };
+            let vaddr = VAddr::from(frame_ptr.as_ptr() as *const u8 as u64);
+            let paddr = crate::arch::memory::kernel_vaddr_to_paddr(vaddr);
+            let mut frame = Frame::new(paddr, PT_LAYOUT.size(), 0);
+            unsafe { frame.zero() };
+            frame
+        }
+
+        /// Creates a PD entry pointing at a fresh, empty page table.
+        fn new_pt() -> PDEntry {
+            let frame = alloc_frame();
+            PDEntry::new(frame.base, PDFlags::P | PDFlags::RW | PDFlags::US)
+        }
+
+        /// Creates a PDPT entry pointing at a fresh, empty page directory.
+        fn new_pd() -> PDPTEntry {
+            let frame = alloc_frame();
+            PDPTEntry::new(frame.base, PDPTFlags::P | PDPTFlags::RW | PDPTFlags::US)
+        }
+
+        /// Creates a PML4 entry pointing at a fresh, empty PDPT.
+        fn new_pdpt() -> PML4Entry {
+            let frame = alloc_frame();
+            PML4Entry::new(frame.base, PML4Flags::P | PML4Flags::RW | PML4Flags::US)
+        }
+
+        let mut cloned_pt = PageTable::new().expect("Can't clone PT");
+
+        // Do a DFS and find all mapped entries and replicate them in the new `pt`
+        for pml4_idx in 0..PAGE_SIZE_ENTRIES {
+            if pml4_idx < pml4_index(KERNEL_BASE.into()) && self.pml4[pml4_idx].is_present() {
+                cloned_pt.pml4[pml4_idx] = new_pdpt();
+
+                for pdpt_idx in 0..PAGE_SIZE_ENTRIES {
+                    let pdpt = self.get_pdpt(self.pml4[pml4_idx]);
+                    let cloned_pdpt = cloned_pt.get_pdpt_mut(cloned_pt.pml4[pml4_idx]);
+
+                    if pdpt[pdpt_idx].is_present() {
+                        if !pdpt[pdpt_idx].is_page() {
+                            cloned_pdpt[pdpt_idx] = new_pd();
+                            let cloned_pdpt_entry = cloned_pdpt[pdpt_idx];
+                            drop(cloned_pdpt);
+
+                            for pd_idx in 0..PAGE_SIZE_ENTRIES {
+                                let pd = self.get_pd(pdpt[pdpt_idx]);
+                                let cloned_pd = cloned_pt.get_pd_mut(cloned_pdpt_entry);
+
+                                if pd[pd_idx].is_present() {
+                                    if !pd[pd_idx].is_page() {
+                                        cloned_pd[pd_idx] = new_pt();
+                                        let cloned_pd_entry = cloned_pd[pd_idx];
+                                        drop(cloned_pd);
+
+                                        for pt_idx in 0..PAGE_SIZE_ENTRIES {
+                                            let pt = self.get_pt(pd[pd_idx]);
+                                            let cloned_pt = cloned_pt.get_pt_mut(cloned_pd_entry);
+
+                                            if pt[pt_idx].is_present() {
+                                                cloned_pt[pt_idx] = pt[pt_idx];
+                                            }
+                                        }
+                                    } else {
+                                        // Encountered a 2 MiB mapping
+                                        cloned_pd[pd_idx] = pd[pd_idx];
+                                    }
+                                }
+                            }
+                        } else {
+                            // Encountered a 1 GiB mapping
+                            cloned_pdpt[pdpt_idx] = pdpt[pdpt_idx];
+                        }
+                    }
+                }
+            }
+        }
+
+        cloned_pt
+    }
+}
+
 impl Drop for PageTable {
+    #[allow(unreachable_code)]
     fn drop(&mut self) {
-        use alloc::alloc::dealloc;
+        log::info!("calling drop in PageTable, skipping for now");
+        return;
 
+        use alloc::alloc::dealloc;
         // Do a DFS and free all page-table memory allocated below kernel-base,
         // don't free the mapped frames -- we return them later through NR
         for pml4_idx in 0..PAGE_SIZE_ENTRIES {
@@ -85,6 +175,10 @@ impl Drop for PageTable {
 }
 
 impl AddressSpace for PageTable {
+    fn root(&self) -> PAddr {
+        PAddr::from(self.pml4.as_ptr() as u64)
+    }
+
     fn map_frame(&mut self, base: VAddr, frame: Frame, action: MapAction) -> Result<(), KError> {
         // These assertion are checked with error returns in `VSpace`
         debug_assert!(frame.size() > 0);
@@ -562,7 +656,7 @@ impl PageTable {
                     let cur_rights: MapAction = pt[pt_idx].flags().into();
                     if address != pbase + mapped || cur_rights != rights {
                         panic!(
-                            "Trying to map 4 KiB page but it conflicts with existing mapping {:x}",
+                            "Trying to map 4 KiB page at vbase={vbase:#x} pbase={pbase:#x} but it conflicts with existing mapping {:x}",
                             address
                         );
                     }
@@ -932,6 +1026,10 @@ impl<'a> ReadOnlyPageTable<'a> {
 }
 
 impl<'a> AddressSpace for ReadOnlyPageTable<'a> {
+    fn root(&self) -> PAddr {
+        PAddr::from(self.pml4.as_ptr() as u64)
+    }
+
     fn resolve(&self, addr: VAddr) -> Result<(PAddr, MapAction), KError> {
         let pml4_idx = pml4_index(addr);
         if self.pml4[pml4_idx].is_present() {
diff --git a/kernel/src/environment.rs b/kernel/src/environment.rs
index c9ae95be0..805f0822f 100644
--- a/kernel/src/environment.rs
+++ b/kernel/src/environment.rs
@@ -14,7 +14,18 @@ pub(crate) static CORE_ID: Lazy<usize> =
     Lazy::new(|| new_gtid(atopology::MACHINE_TOPOLOGY.current_thread().id, *MACHINE_ID));
 
 #[thread_local]
-pub(crate) static MT_ID: Lazy<usize> = Lazy::new(|| kpi::system::mtid_from_gtid(*crate::environment::CORE_ID));
+pub(crate) static MT_ID: Lazy<usize> = Lazy::new(|| {
+    #[cfg(feature = "rackscale")]
+    return (*crate::environment::MACHINE_ID - 1) * (atopology::MACHINE_TOPOLOGY.num_threads()) + atopology::MACHINE_TOPOLOGY.current_thread().id;
+    #[cfg(not(feature = "rackscale"))]
+    *CORE_ID
+});
+
+/// Number of nodes in the current deployment.
+#[allow(unused)]
+pub(crate) static NUM_NODES: Lazy<usize> =
+    Lazy::new(|| atopology::MACHINE_TOPOLOGY.num_nodes());
+
 
 /// The NUMA node id of the current core (hardware thread).
 #[thread_local]
diff --git a/kernel/src/fs/fd.rs b/kernel/src/fs/fd.rs
index d89c0016c..283a20433 100644
--- a/kernel/src/fs/fd.rs
+++ b/kernel/src/fs/fd.rs
@@ -105,6 +105,16 @@ pub(crate) struct FileDescriptorEntry {
     offset: AtomicUsize,
 }
 
+impl Clone for FileDescriptorEntry {
+    fn clone(&self) -> Self {
+        Self {
+            mnode: self.mnode,
+            flags: self.flags,
+            offset: AtomicUsize::new(self.offset()),
+        }
+    }
+}
+
 impl FileDescriptorEntry {
     pub(super) fn update(&mut self, mnode: MnodeNum, flags: FileFlags) {
         self.mnode = mnode;
diff --git a/kernel/src/memory/vspace.rs b/kernel/src/memory/vspace.rs
index 23149390d..e27610b3b 100644
--- a/kernel/src/memory/vspace.rs
+++ b/kernel/src/memory/vspace.rs
@@ -100,7 +100,7 @@ impl Iterator for CoreBitMapIter {
 }
 
 #[cfg_attr(not(target_os = "none"), allow(dead_code))]
-#[derive(Debug, PartialEq)]
+#[derive(Debug, PartialEq, Copy, Clone)]
 pub(crate) enum MappingType {
     _ElfText,
     _ElfData,
@@ -108,6 +108,7 @@ pub(crate) enum MappingType {
     Heap,
 }
 
+#[derive(PartialEq, Clone)]
 pub(crate) struct MappingInfo {
     pub frame: Frame,
     pub rights: MapAction,
@@ -141,6 +142,8 @@ impl fmt::Debug for MappingInfo {
 
 /// Generic address space functionality.
 pub(crate) trait AddressSpace {
+    fn root(&self) -> PAddr;
+
     /// Maps a list of `frames` at `base` in the address space
     /// with the access rights defined by `action`.
     fn map_frames(&mut self, base: VAddr, frames: &[(Frame, MapAction)]) -> Result<(), KError> {
diff --git a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs
index 0a6260d5d..67ddf3957 100644
--- a/kernel/src/nrproc.rs
+++ b/kernel/src/nrproc.rs
@@ -32,7 +32,11 @@ pub(crate) static PROCESS_TOKEN: Once<ArrayVec<ThreadToken, { MAX_PROCESSES }>>
 ///
 /// Should be called on each core.
 pub(crate) fn register_thread_with_process_replicas() {
+    #[cfg(not(feature = "rackscale"))]
     let node = *crate::environment::NODE_ID;
+    #[cfg(feature = "rackscale")]
+    let node = 0; //*crate::environment::MACHINE_ID
+    
     debug_assert!(PROCESS_TABLE.len() > node, "Invalid Node ID");
 
     PROCESS_TOKEN.call_once(|| {
@@ -40,7 +44,8 @@ pub(crate) fn register_thread_with_process_replicas() {
         for pid in 0..MAX_PROCESSES {
             debug_assert!(PROCESS_TABLE.len() > pid, "Invalid PID");
 
-            let token = PROCESS_TABLE[pid].write(*crate::environment::MT_ID).register(node);
+            let token = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).register(node);
+            log::debug!("MT_ID is {}: {node} registered {pid} {token:?}", *crate::environment::MT_ID);
             tokens.push(token.expect("Need to be able to register"));
         }
 
@@ -69,6 +74,7 @@ pub(crate) enum ProcessOp<'buf> {
     #[allow(unused)]
     ExecSliceMut(UserSlice, SliceExecMutFn<'buf>),
     ExecSlice(&'buf UserSlice, SliceExecFn<'buf>),
+    GetPtRoot,
 }
 
 /// Mutable operations on the NrProcess.
@@ -84,10 +90,6 @@ pub(crate) enum ProcessOpMut {
     /// Remove a physical frame previosuly allocated to the process (returns a Frame).
     ReleaseFrameFromProcess(FrameId),
 
-    #[cfg(feature = "rackscale")]
-    DispatcherAllocation(Frame, kpi::system::MachineId),
-
-    #[cfg(not(feature = "rackscale"))]
     DispatcherAllocation(Frame),
 
     MemMapFrame(VAddr, Frame, MapAction),
@@ -111,6 +113,7 @@ pub(crate) enum ProcessResult<E: Executor> {
     Frame(Frame),
     ReadSlice(Arc<[u8]>),
     ReadString(String),
+    PtRoot(PAddr),
 }
 
 pub(crate) trait ProcessManager {
@@ -141,22 +144,44 @@ impl<P: Process> NrProcess<P> {
 }
 
 impl<P: Process> NrProcess<P> {
-    pub(crate) fn add_replica(pid: Pid, rid: usize) -> Result<(), KError>{
+    pub(crate) fn add_replica(pid: Pid, rid: usize) -> Result<Vec<TlbFlushHandle>, KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
+        #[cfg(feature = "rackscale")]
         let max_nodes = *crate::environment::NUM_MACHINES;
+        #[cfg(not(feature = "rackscale"))]
+        let max_nodes = *crate::environment::NUM_NODES;
+
         debug_assert!(rid < max_nodes, "Invalid Node ID");
         log::info!("add_replica {pid} {rid}");
+        // MemUnmap special-cases VAddr 0x0: nothing is unmapped, we only get the TLB-flush handles back (a snapshot of where the process is running).
+        let handle = NrProcess::<P>::unmap(pid, VAddr::from(0x0));
+        if let Err(e) = &handle {
+            panic!("couldn't get snapshot: {e:?}");
+        }
+
         PROCESS_TABLE[pid].write(*crate::environment::MT_ID).add_replica(rid).expect("add_replica failed");
-        Ok(())
+        log::info!("added_replica {pid} {rid}");
+
+        handle // always `Ok(..)` here: the `Err` case panicked above
     }
 
-    pub(crate) fn remove_replica(pid: Pid, rid: usize) -> Result<(), KError>{
+    pub(crate) fn remove_replica(pid: Pid, rid: usize) -> Result<Vec<TlbFlushHandle>, KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
+        #[cfg(feature = "rackscale")]
         let max_nodes = *crate::environment::NUM_MACHINES;
-        debug_assert!(rid < max_nodes, "Invalid Node ID");
-        log::info!("remove_replica {pid} {rid}");
+        #[cfg(not(feature = "rackscale"))]
+        let max_nodes = *crate::environment::NUM_NODES;
+
+        debug_assert!(rid < max_nodes, "Invalid Node ID {rid} max_nodes {max_nodes}");
+
+        // MemUnmap special-cases VAddr 0x0: nothing is unmapped, we only get the TLB-flush handles back (a snapshot of where the process is running).
+        let handle = NrProcess::<P>::unmap(pid, VAddr::from(0x0));
+        if let Err(e) = &handle {
+            panic!("couldn't get snapshot: {e:?}");
+        }
         PROCESS_TABLE[pid].write(*crate::environment::MT_ID).remove_replica(rid).expect("remove_replica failed");
-        Ok(())    
+
+        handle // always `Ok(..)` here: the `Err` case panicked above
     }
 
     pub(crate) fn load(
@@ -274,6 +299,18 @@ impl<P: Process> NrProcess<P> {
         Ok((base.as_u64(), virtual_offset as u64))
     }
 
+    /// Runs `ProcessOp::GetPtRoot` on process `pid` and returns the resulting `PAddr` (the value of the address space's `root()`).
+    pub(crate) fn ptroot(pid: Pid) -> Result<PAddr, KError> {
+        debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
+        let response =
+            PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute(ProcessOp::GetPtRoot, PROCESS_TOKEN.get().unwrap()[pid]);
+        match response {
+            Ok(ProcessResult::PtRoot(paddr)) => Ok(paddr),
+            Err(e) => Err(e),
+            _ => unreachable!("Got unexpected response"),
+        }
+    }
+
     pub(crate) fn pinfo(pid: Pid) -> Result<ProcessInfo, KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
         let response =
@@ -351,14 +388,9 @@ impl<P: Process> NrProcess<P> {
 
     pub(crate) fn allocate_dispatchers(pid: Pid, frame: Frame) -> Result<usize, KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-        #[cfg(feature = "rackscale")]
-        let mid = *crate::environment::MACHINE_ID;
 
         let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut(
-            #[cfg(not(feature = "rackscale"))]
             ProcessOpMut::DispatcherAllocation(frame),
-            #[cfg(feature = "rackscale")]
-            ProcessOpMut::DispatcherAllocation(frame, mid),
             PROCESS_TOKEN.get().unwrap()[pid],
         );
 
@@ -450,6 +482,9 @@ where
 
     fn dispatch<'buf>(&self, op: Self::ReadOperation<'_>) -> Self::Response {
         match op {
+            ProcessOp::GetPtRoot => {
+                Ok(ProcessResult::PtRoot(self.process.vspace().root()))
+            }
             ProcessOp::ProcessInfo => Ok(ProcessResult::ProcessInfo(*self.process.pinfo())),
             ProcessOp::MemResolve(base) => {
                 let (paddr, rights) = self.process.vspace().resolve(base)?;
@@ -511,18 +546,11 @@ where
                 Ok(ProcessResult::Ok)
             }
 
-            #[cfg(not(feature = "rackscale"))]
             ProcessOpMut::DispatcherAllocation(frame) => {
                 let how_many = self.process.allocate_executors(frame)?;
                 Ok(ProcessResult::ExecutorsCreated(how_many))
             }
 
-            #[cfg(feature = "rackscale")]
-            ProcessOpMut::DispatcherAllocation(frame, mid) => {
-                let how_many = self.process.allocate_executors(frame, mid)?;
-                Ok(ProcessResult::ExecutorsCreated(how_many))
-            }
-
             ProcessOpMut::MemMapFrame(base, frame, action) => {
                 crate::memory::KernelAllocator::try_refill_tcache(7, 0, MemType::Mem)?;
                 self.process.vspace_mut().map_frame(base, frame, action)?;
@@ -545,12 +573,18 @@ where
             }
 
             ProcessOpMut::MemUnmap(vaddr) => {
-                let shootdown_handle = self.process.vspace_mut().unmap(vaddr)?;
-                if shootdown_handle.flags.is_aliasable() {
-                    self.process
-                        .remove_frame_mapping(shootdown_handle.paddr, shootdown_handle.vaddr)
-                        .expect("is_aliasable implies this op can't fail");
+                let shootdown_handle = if vaddr.as_u64() != 0x0 {
+                    let shootdown_handle = self.process.vspace_mut().unmap(vaddr)?;
+                    if shootdown_handle.flags.is_aliasable() {
+                        self.process
+                            .remove_frame_mapping(shootdown_handle.paddr, shootdown_handle.vaddr)
+                            .expect("is_aliasable implies this op can't fail");
+                    }
+                    shootdown_handle
                 }
+                else {
+                    TlbFlushHandle::new(0x0.into(), 0x0.into(), 0x0, MapAction::none())
+                };
 
                 let num_machines = *crate::environment::NUM_MACHINES;
                 let mut shootdown_handles = Vec::try_with_capacity(num_machines)
diff --git a/kernel/src/process.rs b/kernel/src/process.rs
index 77ad93dd9..e2a6ebd68 100644
--- a/kernel/src/process.rs
+++ b/kernel/src/process.rs
@@ -27,7 +27,7 @@ use crate::arch::MAX_CORES;
 use crate::cmdline::CommandLineArguments;
 use crate::error::{KError, KResult};
 use crate::fs::fd::FileDescriptorEntry;
-use crate::memory::backends::PhysicalPageProvider;
+//use crate::memory::backends::PhysicalPageProvider;
 use crate::memory::vspace::AddressSpace;
 use crate::memory::{Frame, KernelAllocator, PAddr, VAddr, KERNEL_BASE};
 use crate::prelude::overlaps;
@@ -79,16 +79,8 @@ pub(crate) trait Process: FrameManagement + Clone {
         affinity: atopology::NodeId,
     ) -> Result<(), alloc::collections::TryReserveError>;
 
-    #[cfg(not(feature = "rackscale"))]
     fn allocate_executors(&mut self, frame: Frame) -> Result<usize, KError>;
 
-    #[cfg(feature = "rackscale")]
-    fn allocate_executors(
-        &mut self,
-        frame: Frame,
-        mid: kpi::system::MachineId,
-    ) -> Result<usize, KError>;
-
     fn vspace_mut(&mut self) -> &mut Self::A;
 
     fn vspace(&self) -> &Self::A;
@@ -121,6 +113,7 @@ pub(crate) trait FrameManagement {
 }
 
 /// Implementation for managing a process' frames.
+#[derive(Clone)]
 pub(crate) struct ProcessFrames {
     /// Physical frame objects registered to the process.
     frames: ArrayVec<(Option<Frame>, usize), MAX_FRAMES_PER_PROCESS>,
@@ -589,9 +582,14 @@ pub(crate) fn allocate_dispatchers<P: Process>(pid: Pid, affinity: NodeId) -> Re
         KernelAllocator::try_refill_tcache(20, 1, MemType::Mem)?;
         let mut frame = {
             let pcm = crate::arch::kcb::per_core_mem();
-            pcm.gmanager.unwrap().node_caches[affinity]
-                .lock()
-                .allocate_large_page()?
+
+            #[cfg(feature = "rackscale")]
+            pcm.set_mem_affinity(affinity).expect("Can't change affinity");
+            let frame = pcm.mem_manager().allocate_large_page()?;
+            
+            #[cfg(feature = "rackscale")]
+            pcm.set_mem_affinity(crate::memory::shmem_affinity::local_shmem_affinity()).expect("Can't reset affinity");
+            frame
         };
 
         unsafe {
diff --git a/kernel/tests/s04_user_runtime_tests.rs b/kernel/tests/s04_user_runtime_tests.rs
index e8d749895..333aa87c9 100644
--- a/kernel/tests/s04_user_runtime_tests.rs
+++ b/kernel/tests/s04_user_runtime_tests.rs
@@ -19,6 +19,30 @@ use testutils::helpers::{
 };
 use testutils::runner_args::{check_for_successful_exit, wait_for_sigterm, RunnerArgs};
 
+/// Makes sure we can change the number of replicas for a process: runs the `test-dynamic-replication` user build and waits for "dynamic_replication OK".
+#[cfg(not(feature = "baremetal"))]
+#[test]
+fn s04_dynamic_replication() {
+    let build = BuildArgs::default()
+        .user_feature("test-dynamic-replication")
+        .build();
+    let cmdline = RunnerArgs::new_with_build("userspace-smp", &build)
+        .nodes(4)
+        .cores(64)
+        .memory(4096)
+        .timeout(120_000);
+
+    let mut output = String::new();
+    let mut qemu_run = || -> Result<WaitStatus> {
+        let mut p = spawn_nrk(&cmdline)?;
+        output += p.exp_string("dynamic_replication OK")?.as_str();
+        output += p.exp_eof()?.as_str(); // append (not assign) so the log captured before the marker is kept for diagnostics
+        p.process.exit()
+    };
+
+    check_for_successful_exit(&cmdline, qemu_run(), output);
+}
+
 /// Tests the lineup scheduler multi-core ability.
 ///
 /// Makes sure we can request cores and spawn threads on said cores.
diff --git a/kernel/tests/s06_rackscale_tests.rs b/kernel/tests/s06_rackscale_tests.rs
index 0954febdb..544899d83 100644
--- a/kernel/tests/s06_rackscale_tests.rs
+++ b/kernel/tests/s06_rackscale_tests.rs
@@ -39,6 +39,7 @@ fn rackscale_userspace_smoke_test(transport: RackscaleTransport) {
             "test-upcall",
             "test-scheduler",
             "test-syscalls",
+            //"test-dynamic-replication"
         ])
         .set_rackscale(true)
         .release()
@@ -58,6 +59,7 @@ fn rackscale_userspace_smoke_test(transport: RackscaleTransport) {
         *output += proc.exp_string("map_test OK")?.as_str();
         *output += proc.exp_string("alloc_test OK")?.as_str();
         *output += proc.exp_string("scheduler_test OK")?.as_str();
+        //*output += proc.exp_string("dynamic_replication OK")?.as_str();
         Ok(())
     }
 
@@ -65,6 +67,7 @@ fn rackscale_userspace_smoke_test(transport: RackscaleTransport) {
     test_run.client_match_fn = client_match_fn;
     test_run.transport = transport;
     test_run.wait_for_client = true;
+    test_run.shmem_size *= 2;
     test_run.run_rackscale();
 }
 
diff --git a/kernel/tests/s10_benchmarks.rs b/kernel/tests/s10_benchmarks.rs
index 4bff3512d..c39001adc 100644
--- a/kernel/tests/s10_benchmarks.rs
+++ b/kernel/tests/s10_benchmarks.rs
@@ -850,7 +850,7 @@ fn s10_xmemcached_benchmark_internal() {
         // Throw out everything above 28 since we have some non-deterministic
         // bug on larger machines that leads to threads calling sched_yield and
         // no readrandom is performed...
-        .filter(|&t| t <= 28)
+        .filter(|&t| t <= 16)
         .collect();
 
     // memcached arguments // currently not there.
diff --git a/kernel/testutils/src/rackscale_runner.rs b/kernel/testutils/src/rackscale_runner.rs
index 892443c48..3b8af826c 100644
--- a/kernel/testutils/src/rackscale_runner.rs
+++ b/kernel/testutils/src/rackscale_runner.rs
@@ -413,9 +413,9 @@ impl<T: Clone + Send + 'static> RackscaleRun<T> {
         }
 
         for client_ret in client_rets {
-            client_ret.unwrap();
+            client_ret.expect("client_ret");
         }
-        controller_ret.unwrap();
+        controller_ret.expect("controller_ret");
     }
 
     pub fn run_baseline(&self) {
diff --git a/usr/init/src/init.rs b/usr/init/src/init.rs
index 2b3a2669e..17cb706fd 100644
--- a/usr/init/src/init.rs
+++ b/usr/init/src/init.rs
@@ -988,8 +988,18 @@ pub fn upcall_test() {
 }
 
 pub fn dynamic_replication_test() {
-    vibrio::syscalls::Process::set_replicas(false, 1).expect("Can't read vcpu control area.");
-    vibrio::syscalls::Process::set_replicas(true, 1).expect("Can't read vcpu control area.");
+    info!("dynamic_replication_test START");
+    
+    vibrio::syscalls::Process::set_replicas(false, 0).expect("Can't remove replica 0."); // `false` = remove, `true` = add (per the expect messages in this function)
+
+    //vibrio::syscalls::Process::set_replicas(false, 1).expect("Can't remove replica 1.");
+    //vibrio::syscalls::Process::set_replicas(false, 2).expect("Can't remove replica 2.");
+    //vibrio::syscalls::Process::set_replicas(false, 3).expect("Can't remove replica 3.");
+    //info!("dynamic_replication_test ALL REMOVED");
+    //vibrio::syscalls::Process::set_replicas(true, 3).expect("Can't add replica 3.");
+    //vibrio::syscalls::Process::set_replicas(true, 2).expect("Can't add replica 2.");
+    //vibrio::syscalls::Process::set_replicas(true, 1).expect("Can't add replica 1.");
+    info!("dynamic_replication_test ALL ADDED BACK"); // NOTE(review): misleading while the re-add calls above stay commented out — only replica 0 was removed
     info!("dynamic_replication OK");
 }
 
@@ -1070,6 +1080,7 @@ pub extern "C" fn _start() -> ! {
     #[cfg(feature = "test-core-alloc")]
     core_alloc_test();
     
+    #[cfg(feature = "test-dynamic-replication")]
     dynamic_replication_test();
 
     #[cfg(feature = "test-scheduler")]

From b9f89b21ec80ab465aa201553f0386797df9f6fe Mon Sep 17 00:00:00 2001
From: Gerd Zellweger <mail@gerdzellweger.com>
Date: Mon, 6 Nov 2023 01:00:04 -0800
Subject: [PATCH 10/32] WiP: bump lib/node-replication2 submodule

Signed-off-by: Gerd Zellweger <mail@gerdzellweger.com>
---
 lib/node-replication2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/node-replication2 b/lib/node-replication2
index 23df3df94..b5ffc6a67 160000
--- a/lib/node-replication2
+++ b/lib/node-replication2
@@ -1 +1 @@
-Subproject commit 23df3df94d60ad64abc078c62fbbb229f7f01dff
+Subproject commit b5ffc6a676ac036ebaed6f3de1d04f4ab4d36177

From 92c79d1c06aeab5500e83c7c53ecc10773e28a3f Mon Sep 17 00:00:00 2001
From: Gerd Zellweger <mail@gerdzellweger.com>
Date: Thu, 16 Nov 2023 15:08:29 -0800
Subject: [PATCH 11/32] Wip.

Signed-off-by: Gerd Zellweger <mail@gerdzellweger.com>
---
 kernel/src/arch/x86_64/mod.rs                 |  9 ---------
 kernel/src/arch/x86_64/process.rs             | 10 ++++++----
 .../src/arch/x86_64/rackscale/controller.rs   |  3 +++
 kernel/src/memory/mod.rs                      |  5 +++++
 kernel/src/nrproc.rs                          | 20 +++++++++++++++++--
 kernel/src/process.rs                         |  3 ++-
 kernel/tests/s06_rackscale_tests.rs           |  5 +++--
 7 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/kernel/src/arch/x86_64/mod.rs b/kernel/src/arch/x86_64/mod.rs
index b4fdfcfcd..dac19add9 100644
--- a/kernel/src/arch/x86_64/mod.rs
+++ b/kernel/src/arch/x86_64/mod.rs
@@ -513,15 +513,6 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {
     // Intialize PCI
     crate::pci::init();
 
-    // Initialize processes
-    #[cfg(feature = "rackscale")]
-    if crate::CMDLINE
-        .get()
-        .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller)
-    {
-        lazy_static::initialize(&process::PROCESS_TABLE);
-    }
-
     #[cfg(not(feature = "rackscale"))]
     {
         lazy_static::initialize(&process::PROCESS_TABLE);
diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs
index 456b7033d..3df58bd15 100644
--- a/kernel/src/arch/x86_64/process.rs
+++ b/kernel/src/arch/x86_64/process.rs
@@ -76,6 +76,7 @@ lazy_static! {
     pub(crate) static ref PROCESS_TABLE: ArrayVec<Arc<RwLock<NodeReplicated<NrProcess<Ring3Process>>>>, MAX_PROCESSES> = {
         use crate::memory::shmem_affinity::mid_to_shmem_affinity;
         use crate::arch::kcb::per_core_mem;
+        use crate::environment::NUM_MACHINES;
 
         if !crate::CMDLINE
             .get()
@@ -106,7 +107,7 @@ lazy_static! {
         
         // Want at least one replica...
         let num_replicas =
-            NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).unwrap();
+            NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes() * (*NUM_MACHINES))).unwrap();
         let mut processes = ArrayVec::new();
 
         for _pid in 0..MAX_PROCESSES {
@@ -1499,6 +1500,7 @@ impl Process for Ring3Process {
     fn allocate_executors(
         &mut self,
         memory: Frame,
+        #[cfg(feature = "rackscale")] mid: kpi::system::MachineId,
     ) -> Result<usize, KError> {
         let executor_space_requirement = Ring3Executor::EXECUTOR_SPACE_REQUIREMENT;
         let executors_to_create = memory.size() / executor_space_requirement;
@@ -1550,11 +1552,11 @@ impl Process for Ring3Process {
                 memory.affinity,
             ))?;
 
+            #[cfg(not(feature = "rackscale"))]
             let index = memory.affinity as usize;
 
-            //TODO: xxx
-            //#[cfg(feature = "rackscale")]
-            //let index = self.get_executor_index(memory.affinity, mid);
+            #[cfg(feature = "rackscale")]
+            let index = self.get_executor_index(memory.affinity, mid);
 
             // TODO(error-handling): Needs to properly unwind on alloc errors
             // (e.g., have something that frees vcpu mem etc. on drop())
diff --git a/kernel/src/arch/x86_64/rackscale/controller.rs b/kernel/src/arch/x86_64/rackscale/controller.rs
index 20def97b5..3989403c8 100644
--- a/kernel/src/arch/x86_64/rackscale/controller.rs
+++ b/kernel/src/arch/x86_64/rackscale/controller.rs
@@ -81,6 +81,9 @@ pub(crate) fn run() {
 
     // TODO(dynrep): here is the point where we have all the memory regions and
     // could create NodeReplicated instances for each client.
+    // Initialize processes
+    use crate::arch::process::PROCESS_TABLE;
+    lazy_static::initialize(&PROCESS_TABLE);
 
     #[cfg(feature = "test-controller-shmem-alloc")]
     {
diff --git a/kernel/src/memory/mod.rs b/kernel/src/memory/mod.rs
index ab3939bdf..ce9b67085 100644
--- a/kernel/src/memory/mod.rs
+++ b/kernel/src/memory/mod.rs
@@ -345,6 +345,10 @@ impl KernelAllocator {
         use crate::arch::rackscale::get_shmem_frames::rpc_get_shmem_frames;
         use crate::arch::rackscale::CLIENT_STATE;
 
+        if needed_base_pages == 0 && needed_large_pages == 0 {
+            return Ok(());
+        }
+
         // We only request at large page granularity
         let mut total_needed_large_pages = needed_large_pages;
         let mut total_needed_base_pages = needed_base_pages;
@@ -418,6 +422,7 @@ impl KernelAllocator {
             }
             frames
         } else {
+            log::info!("try-refill-shmem needed_base_pages={needed_base_pages} needed_large_pages={needed_large_pages} total_needed_base_pages={total_needed_base_pages} total_needed_large_pages={total_needed_large_pages}");
             rpc_get_shmem_frames(None, total_needed_large_pages)?
         };
 
diff --git a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs
index 67ddf3957..8246fc217 100644
--- a/kernel/src/nrproc.rs
+++ b/kernel/src/nrproc.rs
@@ -90,8 +90,12 @@ pub(crate) enum ProcessOpMut {
     /// Remove a physical frame previosuly allocated to the process (returns a Frame).
     ReleaseFrameFromProcess(FrameId),
 
+    #[cfg(feature = "rackscale")]
+    DispatcherAllocation(Frame, kpi::system::MachineId),
+    
+    #[cfg(not(feature = "rackscale"))]
     DispatcherAllocation(Frame),
-
+    
     MemMapFrame(VAddr, Frame, MapAction),
     MemMapDevice(Frame, MapAction),
     MemMapFrameId(VAddr, FrameId, MapAction),
@@ -389,11 +393,16 @@ impl<P: Process> NrProcess<P> {
     pub(crate) fn allocate_dispatchers(pid: Pid, frame: Frame) -> Result<usize, KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
 
+        #[cfg(feature = "rackscale")]
+        let mid = *crate::environment::MACHINE_ID;
         let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut(
+        #[cfg(not(feature = "rackscale"))]
             ProcessOpMut::DispatcherAllocation(frame),
+        #[cfg(feature = "rackscale")]
+            ProcessOpMut::DispatcherAllocation(frame, mid),
             PROCESS_TOKEN.get().unwrap()[pid],
         );
-
+        
         match response {
             Ok(ProcessResult::ExecutorsCreated(how_many)) => Ok(how_many),
             Err(e) => Err(e),
@@ -546,11 +555,18 @@ where
                 Ok(ProcessResult::Ok)
             }
 
+            #[cfg(not(feature = "rackscale"))]
             ProcessOpMut::DispatcherAllocation(frame) => {
                 let how_many = self.process.allocate_executors(frame)?;
                 Ok(ProcessResult::ExecutorsCreated(how_many))
             }
 
+            #[cfg(feature = "rackscale")]
+            ProcessOpMut::DispatcherAllocation(frame, mid) => {
+                let how_many = self.process.allocate_executors(frame, mid)?;
+                Ok(ProcessResult::ExecutorsCreated(how_many))
+            }
+
             ProcessOpMut::MemMapFrame(base, frame, action) => {
                 crate::memory::KernelAllocator::try_refill_tcache(7, 0, MemType::Mem)?;
                 self.process.vspace_mut().map_frame(base, frame, action)?;
diff --git a/kernel/src/process.rs b/kernel/src/process.rs
index e2a6ebd68..f3c13db20 100644
--- a/kernel/src/process.rs
+++ b/kernel/src/process.rs
@@ -79,7 +79,7 @@ pub(crate) trait Process: FrameManagement + Clone {
         affinity: atopology::NodeId,
     ) -> Result<(), alloc::collections::TryReserveError>;
 
-    fn allocate_executors(&mut self, frame: Frame) -> Result<usize, KError>;
+    fn allocate_executors(&mut self, frame: Frame, #[cfg(feature = "rackscale")] mid: kpi::system::MachineId) -> Result<usize, KError>;
 
     fn vspace_mut(&mut self) -> &mut Self::A;
 
@@ -288,6 +288,7 @@ impl elfloader::ElfLoader for DataSecAllocator {
                         }
                     };
 
+                    log::info!("DataSecAllocator::allocate");
                     let shmem_frames = rpc_get_shmem_frames(Some(self.pid), large_pages)
                         .expect("Failed to get shmem frames for elf loading");
 
diff --git a/kernel/tests/s06_rackscale_tests.rs b/kernel/tests/s06_rackscale_tests.rs
index 544899d83..25e9518af 100644
--- a/kernel/tests/s06_rackscale_tests.rs
+++ b/kernel/tests/s06_rackscale_tests.rs
@@ -39,7 +39,7 @@ fn rackscale_userspace_smoke_test(transport: RackscaleTransport) {
             "test-upcall",
             "test-scheduler",
             "test-syscalls",
-            //"test-dynamic-replication"
+            "test-dynamic-replication"
         ])
         .set_rackscale(true)
         .release()
@@ -59,7 +59,7 @@ fn rackscale_userspace_smoke_test(transport: RackscaleTransport) {
         *output += proc.exp_string("map_test OK")?.as_str();
         *output += proc.exp_string("alloc_test OK")?.as_str();
         *output += proc.exp_string("scheduler_test OK")?.as_str();
-        //*output += proc.exp_string("dynamic_replication OK")?.as_str();
+        *output += proc.exp_string("dynamic_replication OK")?.as_str();
         Ok(())
     }
 
@@ -68,6 +68,7 @@ fn rackscale_userspace_smoke_test(transport: RackscaleTransport) {
     test_run.transport = transport;
     test_run.wait_for_client = true;
     test_run.shmem_size *= 2;
+    test_run.num_clients = 3;
     test_run.run_rackscale();
 }
 

From 91811e61af9aba191d4871eb214a6144c5b905f5 Mon Sep 17 00:00:00 2001
From: Gerd Zellweger <mail@gerdzellweger.com>
Date: Sat, 18 Nov 2023 20:11:32 -0800
Subject: [PATCH 12/32] Rework KernelNode init, change affinity correctly.

Signed-off-by: Gerd Zellweger <mail@gerdzellweger.com>
---
 kernel/src/arch/unix/mod.rs                   |  27 +--
 kernel/src/arch/x86_64/coreboot.rs            |   7 -
 kernel/src/arch/x86_64/mod.rs                 |  70 +++-----
 kernel/src/arch/x86_64/process.rs             |  20 ++-
 .../src/arch/x86_64/rackscale/controller.rs   |   1 +
 kernel/src/nr.rs                              |  84 +++++++---
 kernel/src/process.rs                         |  19 +--
 kernel/src/scheduler/mod.rs                   |  98 ++++++-----
 kernel/tests/s11_rackscale_benchmarks.rs      | 155 ++++++++++++++++++
 lib/node-replication2                         |   2 +-
 10 files changed, 310 insertions(+), 173 deletions(-)

diff --git a/kernel/src/arch/unix/mod.rs b/kernel/src/arch/unix/mod.rs
index a0d2fedad..7b8c5f147 100644
--- a/kernel/src/arch/unix/mod.rs
+++ b/kernel/src/arch/unix/mod.rs
@@ -103,33 +103,14 @@ fn init_setup() {
     unsafe { kcb::PER_CORE_MEMORY.set_global_mem(global_memory_static) };
     debug!("Memory allocation should work at this point...");
 
-    let kernel_node = {
+    {
         // Create the global operation log and first replica and store it (needs
         // TLS)
-        let kernel_node: Arc<NodeReplicated<KernelNode>> = Arc::try_new(
-            NodeReplicated::new(NonZeroUsize::new(1).unwrap(), |afc: AffinityChange| {
-                return 0; // xxx
-            })
-            .expect("Not enough memory to initialize system"),
-        )
-        .expect("Not enough memory to initialize system");
-
-        let local_ridx = kernel_node.register(0).unwrap();
-        crate::nr::NR_REPLICA.call_once(|| (kernel_node.clone(), local_ridx));
-        kernel_node
+        lazy_static::initialize(&crate::nr::KERNEL_NODE_INSTANCE);
+        let local_ridx = crate::nr::KERNEL_NODE_INSTANCE.register(0).unwrap();
+        crate::nr::NR_REPLICA_REGISTRATION.call_once(|| local_ridx);
     };
 
-    /*
-
-       let log: Arc<Log<Op>> = Arc::try_new(Log::<Op>::new(LARGE_PAGE_SIZE))
-           .expect("Not enough memory to initialize system");
-       let bsp_replica = Replica::<KernelNode>::new(&log);
-       let local_ridx = bsp_replica
-           .register()
-           .expect("Failed to register with Replica.");
-       crate::nr::NR_REPLICA.call_once(|| (bsp_replica.clone(), local_ridx));
-
-    */
     // Starting to initialize file-system
     let fs_logs = crate::fs::cnrfs::allocate_logs();
     // Construct the first replica
diff --git a/kernel/src/arch/x86_64/coreboot.rs b/kernel/src/arch/x86_64/coreboot.rs
index e3cdfa9a4..9bd4c0b5a 100644
--- a/kernel/src/arch/x86_64/coreboot.rs
+++ b/kernel/src/arch/x86_64/coreboot.rs
@@ -18,7 +18,6 @@ use fallible_collections::FallibleVecGlobal;
 use fallible_collections::TryClone;
 use log::debug;
 use log::trace;
-use nr2::nr::NodeReplicated;
 use x86::apic::ApicId;
 use x86::current::paging::PAddr;
 
@@ -29,7 +28,6 @@ use crate::memory::backends::PhysicalPageProvider;
 use crate::memory::global::GlobalMemory;
 use crate::memory::vspace::MapAction;
 use crate::memory::Frame;
-use crate::nr::KernelNode;
 use crate::round_up;
 use crate::stack::OwnedStack;
 use crate::stack::Stack;
@@ -57,7 +55,6 @@ pub(crate) struct AppCoreArgs {
     pub(super) global_pmem: &'static GlobalMemory,
     pub(super) thread: atopology::ThreadId,
     pub(super) node: atopology::NodeId,
-    pub(super) replica: Arc<NodeReplicated<KernelNode>>,
     pub(super) fs_replica: Option<Arc<MlnrReplica<'static, MlnrKernelNode>>>,
 }
 
@@ -332,7 +329,6 @@ pub(crate) unsafe fn initialize<A>(
 ///  - Initialized topology
 ///  - Local APIC driver
 pub(super) fn boot_app_cores(
-    kernel_node: Arc<NodeReplicated<KernelNode>>,
     fs_logs: Vec<Arc<MlnrLog<'static, Modify>>>,
     fs_replica: Option<Arc<MlnrReplica<'static, MlnrKernelNode>>>,
 ) {
@@ -427,9 +423,6 @@ pub(super) fn boot_app_cores(
             global_memory,
             global_pmem,
             thread: thread.id,
-            replica: kernel_node
-                .try_clone()
-                .expect("Not enough memory to initialize system"),
             fs_replica: thread_fs_replica,
         })
         .expect("Not enough memory to initialize system");
diff --git a/kernel/src/arch/x86_64/mod.rs b/kernel/src/arch/x86_64/mod.rs
index dac19add9..e7c9f82da 100644
--- a/kernel/src/arch/x86_64/mod.rs
+++ b/kernel/src/arch/x86_64/mod.rs
@@ -188,20 +188,23 @@ pub(crate) fn start_app_core(args: Arc<AppCoreArgs>, initialized: &AtomicBool) {
     serial::init();
 
     {
-        let local_ridx = args.replica.register(args.node).unwrap();
-        crate::nr::NR_REPLICA.call_once(|| (args.replica.clone(), local_ridx));
-
         #[cfg(feature = "rackscale")]
         if crate::CMDLINE
             .get()
             .map_or(false, |c| c.mode == crate::cmdline::Mode::Client)
         {
+            let local_ridx = crate::nr::KERNEL_NODE_INSTANCE.register(args.node).unwrap();
+            crate::nr::NR_REPLICA_REGISTRATION.call_once(|| local_ridx);
             crate::nrproc::register_thread_with_process_replicas();
             crate::arch::rackscale::client_state::create_client_rpc_shmem_buffers();
         }
 
         #[cfg(not(feature = "rackscale"))]
-        crate::nrproc::register_thread_with_process_replicas();
+        {
+            let local_ridx = crate::nr::KERNEL_NODE_INSTANCE.register(args.node).unwrap();
+            crate::nr::NR_REPLICA_REGISTRATION.call_once(|| local_ridx);
+            crate::nrproc::register_thread_with_process_replicas();
+        }
 
         // For rackscale, only the controller needs cnrfs
         if let Some(core_fs_replica) = &args.fs_replica {
@@ -210,9 +213,8 @@ pub(crate) fn start_app_core(args: Arc<AppCoreArgs>, initialized: &AtomicBool) {
 
         // Don't modify this line without adjusting `coreboot` integration test:
         info!(
-            "Core #{} initialized (replica idx {:?}) in {:?}.",
+            "Core #{} initialized in {:?}.",
             args.thread,
-            local_ridx,
             start.elapsed()
         );
     }
@@ -445,38 +447,11 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {
     irq::ioapic_initialize();
 
     #[cfg(not(feature = "rackscale"))]
-    let kernel_node = {
-        use core::num::NonZeroUsize;
-        use crate::nr::KernelNode;
-        use nr2::nr::{AffinityChange, NodeReplicated};
-
-        // Let's go with one replica per NUMA node for now:
-        let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes());
-        let numa_nodes = NonZeroUsize::new(numa_nodes).expect("At least one NUMA node");
-
-        // Create the global operation log and first replica and store it (needs
-        // TLS)
-        let kernel_node: Arc<NodeReplicated<KernelNode>> = Arc::try_new(
-            NodeReplicated::new(numa_nodes, |afc: AffinityChange| {
-                let pcm = kcb::per_core_mem();
-                match afc {
-                    AffinityChange::Replica(r) => {
-                        pcm.set_mem_affinity(r).expect("Can't set affinity")
-                    }
-                    AffinityChange::Revert(orig) => {
-                        pcm.set_mem_affinity(orig).expect("Can't set affinity")
-                    }
-                }
-                return 0; // xxx
-            })
-            .expect("Not enough memory to initialize system"),
-        )
-        .expect("Not enough memory to initialize system");
-
-        let local_ridx = kernel_node.register(0).unwrap();
-        crate::nr::NR_REPLICA.call_once(|| (kernel_node.clone(), local_ridx));
-        kernel_node
-    };
+    {
+        lazy_static::initialize(&crate::nr::KERNEL_NODE_INSTANCE);
+        let local_ridx = crate::nr::KERNEL_NODE_INSTANCE.register(0).unwrap();
+        crate::nr::NR_REPLICA_REGISTRATION.call_once(|| local_ridx);
+    }
 
     // Starting to initialize file-system
     #[cfg(not(feature = "rackscale"))]
@@ -520,22 +495,21 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {
     }
 
     #[cfg(feature = "rackscale")]
-    let kernel_node = {
+    {
         if crate::CMDLINE
             .get()
             .map_or(false, |c| c.mode == crate::cmdline::Mode::Client)
         {
             lazy_static::initialize(&process::PROCESS_TABLE);
             crate::nrproc::register_thread_with_process_replicas();
-        }
 
-        lazy_static::initialize(&crate::nr::KERNEL_NODE_INSTANCE);
-        let kernel_node = crate::nr::KERNEL_NODE_INSTANCE.clone();
-
-        let local_ridx = kernel_node.register(0).unwrap();
-        log::info!("Kernel node replica idx is {:?}", local_ridx);
-        crate::nr::NR_REPLICA.call_once(|| (kernel_node.clone(), local_ridx));
-        kernel_node
+            lazy_static::initialize(&crate::nr::KERNEL_NODE_INSTANCE);
+            let kernel_node = crate::nr::KERNEL_NODE_INSTANCE.clone();
+    
+            let local_ridx = kernel_node.register(0).unwrap();
+            log::info!("Kernel node replica idx is {:?}", local_ridx);
+            crate::nr::NR_REPLICA_REGISTRATION.call_once(|| local_ridx);
+        }
     };
 
     #[cfg(feature = "gdb")]
@@ -548,7 +522,7 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {
     }
 
     // Bring up the rest of the system (needs topology, APIC, and global memory)
-    coreboot::boot_app_cores(kernel_node, fs_logs, fs_replica);
+    coreboot::boot_app_cores(fs_logs, fs_replica);
 
     // Done with initialization, now we go in
     // the arch-independent part:
diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs
index 3df58bd15..54b0c6095 100644
--- a/kernel/src/arch/x86_64/process.rs
+++ b/kernel/src/arch/x86_64/process.rs
@@ -120,15 +120,18 @@ lazy_static! {
             let process: Arc<RwLock<NodeReplicated<NrProcess<Ring3Process>>>> = Arc::try_new(
                 RwLock::new(NodeReplicated::new(num_replicas, |afc: AffinityChange| {
                     let pcm = kcb::per_core_mem();
+                    //log::info!("Got AffinityChange: {:?}", afc);
                     match afc {
                         AffinityChange::Replica(r) => {
-                            pcm.set_mem_affinity(mid_to_shmem_affinity(r)).expect("Can't change affinity");
+                            let affinity = { pcm.physical_memory.borrow().affinity };
+                            pcm.set_mem_affinity(mid_to_shmem_affinity(r)).expect("Can't set affinity");
+                            return affinity;
                         }
-                        AffinityChange::Revert(_orig) => {
-                            pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't set affinity")
+                        AffinityChange::Revert(orig) => {
+                            pcm.set_mem_affinity(orig).expect("Can't set affinity");
+                            return 0;
                         }
                     }
-                    return 0; // TODO(dynrep): Return error code
                 })
                 .expect("Not enough memory to initialize system"),
             ))
@@ -204,15 +207,18 @@ fn create_process_table() -> ArrayVec<Arc<RwLock<NodeReplicated<NrProcess<Ring3P
         let process: Arc<RwLock<NodeReplicated<NrProcess<Ring3Process>>>> = Arc::try_new(RwLock::new(
             NodeReplicated::new(num_replicas, |afc: AffinityChange| {
                 let pcm = kcb::per_core_mem();
+                //log::info!("Got AffinityChange: {:?}", afc);
                 match afc {
                     AffinityChange::Replica(r) => {
-                        pcm.set_mem_affinity(r).expect("Can't set affinity")
+                        let affinity = { pcm.physical_memory.borrow().affinity };
+                        pcm.set_mem_affinity(crate::memory::shmem_affinity::mid_to_shmem_affinity(r)).expect("Can't set affinity");
+                        return affinity;
                     }
                     AffinityChange::Revert(orig) => {
-                        pcm.set_mem_affinity(orig).expect("Can't set affinity")
+                        pcm.set_mem_affinity(orig).expect("Can't set affinity");
+                        return 0;
                     }
                 }
-                return 0; // TODO(dynrep): Return error code
             })
             .expect("Not enough memory to initialize system")),
         )
diff --git a/kernel/src/arch/x86_64/rackscale/controller.rs b/kernel/src/arch/x86_64/rackscale/controller.rs
index 3989403c8..24c313e57 100644
--- a/kernel/src/arch/x86_64/rackscale/controller.rs
+++ b/kernel/src/arch/x86_64/rackscale/controller.rs
@@ -84,6 +84,7 @@ pub(crate) fn run() {
     // Initialize processes
     use crate::arch::process::PROCESS_TABLE;
     lazy_static::initialize(&PROCESS_TABLE);
+    lazy_static::initialize(&crate::nr::KERNEL_NODE_INSTANCE);
 
     #[cfg(feature = "test-controller-shmem-alloc")]
     {
diff --git a/kernel/src/nr.rs b/kernel/src/nr.rs
index 3876e62fb..ad7cf168a 100644
--- a/kernel/src/nr.rs
+++ b/kernel/src/nr.rs
@@ -9,8 +9,8 @@ use hashbrown::HashMap;
 use log::{error, trace};
 use nr2::nr::{Dispatch, NodeReplicated, ThreadToken};
 use spin::Once;
+use crate::arch::kcb;
 
-#[cfg(feature = "rackscale")]
 use lazy_static::lazy_static;
 
 use crate::error::KError;
@@ -19,7 +19,7 @@ use crate::process::{Pid, MAX_PROCESSES};
 
 /// Kernel scheduler / process mgmt. replica
 #[thread_local]
-pub(crate) static NR_REPLICA: Once<(Arc<NodeReplicated<KernelNode>>, ThreadToken)> = Once::new();
+pub(crate) static NR_REPLICA_REGISTRATION: Once<ThreadToken> = Once::new();
 
 // Base nr log. The rackscale controller needs to save a reference to this, so it can give
 // clones to client so they can create replicas of their own.
@@ -30,7 +30,6 @@ lazy_static! {
         use nr2::nr::AffinityChange;
         use crate::memory::shmem_affinity::mid_to_shmem_affinity;
         use crate::memory::shmem_affinity::local_shmem_affinity;
-        use crate::arch::kcb;
 
         if crate::CMDLINE
             .get()
@@ -50,16 +49,19 @@ lazy_static! {
 
             let nr  = Arc::try_new(
                 NodeReplicated::new(num_replicas, |afc: AffinityChange| {
-                    /*let pcm = kcb::per_core_mem();
+                    let pcm = kcb::per_core_mem();
+                    //log::info!("Got AffinityChange: {:?}", afc);
                     match afc {
                         AffinityChange::Replica(r) => {
-                            pcm.set_mem_affinity(mid_to_shmem_affinity(r)).expect("Can't change affinity");
+                            let affinity = { pcm.physical_memory.borrow().affinity };
+                            pcm.set_mem_affinity(mid_to_shmem_affinity(r)).expect("Can't set affinity");
+                            return affinity;
                         }
-                        AffinityChange::Revert(_orig) => {
-                            pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't set affinity")
+                        AffinityChange::Revert(orig) => {
+                            pcm.set_mem_affinity(orig).expect("Can't set affinity");
+                            return 0;
                         }
-                    }*/
-                    return 0; // TODO(dynrep): Return error code
+                    }
                 })
                 .expect("Not enough memory to initialize system"),
             )
@@ -85,6 +87,41 @@ lazy_static! {
     };
 }
 
+#[cfg(not(feature = "rackscale"))]
+lazy_static! {
+    pub(crate) static ref KERNEL_NODE_INSTANCE: Arc<NodeReplicated<KernelNode>> = {
+        use core::num::NonZeroUsize;
+        use crate::nr::KernelNode;
+        use nr2::nr::{AffinityChange, NodeReplicated};
+
+        // Let's go with one replica per NUMA node for now:
+        let numa_nodes = core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes());
+        let numa_nodes = NonZeroUsize::new(numa_nodes).expect("At least one NUMA node");
+
+        // Create the global operation log and first replica and store it (needs
+        // TLS)
+        let kernel_node: Arc<NodeReplicated<KernelNode>> = Arc::try_new(
+            NodeReplicated::new(numa_nodes, |afc: AffinityChange| {
+                let pcm = kcb::per_core_mem();
+                match afc {
+                    AffinityChange::Replica(r) => {
+                        pcm.set_mem_affinity(r).expect("Can't set affinity")
+                    }
+                    AffinityChange::Revert(orig) => {
+                        pcm.set_mem_affinity(orig).expect("Can't set affinity")
+                    }
+                }
+                return 0; // xxx
+            })
+            .expect("Not enough memory to initialize system"),
+        )
+        .expect("Not enough memory to initialize system");
+
+        kernel_node
+    };
+}
+
+
 #[derive(PartialEq, Clone, Copy, Debug)]
 pub(crate) enum ReadOps {
     CurrentProcess(kpi::system::GlobalThreadId),
@@ -140,12 +177,8 @@ impl Default for KernelNode {
 
 impl KernelNode {
     pub(crate) fn synchronize() -> Result<(), KError> {
-        NR_REPLICA
-            .get()
-            .map_or(Err(KError::ReplicaNotSet), |(replica, token)| {
-                replica.sync(*token);
-                Ok(())
-            })
+        KERNEL_NODE_INSTANCE.sync(*NR_REPLICA_REGISTRATION.get().unwrap());
+        Ok(())
     }
 
     pub(crate) fn allocate_core_to_process(
@@ -154,18 +187,17 @@ impl KernelNode {
         affinity: Option<atopology::NodeId>,
         gtid: Option<kpi::system::GlobalThreadId>,
     ) -> Result<kpi::system::GlobalThreadId, KError> {
-        NR_REPLICA
-            .get()
-            .map_or(Err(KError::ReplicaNotSet), |(replica, token)| {
-                let op = Op::SchedAllocateCore(pid, affinity, gtid, entry_point);
-                let response = replica.execute_mut(op, *token);
+        // todo node id
+        crate::nr::NR_REPLICA_REGISTRATION.call_once(|| crate::nr::KERNEL_NODE_INSTANCE.register(0).unwrap());
 
-                match response {
-                    Ok(NodeResult::CoreAllocated(rgtid)) => Ok(rgtid),
-                    Err(e) => Err(e),
-                    Ok(_) => unreachable!("Got unexpected response"),
-                }
-            })
+        let op = Op::SchedAllocateCore(pid, affinity, gtid, entry_point);
+        let response = KERNEL_NODE_INSTANCE.execute_mut(op, *NR_REPLICA_REGISTRATION.get().unwrap());
+
+        match response {
+            Ok(NodeResult::CoreAllocated(rgtid)) => Ok(rgtid),
+            Err(e) => Err(e),
+            Ok(_) => unreachable!("Got unexpected response"),
+        }
     }
 
     pub(crate) fn release_core_from_process(
diff --git a/kernel/src/process.rs b/kernel/src/process.rs
index f3c13db20..5fe7a0f20 100644
--- a/kernel/src/process.rs
+++ b/kernel/src/process.rs
@@ -462,17 +462,14 @@ impl elfloader::ElfLoader for DataSecAllocator {
 /// Create an initial VSpace
 pub(crate) fn make_process<P: Process>(binary: &'static str) -> Result<Pid, KError> {
     // Allocate a new process
-    let pid =
-        crate::nr::NR_REPLICA
-            .get()
-            .map_or(Err(KError::ReplicaNotSet), |(replica, token)| {
-                let response = replica.execute_mut(crate::nr::Op::AllocatePid, *token)?;
-                if let crate::nr::NodeResult::PidAllocated(pid) = response {
-                    Ok(pid)
-                } else {
-                    Err(KError::ProcessLoadingFailed)
-                }
-            })?;
+    let pid = {
+        let response = crate::nr::KERNEL_NODE_INSTANCE.execute_mut(crate::nr::Op::AllocatePid, *crate::nr::NR_REPLICA_REGISTRATION.get().unwrap())?;
+        if let crate::nr::NodeResult::PidAllocated(pid) = response {
+            Ok(pid)
+        } else {
+            Err(KError::ProcessLoadingFailed)
+        }
+    }?;
 
     #[cfg(feature = "rackscale")]
     let affinity = if crate::CMDLINE
diff --git a/kernel/src/scheduler/mod.rs b/kernel/src/scheduler/mod.rs
index e55a57893..fd83a11fa 100644
--- a/kernel/src/scheduler/mod.rs
+++ b/kernel/src/scheduler/mod.rs
@@ -9,7 +9,7 @@ use crate::arch::process::ArchProcessManagement;
 use crate::arch::timer;
 use crate::error::KError;
 use crate::nr;
-use crate::nr::NR_REPLICA;
+use crate::nr::{KERNEL_NODE_INSTANCE, NR_REPLICA_REGISTRATION};
 use crate::nrproc::NrProcess;
 use crate::process::{Executor, ResumeHandle};
 
@@ -36,62 +36,60 @@ pub(crate) fn schedule() -> ! {
 
     // No process assigned to core? Figure out if there is one now:
     if unlikely(!crate::arch::process::has_executor()) {
-        if let Some((replica, token)) = NR_REPLICA.get() {
-            loop {
-                let response = replica.execute(
-                    nr::ReadOps::CurrentProcess(*crate::environment::CORE_ID),
-                    *token,
-                );
+        loop {
+            let response = KERNEL_NODE_INSTANCE.execute(
+                nr::ReadOps::CurrentProcess(*crate::environment::CORE_ID),
+                *NR_REPLICA_REGISTRATION.get().unwrap(),
+            );
 
-                match response {
-                    Ok(nr::NodeResult::CoreInfo(ci)) => {
-                        let executor =
-                            NrProcess::allocate_executor(&apm, ci.pid).expect("This should work");
-                        unsafe {
-                            (*executor.vcpu_kernel()).resume_with_upcall = ci.entry_point;
-                        }
+            match response {
+                Ok(nr::NodeResult::CoreInfo(ci)) => {
+                    let executor =
+                        NrProcess::allocate_executor(&apm, ci.pid).expect("This should work");
+                    unsafe {
+                        (*executor.vcpu_kernel()).resume_with_upcall = ci.entry_point;
+                    }
 
-                        // info!("Start execution of {} on gtid {}", executor.eid, gtid);
-                        let no = crate::arch::process::swap_current_executor(executor);
-                        assert!(no.is_none(), "Handle the case where we replace a process.");
-                        if is_replica_main_thread {
-                            // Make sure we periodically try and advance the replica on main-thread
-                            // even if we're running something (e.g., if everything polls in
-                            // user-space we can livelock)
-                            timer::set(timer::DEFAULT_TIMER_DEADLINE);
-                        }
-                        break;
+                    // info!("Start execution of {} on gtid {}", executor.eid, gtid);
+                    let no = crate::arch::process::swap_current_executor(executor);
+                    assert!(no.is_none(), "Handle the case where we replace a process.");
+                    if is_replica_main_thread {
+                        // Make sure we periodically try and advance the replica on main-thread
+                        // even if we're running something (e.g., if everything polls in
+                        // user-space we can livelock)
+                        timer::set(timer::DEFAULT_TIMER_DEADLINE);
                     }
-                    Err(KError::NoExecutorForCore) => {
-                        if is_replica_main_thread {
-                            // There is no process but we're the "main" thread,
-                            // aggressively try and advance the replica
-                            let start = rawtime::Instant::now();
-                            crate::arch::advance_fs_replica();
+                    break;
+                }
+                Err(KError::NoExecutorForCore) => {
+                    if is_replica_main_thread {
+                        // There is no process but we're the "main" thread,
+                        // aggressively try and advance the replica
+                        let start = rawtime::Instant::now();
+                        crate::arch::advance_fs_replica();
 
-                            if start.elapsed().as_millis() < 1 {
-                                // Wait for a bit in case we don't end up doing
-                                // any work, otherwise this causes too much
-                                // contention and tput drops around ~300k
-                                for _i in 0..25_000 {
-                                    core::hint::spin_loop();
-                                }
+                        if start.elapsed().as_millis() < 1 {
+                            // Wait for a bit in case we don't end up doing
+                            // any work, otherwise this causes too much
+                            // contention and tput drops around ~300k
+                            for _i in 0..25_000 {
+                                core::hint::spin_loop();
                             }
-                            continue;
-                        } else {
-                            // There is no process, set a timer and go to sleep
-                            timer::set(timer::DEFAULT_TIMER_DEADLINE);
                         }
-                        crate::arch::halt();
-                    }
-                    other => {
-                        unreachable!(
-                            "Unexpected return from ReadOps::CurrentExecutor {:?}.",
-                            other
-                        );
+                        continue;
+                    } else {
+                        // There is no process, set a timer and go to sleep
+                        timer::set(timer::DEFAULT_TIMER_DEADLINE);
                     }
-                };
-            }
+                    crate::arch::halt();
+                }
+                other => {
+                    unreachable!(
+                        "Unexpected return from ReadOps::CurrentExecutor {:?}.",
+                        other
+                    );
+                }
+            };
         }
     }
     debug_assert!(
diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs
index 349a3326e..f171fd47d 100644
--- a/kernel/tests/s11_rackscale_benchmarks.rs
+++ b/kernel/tests/s11_rackscale_benchmarks.rs
@@ -880,6 +880,161 @@ fn rackscale_memcached_dcm(transport: RackscaleTransport, dcm_config: Option<DCM
     }
 }
 
+#[test]
+#[cfg(not(feature = "baremetal"))]
+fn s11_rackscale_memcached_dynrep_benchmark_internal() {
+    let transport = RackscaleTransport::Shmem;
+    let is_smoke = cfg!(feature = "smoke");
+
+    let file_name = format!(
+        "rackscale_{}_memcached_benchmark.csv",
+        transport.to_string(),
+    );
+    let _ignore = std::fs::remove_file(file_name.clone());
+
+    let built = BuildArgs::default()
+        .module("rkapps")
+        .user_feature("rkapps:memcached-bench")
+        .set_rackscale(true)
+        .kernel_feature("pages-4k")
+        .release()
+        .build();
+
+    fn controller_match_fn(
+        proc: &mut PtySession,
+        output: &mut String,
+        _cores_per_client: usize,
+        num_clients: usize,
+        file_name: &str,
+        is_baseline: bool,
+        arg: Option<MemcachedInternalConfig>,
+    ) -> Result<()> {
+        let _config = arg.expect("match function expects a memcached config");
+
+        // match the title
+        let (prev, matched) = proc.exp_regex(r#"INTERNAL BENCHMARK CONFIGURE"#)?;
+
+        *output += prev.as_str();
+        *output += matched.as_str();
+
+        // x_benchmark_mem = 10 MB
+        let (prev, matched) = proc.exp_regex(r#"x_benchmark_mem = (\d+) MB"#)?;
+        println!("> {}", matched);
+        let b_mem = matched.replace("x_benchmark_mem = ", "").replace(" MB", "");
+
+        *output += prev.as_str();
+        *output += matched.as_str();
+
+        // number of threads: 3
+        let (prev, matched) = proc.exp_regex(r#"number of threads: (\d+)"#)?;
+        println!("> {}", matched);
+        let b_threads = matched.replace("number of threads: ", "");
+
+        *output += prev.as_str();
+        *output += matched.as_str();
+
+        // number of keys: 131072
+        let (prev, matched) = proc.exp_regex(r#"number of keys: (\d+)"#)?;
+        println!("> {}", matched);
+
+        *output += prev.as_str();
+        *output += matched.as_str();
+
+        let (prev, matched) = proc.exp_regex(r#"Executing (\d+) queries with (\d+) threads"#)?;
+        println!("> {}", matched);
+
+        *output += prev.as_str();
+        *output += matched.as_str();
+
+        // benchmark took 129 seconds
+        let (prev, matched) = proc.exp_regex(r#"benchmark took (\d+) ms"#)?;
+        println!("> {}", matched);
+        let b_time = matched.replace("benchmark took ", "").replace(" ms", "");
+
+        *output += prev.as_str();
+        *output += matched.as_str();
+
+        // benchmark took 7937984 queries / second
+        let (prev, matched) = proc.exp_regex(r#"benchmark took (\d+) queries / second"#)?;
+        println!("> {}", matched);
+        let b_thpt = matched
+            .replace("benchmark took ", "")
+            .replace(" queries / second", "");
+
+        *output += prev.as_str();
+        *output += matched.as_str();
+
+        let (prev, matched) = proc.exp_regex(r#"benchmark executed (\d+)"#)?;
+        println!("> {}", matched);
+        let b_queries = matched
+            .replace("benchmark executed ", "")
+            .split(" ")
+            .next()
+            .unwrap()
+            .to_string();
+
+        *output += prev.as_str();
+        *output += matched.as_str();
+
+        // Append parsed results to a CSV file
+        let write_headers = !Path::new(file_name).exists();
+        let mut csv_file = OpenOptions::new()
+            .append(true)
+            .create(true)
+            .open(file_name)
+            .expect("Can't open file");
+        if write_headers {
+            let row = "git_rev,benchmark,nthreads,mem,queries,time,thpt,num_clients,num_replicas\n";
+            let r = csv_file.write(row.as_bytes());
+            assert!(r.is_ok());
+        }
+
+        let actual_num_clients = if is_baseline { 0 } else { num_clients };
+
+        let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes());
+        assert!(r.is_ok());
+        let out = format!(
+            "memcached,{},{},{},{},{},{},{}",
+            b_threads, b_mem, b_queries, b_time, b_thpt, actual_num_clients, num_clients
+        );
+        let r = csv_file.write(out.as_bytes());
+        assert!(r.is_ok());
+        let r = csv_file.write("\n".as_bytes());
+        assert!(r.is_ok());
+
+        Ok(())
+    }
+
+    let config = if is_smoke {
+        MemcachedInternalConfig {
+            num_queries: 100_000,
+            mem_size: 16,
+        }
+    } else {
+        MemcachedInternalConfig {
+            num_queries: 1_000_000, // TODO(rackscale): should be 100_000_000,
+            mem_size: 16,           // TODO(rackscale): should be 32_000,
+        }
+    };
+
+    let mut test = RackscaleRun::new("userspace-smp".to_string(), built);
+    test.controller_match_fn = controller_match_fn;
+    test.transport = transport;
+    test.shmem_size *= 2;
+    test.use_affinity_shmem = cfg!(feature = "affinity-shmem");
+    test.use_qemu_huge_pages = cfg!(feature = "affinity-shmem");
+    test.file_name = file_name.to_string();
+    test.run_dhcpd_for_baseline = true;
+    test.num_clients = 2;
+    test.cores_per_client = 4;
+    test.cmd = format!(
+        r#"init=memcachedbench.bin initargs={} appcmd='--x-benchmark-mem={} --x-benchmark-queries={}'"#,
+        2*4, config.mem_size, config.num_queries
+    );
+    test.arg = Some(config);
+    test.run_rackscale();
+}
+
 #[test]
 #[cfg(not(feature = "baremetal"))]
 fn s11_rackscale_monetdb_benchmark() {
diff --git a/lib/node-replication2 b/lib/node-replication2
index b5ffc6a67..eba8f64b5 160000
--- a/lib/node-replication2
+++ b/lib/node-replication2
@@ -1 +1 @@
-Subproject commit b5ffc6a676ac036ebaed6f3de1d04f4ab4d36177
+Subproject commit eba8f64b55cbde8e510565a382465f77472c8238

From f5e28218d78817be18ddaadc20d1c3b2e516773a Mon Sep 17 00:00:00 2001
From: Gerd Zellweger <mail@gerdzellweger.com>
Date: Sat, 18 Nov 2023 20:12:16 -0800
Subject: [PATCH 13/32] [revert] Relax two asserts; need to revert and
 investigate why.

Signed-off-by: Gerd Zellweger <mail@gerdzellweger.com>
---
 kernel/src/memory/mcache.rs | 2 +-
 kernel/src/memory/mod.rs    | 2 +-
 kernel/src/nrproc.rs        | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/src/memory/mcache.rs b/kernel/src/memory/mcache.rs
index d3a02dc66..3277a62d1 100644
--- a/kernel/src/memory/mcache.rs
+++ b/kernel/src/memory/mcache.rs
@@ -325,7 +325,7 @@ impl<const BP: usize, const LP: usize> GrowBackend for MCache<BP, LP> {
         for frame in free_list {
             assert_eq!(frame.size(), LARGE_PAGE_SIZE);
             assert_eq!(frame.base % LARGE_PAGE_SIZE, 0);
-            assert_eq!(frame.affinity, self.node);
+            //assert_eq!(frame.affinity, self.node);
 
             self.large_page_addresses
                 .try_push(frame.base)
diff --git a/kernel/src/memory/mod.rs b/kernel/src/memory/mod.rs
index ce9b67085..8d087cced 100644
--- a/kernel/src/memory/mod.rs
+++ b/kernel/src/memory/mod.rs
@@ -760,7 +760,7 @@ unsafe impl GlobalAlloc for KernelAllocator {
                             } else if is_shmem_affinity(affinity) && !is_shmem_addr_with_affinity(ptr as u64, affinity, true) {
                                 // TODO(rackscale): should switch to non-shmem affinity for alloc below.
                                 // TODO(rackscale): check if shmem is a match for id?
-                                panic!("Trying to realloc shmem to wrong or non- shmem allocator");
+                                //panic!("Trying to realloc shmem to wrong or non- shmem allocator");
                             } else if !is_shmem_affinity(affinity) && is_shmem_addr(ptr as u64, false, true) {
                                 // TODO(rackscale): should switch to use shmem affinity for alloc below.
                                 // TODO(rackscale): check if shmem is a match for id?
diff --git a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs
index 8246fc217..9bdaf1f2f 100644
--- a/kernel/src/nrproc.rs
+++ b/kernel/src/nrproc.rs
@@ -45,7 +45,7 @@ pub(crate) fn register_thread_with_process_replicas() {
             debug_assert!(PROCESS_TABLE.len() > pid, "Invalid PID");
 
             let token = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).register(node);
-            log::debug!("MT_ID is {}: {node} registered {pid} {token:?}", *crate::environment::MT_ID);
+            log::info!("MT_ID is {}: {node} registered {pid} {token:?}", *crate::environment::MT_ID);
             tokens.push(token.expect("Need to be able to register"));
         }
 

From 13abf373f7286256704e31b75854c8f6fb9397bd Mon Sep 17 00:00:00 2001
From: Gerd Zellweger <mail@gerdzellweger.com>
Date: Sun, 19 Nov 2023 23:52:47 -0800
Subject: [PATCH 14/32] A working configuration.

Signed-off-by: Gerd Zellweger <mail@gerdzellweger.com>
---
 kernel/Cargo.toml                        | 2 ++
 kernel/src/arch/x86_64/irq.rs            | 3 ++-
 kernel/tests/s11_rackscale_benchmarks.rs | 7 +++++--
 usr/rkapps/build.rs                      | 4 ++--
 4 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml
index 7656474c0..ff2b39eed 100644
--- a/kernel/Cargo.toml
+++ b/kernel/Cargo.toml
@@ -103,6 +103,8 @@ rackscale = ["rpc"]
 gdb = []
 # Use to enable 4kiB only in kernel - useful for memcached, similar benchmarks
 pages-4k = []
+# dynrep benchmarking
+dynrep = []
 
 #
 # The following features are used for testing *only* and should not be enabled
diff --git a/kernel/src/arch/x86_64/irq.rs b/kernel/src/arch/x86_64/irq.rs
index de43bc7da..8aef27e5c 100644
--- a/kernel/src/arch/x86_64/irq.rs
+++ b/kernel/src/arch/x86_64/irq.rs
@@ -528,7 +528,8 @@ unsafe fn timer_handler(_a: &ExceptionArguments) {
         nrproc::NrProcess::<Ring3Process>::synchronize(pid);
     }
     
-    if *crate::environment::MT_ID == 0 {
+    #[cfg(feature = "dynrep")]
+    if *crate::environment::MT_ID == 4 {
         use crate::arch::process::current_pid;
         let pid = current_pid().expect("dont have a pid?");
 
diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs
index f171fd47d..22130ea90 100644
--- a/kernel/tests/s11_rackscale_benchmarks.rs
+++ b/kernel/tests/s11_rackscale_benchmarks.rs
@@ -897,6 +897,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
         .user_feature("rkapps:memcached-bench")
         .set_rackscale(true)
         .kernel_feature("pages-4k")
+        .kernel_feature("dynrep")
         .release()
         .build();
 
@@ -1002,17 +1003,19 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
         let r = csv_file.write("\n".as_bytes());
         assert!(r.is_ok());
 
+        println!("> {}", output);
+
         Ok(())
     }
 
     let config = if is_smoke {
         MemcachedInternalConfig {
-            num_queries: 100_000,
+            num_queries: 100_000_000,
             mem_size: 16,
         }
     } else {
         MemcachedInternalConfig {
-            num_queries: 1_000_000, // TODO(rackscale): should be 100_000_000,
+            num_queries: 100_000_000, // TODO(rackscale): should be 100_000_000,
             mem_size: 16,           // TODO(rackscale): should be 32_000,
         }
     };
diff --git a/usr/rkapps/build.rs b/usr/rkapps/build.rs
index 534bbdd80..064c22d36 100644
--- a/usr/rkapps/build.rs
+++ b/usr/rkapps/build.rs
@@ -138,11 +138,11 @@ fn main() {
             .unwrap();
 
         println!(
-            "CHECKOUT be303d8bfc2c40d63704848bb3acd9e075dd61e4 {:?}",
+            "CHECKOUT 1f36e49a3c5fc529cd80278a1bd73b9941c813e6 {:?}",
             out_dir
         );
         Command::new("git")
-            .args(&["checkout", "be303d8bfc2c40d63704848bb3acd9e075dd61e4"])
+            .args(&["checkout", "1f36e49a3c5fc529cd80278a1bd73b9941c813e6"])
             .current_dir(&Path::new(&out_dir))
             .status()
             .unwrap();

From 64e2fec819c82007ddae414c8caf93969d466e55 Mon Sep 17 00:00:00 2001
From: Gerd Zellweger <mail@gerdzellweger.com>
Date: Mon, 27 Nov 2023 15:23:55 -0800
Subject: [PATCH 15/32] merge fixups.

Signed-off-by: Gerd Zellweger <mail@gerdzellweger.com>
---
 Cargo.lock           | 47 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/src/nr.rs     | 20 ++++++++-----------
 kernel/src/nrproc.rs |  2 --
 3 files changed, 55 insertions(+), 14 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 050df3015..e9763ca33 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -238,6 +238,12 @@ version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f8fe8f5a8a398345e52358e18ff07cc17a568fbca5c6f73873d3a62056309603"
 
+[[package]]
+name = "base64ct"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
+
 [[package]]
 name = "beef"
 version = "0.5.2"
@@ -323,6 +329,15 @@ version = "1.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
 
+[[package]]
+name = "block-buffer"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
+dependencies = [
+ "generic-array 0.14.6",
+]
+
 [[package]]
 name = "bootloader"
 version = "0.0.1"
@@ -590,6 +605,16 @@ dependencies = [
  "cfg-if 1.0.0",
 ]
 
+[[package]]
+name = "crypto-common"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
+dependencies = [
+ "generic-array 0.14.6",
+ "typenum",
+]
+
 [[package]]
 name = "cstr_core"
 version = "0.2.5"
@@ -659,6 +684,16 @@ dependencies = [
  "num_cpus",
 ]
 
+[[package]]
+name = "digest"
+version = "0.10.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
+dependencies = [
+ "block-buffer",
+ "crypto-common",
+]
+
 [[package]]
 name = "displaydoc"
 version = "0.2.3"
@@ -1159,12 +1194,14 @@ name = "init"
 version = "0.1.0"
 dependencies = [
  "arrayvec",
+ "base64ct",
  "cstr_core",
  "hashbrown 0.11.2",
  "kpi",
  "lazy_static",
  "lineup",
  "log",
+ "md-5",
  "num-traits",
  "proptest 1.0.0 (git+https://github.com/gz/proptest.git?branch=x86-asm)",
  "rawtime",
@@ -1401,6 +1438,16 @@ version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00"
 
+[[package]]
+name = "md-5"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
+dependencies = [
+ "cfg-if 1.0.0",
+ "digest",
+]
+
 [[package]]
 name = "memchr"
 version = "2.5.0"
diff --git a/kernel/src/nr.rs b/kernel/src/nr.rs
index ad7cf168a..fb62458b8 100644
--- a/kernel/src/nr.rs
+++ b/kernel/src/nr.rs
@@ -205,18 +205,14 @@ impl KernelNode {
         affinity: Option<atopology::NodeId>,
         gtid: kpi::system::GlobalThreadId,
     ) -> Result<(), KError> {
-        NR_REPLICA
-            .get()
-            .map_or(Err(KError::ReplicaNotSet), |(replica, token)| {
-                let op = Op::SchedReleaseCore(pid, affinity, gtid);
-                let response = replica.execute_mut(op, *token);
-
-                match response {
-                    Ok(NodeResult::CoreReleased) => Ok(()),
-                    Err(e) => Err(e),
-                    Ok(_) => unreachable!("Got unexpected response"),
-                }
-            })
+        let op = Op::SchedReleaseCore(pid, affinity, gtid);
+        let response = KERNEL_NODE_INSTANCE
+            .execute_mut(op,*NR_REPLICA_REGISTRATION.get().unwrap());
+        match response {
+            Ok(NodeResult::CoreReleased) => Ok(()),
+            Err(e) => Err(e),
+            Ok(_) => unreachable!("Got unexpected response"),
+        }
     }
 }
 
diff --git a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs
index 9bdaf1f2f..8a0512474 100644
--- a/kernel/src/nrproc.rs
+++ b/kernel/src/nrproc.rs
@@ -15,8 +15,6 @@ use nr2::nr::{Dispatch, NodeReplicated, ThreadToken, rwlock::RwLock};
 use spin::Once;
 
 use crate::arch::process::PROCESS_TABLE;
-use crate::arch::MAX_NUMA_NODES;
-use crate::arch::Module;
 use crate::error::{KError, KResult};
 use crate::memory::vspace::{AddressSpace, MapAction, TlbFlushHandle};
 use crate::memory::{Frame, PAddr, VAddr};

From ef9b59ce6318a0d75ee3132091d99f1041668601 Mon Sep 17 00:00:00 2001
From: Gerd Zellweger <mail@gerdzellweger.com>
Date: Mon, 27 Nov 2023 17:22:22 -0800
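Subject: [PATCH 16/32] JUST LOGGING.

Mostly temporary info-level debug logging, but note that this patch is
not behavior-neutral. It also:
 - changes the controller MCache from <2048, 65536> to <2048, 2048>,
 - adds a 0xbeef-byte probe allocation in kernel/src/arch/x86_64/tlb.rs,
 - zeroes the large-page frame when refilling for a 0xbeef-byte layout,
 - enables the previously commented-out controller-side shmem write
   test, which now writes a zero byte at 4 KiB offsets 1..512 of the
   region (the full-region fill stays commented out).
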
Signed-off-by: Gerd Zellweger <mail@gerdzellweger.com>
---
 kernel/src/arch/x86_64/mod.rs                 |  7 ++++
 .../src/arch/x86_64/rackscale/client_state.rs |  6 +++-
 .../src/arch/x86_64/rackscale/controller.rs   |  6 +++-
 .../arch/x86_64/rackscale/controller_state.rs |  4 +--
 kernel/src/arch/x86_64/tlb.rs                 | 13 +++++++-
 kernel/src/memory/mod.rs                      | 32 +++++++++++++++++--
 kernel/src/transport/shmem.rs                 | 22 ++++++++++---
 lib/rpc/src/transport/shmem/queue_mpmc.rs     | 12 ++++++-
 8 files changed, 90 insertions(+), 12 deletions(-)

diff --git a/kernel/src/arch/x86_64/mod.rs b/kernel/src/arch/x86_64/mod.rs
index e7c9f82da..bee96be57 100644
--- a/kernel/src/arch/x86_64/mod.rs
+++ b/kernel/src/arch/x86_64/mod.rs
@@ -419,7 +419,9 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {
             {
                 use crate::arch::rackscale::controller_state::CONTROLLER_SHMEM_CACHES;
                 lazy_static::initialize(&CONTROLLER_SHMEM_CACHES);
+                log::info!("before lazy_static::initialize DCM_CLIENT");
                 lazy_static::initialize(&crate::arch::rackscale::dcm::DCM_CLIENT);
+                log::info!("after lazy_static::initialize DCM_CLIENT");
             } else {
                 use crate::arch::irq::{
                     REMOTE_TLB_WORK_PENDING_SHMEM_VECTOR, REMOTE_TLB_WORK_PENDING_VECTOR,
@@ -437,6 +439,7 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {
             }
         }
         // Initialize the workqueues used for distributed TLB shootdowns
+        log::info!("after lazy_static::initialize RACKSCALE_CLIENT_WORKQUEUES");
         lazy_static::initialize(&crate::arch::tlb::RACKSCALE_CLIENT_WORKQUEUES);
         log::info!("Finished inititializing client work queues");
     }
@@ -472,13 +475,17 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {
         .get()
         .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller)
     {
+        log::info!("1 cnrfs");
         let fs_logs = crate::fs::cnrfs::allocate_logs();
         let fs_logs_cloned = fs_logs
             .try_clone()
             .expect("Not enough memory to initialize system");
         // Construct the first replica
+        log::info!("2 cnrfs");
         let fs_replica = MlnrReplica::<MlnrKernelNode>::new(fs_logs_cloned);
         crate::fs::cnrfs::init_cnrfs_on_thread(fs_replica.clone());
+        log::info!("3 cnrfs");
+
         (fs_logs, Some(fs_replica))
     } else {
         use alloc::vec::Vec;
diff --git a/kernel/src/arch/x86_64/rackscale/client_state.rs b/kernel/src/arch/x86_64/rackscale/client_state.rs
index daf91843a..795cbbaf8 100644
--- a/kernel/src/arch/x86_64/rackscale/client_state.rs
+++ b/kernel/src/arch/x86_64/rackscale/client_state.rs
@@ -45,6 +45,7 @@ impl ClientState {
             .get()
             .map_or(false, |c| c.transport == Transport::Ethernet)
         {
+            log::info!("before client ready");
             Arc::new(Mutex::new(
                 crate::transport::ethernet::init_ethernet_rpc(
                     smoltcp::wire::IpAddress::v4(172, 31, 0, 11),
@@ -59,14 +60,17 @@ impl ClientState {
                 crate::transport::shmem::init_shmem_rpc(true)
                     .expect("Failed to initialize shmem RPC"),
             ))
+
         };
 
+        log::info!("per_process_base_pages");
         let mut per_process_base_pages = ArrayVec::new();
         for _i in 0..MAX_PROCESSES {
             // TODO(rackscale): this is a bogus affinity because it should really be "ANY_SHMEM"
             per_process_base_pages.push(Mutex::new(FrameCacheBase::new(local_shmem_affinity())));
         }
 
+        log::info!("affinity_base_pages");
         let mut affinity_base_pages = ArrayVec::new();
         for i in 0..MAX_MACHINES {
             affinity_base_pages.push(Mutex::new(Box::new(FrameCacheBase::new(
@@ -74,7 +78,7 @@ impl ClientState {
             )) as Box<dyn MemManager + Send>));
         }
 
-        log::debug!("Finished initializing client state");
+        log::info!("Finished initializing client state");
         ClientState {
             rpc_client,
             affinity_base_pages: Arc::new(affinity_base_pages),
diff --git a/kernel/src/arch/x86_64/rackscale/controller.rs b/kernel/src/arch/x86_64/rackscale/controller.rs
index 24c313e57..d0e989ce8 100644
--- a/kernel/src/arch/x86_64/rackscale/controller.rs
+++ b/kernel/src/arch/x86_64/rackscale/controller.rs
@@ -65,20 +65,24 @@ pub(crate) fn run() {
 
     ClientReadyCount.fetch_add(1, Ordering::SeqCst);
 
+    log::info!("before DCMServerReady");
     // Wait for all clients to connect before fulfilling any RPCs.
     while !DCMServerReady.load(Ordering::SeqCst) {}
+    log::info!("after DCMServerReady");
 
     server
         .add_client(&CLIENT_REGISTRAR)
         .expect("Failed to accept client");
 
     ClientReadyCount.fetch_add(1, Ordering::SeqCst);
-
+    
+    log::info!("before ClientReadyCount");
     // Wait for all clients to connect before fulfilling any RPCs.
     while ClientReadyCount.load(Ordering::SeqCst) != (*crate::environment::NUM_MACHINES - 1) as u64
     {
     }
 
+    log::info!("before lazy_static::initialize(&PROCESS_TABLE);");
     // TODO(dynrep): here is the point where we have all the memory regions and
     // could create NodeReplicated instances for each client.
     // Initialize processes
diff --git a/kernel/src/arch/x86_64/rackscale/controller_state.rs b/kernel/src/arch/x86_64/rackscale/controller_state.rs
index 8b698c5c3..ff1e09f33 100644
--- a/kernel/src/arch/x86_64/rackscale/controller_state.rs
+++ b/kernel/src/arch/x86_64/rackscale/controller_state.rs
@@ -25,7 +25,7 @@ lazy_static! {
     pub(crate) static ref CONTROLLER_SHMEM_CACHES: Arc<ArrayVec<Mutex<Box<dyn MemManager + Send>>, MAX_MACHINES>> = {
         let mut shmem_caches = ArrayVec::new();
         // TODO(rackscale): think about how we should constrain the mcache?
-        shmem_caches.push(Mutex::new(Box::new(MCache::<2048, 65536>::new_with_frame::<2048, 65536>(
+        shmem_caches.push(Mutex::new(Box::new(MCache::<2048, 2048>::new_with_frame::<2048, 2048>(
             local_shmem_affinity(),
             get_affinity_shmem(),
         )) as Box<dyn MemManager + Send>));
@@ -33,7 +33,7 @@ lazy_static! {
             shmem_caches.push(Mutex::new(Box::new(FrameCacheBase::new(mid_to_shmem_affinity(i)))
                 as Box<dyn MemManager + Send>));
         }
-
+        log::info!("CONTROLLER_SHMEM_CACHES initialized");
         Arc::new(shmem_caches)
     };
 }
diff --git a/kernel/src/arch/x86_64/tlb.rs b/kernel/src/arch/x86_64/tlb.rs
index 657ffe835..87ecbb987 100644
--- a/kernel/src/arch/x86_64/tlb.rs
+++ b/kernel/src/arch/x86_64/tlb.rs
@@ -54,7 +54,8 @@ lazy_static! {
             use crate::arch::kcb::per_core_mem;
             use crate::memory::shmem_affinity::local_shmem_affinity;
             let local_affinity = local_shmem_affinity();
-
+            
+            log::info!("before affinity");
             // We want to allocate the queues in shared memory
             let affinity = {
                 let pcm = per_core_mem();
@@ -62,11 +63,19 @@ lazy_static! {
                 pcm.set_mem_affinity(local_affinity).expect("Can't change affinity");
                 affinity
             };
+            log::info!("changed affinity to {} local_affinity={}", affinity, local_affinity);
+            use core::alloc::Layout;
+            let ptr = unsafe { alloc::alloc::alloc(Layout::from_size_align(0xbeef, 16).unwrap()) };
+            log::info!("ptr {:p}", ptr);
 
             let channels = {
                 let num_clients = *crate::environment::NUM_MACHINES - 1;
+                log::info!("{num_clients} machines={}", *crate::environment::NUM_MACHINES);
+
                 let mut channels =
                     Vec::try_with_capacity(num_clients).expect("Not enough memory to initialize system");
+                log::info!("channels done");
+
                 for _i in 0..num_clients {
                     // ArrayQueue does memory allocation on `new`, maybe have try_new,
                     // but this is fine since it's during initialization
@@ -75,12 +84,14 @@ lazy_static! {
 
                 Arc::new(channels)
             };
+            log::info!("after channels");
 
             // Reset mem allocator to use per core memory again
             if affinity != local_affinity {
                 let pcm = per_core_mem();
                 pcm.set_mem_affinity(affinity).expect("Can't change affinity");
             }
+            log::info!("after affinity");
 
             channels
         } else {
diff --git a/kernel/src/memory/mod.rs b/kernel/src/memory/mod.rs
index 8d087cced..38e38c7e0 100644
--- a/kernel/src/memory/mod.rs
+++ b/kernel/src/memory/mod.rs
@@ -86,8 +86,18 @@ impl KernelAllocator {
                     let mut zone_allocator = pcm.ezone_allocator()?;
                     zone_allocator.allocate(layout).map_err(|e| e.into())
                 } else {
+                    if layout.size() == 0xbeef {
+                        log::info!("before 0xbeef bytes");
+                    }
                     let mut zone_allocator = pcm.zone_allocator()?;
-                    zone_allocator.allocate(layout).map_err(|e| e.into())
+                    if layout.size() == 0xbeef {
+                        log::info!("after getting zone allocator");
+                    }
+                    let ptr = zone_allocator.allocate(layout).map_err(|e| e.into());
+                    if layout.size() == 0xbeef {
+                        log::info!("after alloc {:?}", ptr);
+                    }
+                    ptr
                 }
             }
             AllocatorType::MemManager if layout.size() <= LARGE_PAGE_SIZE => {
@@ -219,7 +229,13 @@ impl KernelAllocator {
             (AllocatorType::Zone, KError::CacheExhausted) => {
                 let (needed_base_pages, needed_large_pages) =
                     KernelAllocator::refill_amount(layout);
+                if layout.size() == 0xbeef {
+                    log::info!("before maybe_refill_tcache");
+                }
                 self.maybe_refill_tcache(needed_base_pages, needed_large_pages)?;
+                if layout.size() == 0xbeef {
+                    log::info!("before try_refill_zone");
+                }
                 self.try_refill_zone(layout)
             }
             (AllocatorType::MapBig, _) => {
@@ -561,14 +577,26 @@ impl KernelAllocator {
                 }
             } else {
                 // Needs a large page
-                let frame = cas.pmanager.allocate_large_page()?;
+                if layout.size() == 0xbeef {
+                    log::info!("before allocate_large_page");
+                }
+
+                let mut frame = cas.pmanager.allocate_large_page()?;
                 unsafe {
                     let large_page_ptr: *mut slabmalloc::LargeObjectPage = frame
                         .uninitialized::<slabmalloc::LargeObjectPage>()
                         .as_mut_ptr();
+                    if layout.size() == 0xbeef {
+                        log::info!("before zero frame={:?}", frame);
+                        frame.zero();
+                        log::info!("before refill_large frame={:?}", frame);
+                    }
                     cas.zone_allocator
                         .refill_large(layout, &mut *large_page_ptr)
                         .expect("This should always succeed");
+                    if layout.size() == 0xbeef {
+                        log::info!("after refill_large");
+                    }
                 }
             }
         }
diff --git a/kernel/src/transport/shmem.rs b/kernel/src/transport/shmem.rs
index fa60fae48..60b178262 100644
--- a/kernel/src/transport/shmem.rs
+++ b/kernel/src/transport/shmem.rs
@@ -176,22 +176,34 @@ impl ShmemDevice {
             )
             .expect("Failed to write potential shmem memory region addresses");
 
-        /*
         // Note: leaving this code as a comment as a way to test if all shmem is writeable.
+        
         #[cfg(feature = "rackscale")]
         match crate::CMDLINE.get().map_or(Mode::Native, |c| c.mode) {
             Mode::Controller => {
+                //let mut f=  Frame::new(PAddr::from(0x70003f600000u64),2*1024*1024,12);
+                //log::info!("zeroing out shmem");
+                //unsafe {f.zero()};
+                log::info!("zeroing out shmem {:x} -- {:x}", mem_region.address, mem_region.address + mem_region.size);
+                for offset in 1..512 {
+                    let myptr: *mut u8 = 
+                            (KERNEL_BASE + mem_region.address + (offset*4096)) as *mut u8;
+                    log::info!("at addr {:x}", (myptr as u64 - KERNEL_BASE));
+                    unsafe { *myptr = 0x0; }
+                }
+                /*
                 let mymemslice = unsafe {
                     core::slice::from_raw_parts_mut(
                         (KERNEL_BASE + mem_region.address) as *mut u8,
                         mem_region.size as usize,
                     )
-                };
-                mymemslice.fill(0);
+                }; 
+                mymemslice.fill(0);*/
+                log::info!("after out shmem {:x} -- {:x}", mem_region.address, mem_region.address + mem_region.size);
             }
             _ => {}
         }
-        */
+         
 
         // Map the MSI-X table into kernel space
         kvspace
@@ -344,6 +356,8 @@ pub(crate) fn create_shmem_transport(mid: MachineId) -> KResult<ShmemTransport<'
     assert!(region_size as u64 >= SHMEM_TRANSPORT_SIZE);
 
     let allocator = ShmemAllocator::new(base_addr.as_u64(), SHMEM_TRANSPORT_SIZE);
+    crate::CMDLINE.get().map(|c| log::info!("c.mode is {:?}", c.mode));
+
     match crate::CMDLINE.get().map_or(Mode::Native, |c| c.mode) {
         Mode::Controller => {
             let server_to_client_queue =
diff --git a/lib/rpc/src/transport/shmem/queue_mpmc.rs b/lib/rpc/src/transport/shmem/queue_mpmc.rs
index 8d4e3cbe7..7febd1ca9 100644
--- a/lib/rpc/src/transport/shmem/queue_mpmc.rs
+++ b/lib/rpc/src/transport/shmem/queue_mpmc.rs
@@ -154,18 +154,24 @@ impl<'a> State<'a> {
         // Calculate and check total data to push
         let push_data_len = values.iter().fold(0, |acc, x| acc + x.len());
         assert!(push_data_len <= QUEUE_ENTRY_SIZE);
-        log::debug!("Attempt to push {:?} bytes", push_data_len);
+        log::info!("Attempt to push {:?} bytes", push_data_len);
 
         if push_data_len == 0 {
             return true;
         }
 
         let mask = self.mask;
+        log::info!("before enq");
         let mut pos = self.enqueue_pos(Relaxed);
+        log::info!("after enq");
         loop {
+            log::info!("before node");
             let node = &self.buffer[pos & mask];
+            log::info!("after node");
             let seq = (*node.get()).sequence.load(Acquire);
+            log::info!("after seq");
             let diff: isize = seq as isize - pos as isize;
+            log::info!("seq {:#x} diff {:#x}", seq, diff);
 
             match diff {
                 0 => {
@@ -178,6 +184,8 @@ impl<'a> State<'a> {
                             // Copy each value into the queue
                             let mut offset = 0;
                             for d in values.iter() {
+                                log::info!("d is at {:#x}", d.as_ptr() as usize);
+
                                 (*node.get()).value[offset..offset + d.len()].copy_from_slice(d);
                                 offset += d.len();
                             }
@@ -192,6 +200,8 @@ impl<'a> State<'a> {
                 _ => pos = self.enqueue_pos(Relaxed),
             }
         }
+        log::info!("pushed {:?} bytes", push_data_len);
+
         true
     }
 

From f2935e0129933ef59e843bd3cdd417d57309abea Mon Sep 17 00:00:00 2001
From: Erika Hunhoff <hunhoff.erika@gmail.com>
Date: Tue, 28 Nov 2023 17:15:03 +0000
Subject: [PATCH 17/32] Ran cargo format, fixed some compilation issues for
 unit tests

---
 kernel/src/arch/unix/process.rs               |   4 +-
 kernel/src/arch/unix/vspace.rs                |   2 +-
 kernel/src/arch/x86_64/irq.rs                 |  26 ++-
 kernel/src/arch/x86_64/mod.rs                 |   2 +-
 kernel/src/arch/x86_64/process.rs             |  66 +++---
 .../src/arch/x86_64/rackscale/client_state.rs |   1 -
 .../src/arch/x86_64/rackscale/controller.rs   |   2 +-
 .../x86_64/rackscale/get_shmem_structure.rs   |  18 +-
 kernel/src/arch/x86_64/syscall.rs             |   6 +-
 kernel/src/arch/x86_64/tlb.rs                 |   2 +-
 kernel/src/arch/x86_64/vspace/mod.rs          |   9 +-
 kernel/src/arch/x86_64/vspace/page_table.rs   |  10 +-
 kernel/src/environment.rs                     |   7 +-
 kernel/src/memory/vspace_model.rs             |   5 +
 kernel/src/nr.rs                              |  15 +-
 kernel/src/nrproc.rs                          | 196 +++++++++++-------
 kernel/src/process.rs                         |  19 +-
 kernel/src/syscalls.rs                        |   2 +-
 kernel/src/transport/shmem.rs                 |  29 ++-
 kernel/tests/s06_rackscale_tests.rs           |   2 +-
 kernel/tests/s10_benchmarks.rs                |   5 +-
 kernel/tests/s11_rackscale_benchmarks.rs      |   6 +-
 kernel/testutils/src/rackscale_runner.rs      |   6 +-
 lib/kpi/src/lib.rs                            |   2 +-
 lib/kpi/src/syscalls/process.rs               |   2 +-
 usr/init/src/init.rs                          |   4 +-
 26 files changed, 270 insertions(+), 178 deletions(-)

diff --git a/kernel/src/arch/unix/process.rs b/kernel/src/arch/unix/process.rs
index 206e5b00c..981de8813 100644
--- a/kernel/src/arch/unix/process.rs
+++ b/kernel/src/arch/unix/process.rs
@@ -6,12 +6,12 @@ use alloc::boxed::Box;
 use alloc::sync::Arc;
 use alloc::vec::Vec;
 use bootloader_shared::Module;
-use nr2::nr::rwlock::RwLock;
 use core::alloc::Allocator;
 use core::cell::RefCell;
 use core::num::NonZeroUsize;
 use core::ops::{Deref, DerefMut};
 use core::sync::atomic::{AtomicUsize, Ordering};
+use nr2::nr::rwlock::RwLock;
 use x86::current::paging::PAddr;
 
 use arrayvec::ArrayVec;
@@ -76,7 +76,7 @@ lazy_static! {
         for pid in 0..MAX_PROCESSES {
             processes.push(
                 Arc::try_new(RwLock::new(NodeReplicated::<NrProcess<UnixProcess>>::new(num_replicas, |afc: AffinityChange| {
-                    return 0; // TODO(dynrep): Return error code
+                    0 // TODO(dynrep): Return error code
                 }).expect("Not enough memory to initialize system"))).expect("Not enough memory to initialize system"));
         }
         processes
diff --git a/kernel/src/arch/unix/vspace.rs b/kernel/src/arch/unix/vspace.rs
index 4695a3f07..22b01d918 100644
--- a/kernel/src/arch/unix/vspace.rs
+++ b/kernel/src/arch/unix/vspace.rs
@@ -72,7 +72,7 @@ impl AddressSpace for VSpace {
     fn root(&self) -> PAddr {
         PAddr::zero()
     }
-    
+
     fn map_frame(&mut self, base: VAddr, frame: Frame, action: MapAction) -> Result<(), KError> {
         let ma = MappingInfo::new(frame, action);
         self.mappings.insert(ma.vrange(base), ma);
diff --git a/kernel/src/arch/x86_64/irq.rs b/kernel/src/arch/x86_64/irq.rs
index 8aef27e5c..3fe5c75fc 100644
--- a/kernel/src/arch/x86_64/irq.rs
+++ b/kernel/src/arch/x86_64/irq.rs
@@ -42,13 +42,13 @@ use apic::x2apic::X2APICDriver;
 use apic::ApicDriver;
 use klogger::{sprint, sprintln};
 use log::{info, trace, warn};
+use spin::Lazy;
 use x86::bits64::segmentation::Descriptor64;
 use x86::irq::*;
 use x86::segmentation::{
     BuildDescriptor, DescriptorBuilder, GateDescriptorBuilder, SegmentSelector,
 };
 use x86::{dtables, Ring};
-use spin::Lazy;
 
 use crate::arch::process::CURRENT_EXECUTOR;
 use crate::memory::vspace::MapAction;
@@ -527,35 +527,43 @@ unsafe fn timer_handler(_a: &ExceptionArguments) {
     for pid in 0..crate::process::MAX_PROCESSES {
         nrproc::NrProcess::<Ring3Process>::synchronize(pid);
     }
-    
+
     #[cfg(feature = "dynrep")]
     if *crate::environment::MT_ID == 4 {
         use crate::arch::process::current_pid;
         let pid = current_pid().expect("dont have a pid?");
 
-        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(10) && *REPLICA_STATE == 0 {
+        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(10)
+            && *REPLICA_STATE == 0
+        {
             info!("got a timer after 10s, remove rid 1");
-            let handles = nrproc::NrProcess::<Ring3Process>::remove_replica(pid, 1).expect("removed");
+            let handles =
+                nrproc::NrProcess::<Ring3Process>::remove_replica(pid, 1).expect("removed");
             #[cfg(not(feature = "rackscale"))]
             super::tlb::shootdown(handles[0].clone());
             unsafe { *REPLICA_STATE.as_mut_ptr() = 1 };
         }
-        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(20) && *REPLICA_STATE == 1 {
+        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(20)
+            && *REPLICA_STATE == 1
+        {
             info!("got a timer after 20s, add rid 1");
             let handles = nrproc::NrProcess::<Ring3Process>::add_replica(pid, 1).expect("added");
             #[cfg(not(feature = "rackscale"))]
             super::tlb::shootdown(handles[0].clone());
             unsafe { *REPLICA_STATE.as_mut_ptr() = 2 };
         }
-        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(30) && *REPLICA_STATE == 2 {
+        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(30)
+            && *REPLICA_STATE == 2
+        {
             info!("got a timer after 30s");
             unsafe { *REPLICA_STATE.as_mut_ptr() = 3 };
         }
-        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(40) && *REPLICA_STATE == 3 {
+        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(40)
+            && *REPLICA_STATE == 3
+        {
             info!("got a timer after 40s");
             unsafe { *REPLICA_STATE.as_mut_ptr() = 4 };
         }
-
     }
     let kcb = get_kcb();
 
@@ -787,7 +795,6 @@ pub extern "C" fn handle_generic_exception(a: ExceptionArguments) -> ! {
                 drop(pborrow);
 
                 kcb_iret_handle(kcb).resume()
-
             } else {
                 // Go to scheduler instead
                 crate::scheduler::schedule()
@@ -798,7 +805,6 @@ pub extern "C" fn handle_generic_exception(a: ExceptionArguments) -> ! {
 
             let kcb = get_kcb();
             if super::process::has_executor() {
-                
                 //let mut pborrow = super::process::CURRENT_EXECUTOR.borrow_mut();
                 //let p = pborrow.as_ref().unwrap();
                 //p.maybe_switch_vspace();
diff --git a/kernel/src/arch/x86_64/mod.rs b/kernel/src/arch/x86_64/mod.rs
index bee96be57..2abfe7fdd 100644
--- a/kernel/src/arch/x86_64/mod.rs
+++ b/kernel/src/arch/x86_64/mod.rs
@@ -512,7 +512,7 @@ fn _start(argc: isize, _argv: *const *const u8) -> isize {
 
             lazy_static::initialize(&crate::nr::KERNEL_NODE_INSTANCE);
             let kernel_node = crate::nr::KERNEL_NODE_INSTANCE.clone();
-    
+
             let local_ridx = kernel_node.register(0).unwrap();
             log::info!("Kernel node replica idx is {:?}", local_ridx);
             crate::nr::NR_REPLICA_REGISTRATION.call_once(|| local_ridx);
diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs
index 54b0c6095..a077c8667 100644
--- a/kernel/src/arch/x86_64/process.rs
+++ b/kernel/src/arch/x86_64/process.rs
@@ -13,17 +13,17 @@ use core::iter::Iterator;
 use core::sync::atomic::{AtomicUsize, Ordering};
 use core::{fmt, ptr};
 
+use crate::arch::kcb;
 use arrayvec::ArrayVec;
+use core::num::NonZeroUsize;
 use fallible_collections::try_vec;
 use fallible_collections::FallibleVec;
 use kpi::arch::SaveArea;
 use kpi::process::{FrameId, ELF_OFFSET, EXECUTOR_OFFSET};
 use lazy_static::lazy_static;
 use log::{debug, info, trace, warn};
-use crate::arch::kcb;
-use core::num::NonZeroUsize;
-use nr2::nr::{NodeReplicated, AffinityChange};
 use nr2::nr::rwlock::RwLock;
+use nr2::nr::{AffinityChange, NodeReplicated};
 use x86::bits64::paging::*;
 use x86::bits64::rflags;
 use x86::{controlregs, Ring};
@@ -82,7 +82,7 @@ lazy_static! {
             .get()
             .map_or(false, |c| c.mode == crate::cmdline::Mode::Controller)
         {
-            // Get the NodeReplicated instances from the controller, 
+            // Get the NodeReplicated instances from the controller,
             // who will have created them in shared memory
             use crate::arch::rackscale::get_shmem_structure::{rpc_get_shmem_structure, ShmemStructure};
 
@@ -104,7 +104,7 @@ lazy_static! {
         use crate::memory::shmem_affinity::local_shmem_affinity;
         let pcm = per_core_mem();
         pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't change affinity");
-        
+
         // Want at least one replica...
         let num_replicas =
             NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes() * (*NUM_MACHINES))).unwrap();
@@ -171,7 +171,7 @@ lazy_static! {
                     use crate::memory::shmem_affinity::local_shmem_affinity;
                     let pcm = per_core_mem();
                     - pcm.set_mem_affinity(local_shmem_affinity()).expect("Can't change affinity");
-                    OR 
+                    OR
                     - pcm.set_mem_affinity(orig).expect("Can't change affinity");
                 }
             }
@@ -191,7 +191,8 @@ lazy_static! {
 }
 
 #[cfg(not(feature = "rackscale"))]
-fn create_process_table() -> ArrayVec<Arc<RwLock<NodeReplicated<NrProcess<Ring3Process>>>>, MAX_PROCESSES> {
+fn create_process_table(
+) -> ArrayVec<Arc<RwLock<NodeReplicated<NrProcess<Ring3Process>>>>, MAX_PROCESSES> {
     // Want at least one replica...
     let num_replicas =
         NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes())).unwrap();
@@ -204,25 +205,29 @@ fn create_process_table() -> ArrayVec<Arc<RwLock<NodeReplicated<NrProcess<Ring3P
             "Expect initialization to happen on node 0."
         );
 
-        let process: Arc<RwLock<NodeReplicated<NrProcess<Ring3Process>>>> = Arc::try_new(RwLock::new(
-            NodeReplicated::new(num_replicas, |afc: AffinityChange| {
-                let pcm = kcb::per_core_mem();
-                //log::info!("Got AffinityChange: {:?}", afc);
-                match afc {
-                    AffinityChange::Replica(r) => {
-                        let affinity = { pcm.physical_memory.borrow().affinity };
-                        pcm.set_mem_affinity(crate::memory::shmem_affinity::mid_to_shmem_affinity(r)).expect("Can't set affinity");
-                        return affinity;
-                    }
-                    AffinityChange::Revert(orig) => {
-                        pcm.set_mem_affinity(orig).expect("Can't set affinity");
-                        return 0;
+        let process: Arc<RwLock<NodeReplicated<NrProcess<Ring3Process>>>> =
+            Arc::try_new(RwLock::new(
+                NodeReplicated::new(num_replicas, |afc: AffinityChange| {
+                    let pcm = kcb::per_core_mem();
+                    //log::info!("Got AffinityChange: {:?}", afc);
+                    match afc {
+                        AffinityChange::Replica(r) => {
+                            let affinity = { pcm.physical_memory.borrow().affinity };
+                            pcm.set_mem_affinity(
+                                crate::memory::shmem_affinity::mid_to_shmem_affinity(r),
+                            )
+                            .expect("Can't set affinity");
+                            return affinity;
+                        }
+                        AffinityChange::Revert(orig) => {
+                            pcm.set_mem_affinity(orig).expect("Can't set affinity");
+                            return 0;
+                        }
                     }
-                }
-            })
-            .expect("Not enough memory to initialize system")),
-        )
-        .expect("Not enough memory to initialize system");
+                })
+                .expect("Not enough memory to initialize system"),
+            ))
+            .expect("Not enough memory to initialize system");
 
         processes.push(process)
     }
@@ -302,15 +307,15 @@ fn create_process_table(
 }
  */
 
- pub(crate) struct ArchProcessManagement;
-
+pub(crate) struct ArchProcessManagement;
 
 impl crate::nrproc::ProcessManager for ArchProcessManagement {
     type Process = Ring3Process;
 
     fn process_table(
         &self,
-    ) -> &'static ArrayVec<Arc<RwLock<NodeReplicated<NrProcess<Self::Process>>>>, MAX_PROCESSES> {
+    ) -> &'static ArrayVec<Arc<RwLock<NodeReplicated<NrProcess<Self::Process>>>>, MAX_PROCESSES>
+    {
         &*super::process::PROCESS_TABLE
     }
 }
@@ -1026,7 +1031,10 @@ impl Executor for Ring3Executor {
         unsafe {
             let current_pml4 = PAddr::from(controlregs::cr3());
             if current_pml4 != replica_pml4 {
-                info!("Switching from 0x{:x} to 0x{:x}", current_pml4, replica_pml4);
+                info!(
+                    "Switching from 0x{:x} to 0x{:x}",
+                    current_pml4, replica_pml4
+                );
                 controlregs::cr3_write(self.pml4.into());
             }
         }
diff --git a/kernel/src/arch/x86_64/rackscale/client_state.rs b/kernel/src/arch/x86_64/rackscale/client_state.rs
index 795cbbaf8..89e5b7612 100644
--- a/kernel/src/arch/x86_64/rackscale/client_state.rs
+++ b/kernel/src/arch/x86_64/rackscale/client_state.rs
@@ -60,7 +60,6 @@ impl ClientState {
                 crate::transport::shmem::init_shmem_rpc(true)
                     .expect("Failed to initialize shmem RPC"),
             ))
-
         };
 
         log::info!("per_process_base_pages");
diff --git a/kernel/src/arch/x86_64/rackscale/controller.rs b/kernel/src/arch/x86_64/rackscale/controller.rs
index d0e989ce8..c1e1217f3 100644
--- a/kernel/src/arch/x86_64/rackscale/controller.rs
+++ b/kernel/src/arch/x86_64/rackscale/controller.rs
@@ -75,7 +75,7 @@ pub(crate) fn run() {
         .expect("Failed to accept client");
 
     ClientReadyCount.fetch_add(1, Ordering::SeqCst);
-    
+
     log::info!("before ClientReadyCount");
     // Wait for all clients to connect before fulfilling any RPCs.
     while ClientReadyCount.load(Ordering::SeqCst) != (*crate::environment::NUM_MACHINES - 1) as u64
diff --git a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs
index 268e99971..5f8d87969 100644
--- a/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs
+++ b/kernel/src/arch/x86_64/rackscale/get_shmem_structure.rs
@@ -21,7 +21,7 @@ use crate::error::{KError, KResult};
 use crate::memory::shmem_affinity::local_shmem_affinity;
 use crate::memory::vspace::TlbFlushHandle;
 use crate::memory::{kernel_vaddr_to_paddr, paddr_to_kernel_vaddr, PAddr, VAddr};
-use crate::nr::{KERNEL_NODE_INSTANCE, KernelNode};
+use crate::nr::{KernelNode, KERNEL_NODE_INSTANCE};
 use crate::nrproc::NrProcess;
 use crate::process::MAX_PROCESSES;
 
@@ -134,8 +134,7 @@ pub(crate) fn handle_get_shmem_structure(
                 // to a physical address, and then change it to a shmem offset by subtracting the shmem base.
                 // TODO(rackscale): try to simplify this, and below?
                 let arc_log_paddr = kernel_vaddr_to_paddr(VAddr::from_u64(
-                    (*&client_clone
-                        as *const NodeReplicated<NrProcess<Ring3Process>>) as u64,
+                    (*&client_clone as *const NodeReplicated<NrProcess<Ring3Process>>) as u64,
                 ));
                 logs[i] = arc_log_paddr.as_u64();
             }
@@ -147,10 +146,15 @@ pub(crate) fn handle_get_shmem_structure(
         ShmemStructure::NrLog => {
             let log_clone = Arc::into_raw(Arc::clone(&KERNEL_NODE_INSTANCE));
 
-            let log_paddr =
-                kernel_vaddr_to_paddr(VAddr::from_u64((*&log_clone as *const NodeReplicated<KernelNode>) as u64))
-                    .as_u64();
-            log::info!("nr_node addr {:?} &KERNEL_NODE_INSTANCE = {:p}", log_paddr, &KERNEL_NODE_INSTANCE);
+            let log_paddr = kernel_vaddr_to_paddr(VAddr::from_u64(
+                (*&log_clone as *const NodeReplicated<KernelNode>) as u64,
+            ))
+            .as_u64();
+            log::info!(
+                "nr_node addr {:?} &KERNEL_NODE_INSTANCE = {:p}",
+                log_paddr,
+                &KERNEL_NODE_INSTANCE
+            );
 
             // Modify header and write into output buffer
             unsafe { encode(&[log_paddr], &mut payload) }.unwrap();
diff --git a/kernel/src/arch/x86_64/syscall.rs b/kernel/src/arch/x86_64/syscall.rs
index f95793cd5..d73cd398f 100644
--- a/kernel/src/arch/x86_64/syscall.rs
+++ b/kernel/src/arch/x86_64/syscall.rs
@@ -296,8 +296,7 @@ impl<T: Arch86ProcessDispatch> ProcessDispatch<u64> for T {
         let pid = current_pid()?;
         let handles = if add > 0 {
             NrProcess::<Ring3Process>::add_replica(pid, rid as usize).expect("add_replica")
-        }
-        else {
+        } else {
             NrProcess::<Ring3Process>::remove_replica(pid, rid as usize).expect("remove_replica")
         };
 
@@ -308,8 +307,7 @@ impl<T: Arch86ProcessDispatch> ProcessDispatch<u64> for T {
         #[cfg(not(feature = "rackscale"))]
         super::tlb::shootdown(handles[0].clone());
 
-
-        Ok((0,0))
+        Ok((0, 0))
     }
 
     fn exit(&self, code: u64) -> Result<(u64, u64), KError> {
diff --git a/kernel/src/arch/x86_64/tlb.rs b/kernel/src/arch/x86_64/tlb.rs
index 87ecbb987..3a6e40ff9 100644
--- a/kernel/src/arch/x86_64/tlb.rs
+++ b/kernel/src/arch/x86_64/tlb.rs
@@ -54,7 +54,7 @@ lazy_static! {
             use crate::arch::kcb::per_core_mem;
             use crate::memory::shmem_affinity::local_shmem_affinity;
             let local_affinity = local_shmem_affinity();
-            
+
             log::info!("before affinity");
             // We want to allocate the queues in shared memory
             let affinity = {
diff --git a/kernel/src/arch/x86_64/vspace/mod.rs b/kernel/src/arch/x86_64/vspace/mod.rs
index e28d1cf7c..23264bc0a 100644
--- a/kernel/src/arch/x86_64/vspace/mod.rs
+++ b/kernel/src/arch/x86_64/vspace/mod.rs
@@ -113,7 +113,7 @@ impl AddressSpace for VSpace {
     fn root(&self) -> PAddr {
         self.pml4_address()
     }
-    
+
     fn map_frame(&mut self, base: VAddr, frame: Frame, action: MapAction) -> Result<(), KError> {
         if frame.size() == 0 {
             return Err(KError::InvalidFrame);
@@ -205,7 +205,12 @@ impl Drop for VSpace {
 impl VSpace {
     pub(crate) fn new() -> Result<Self, KError> {
         let mut btree = BTreeMap::new();
-        btree.try_insert(VAddr(0x0), MappingInfo::new(Frame::empty(), MapAction::none())).expect("fail");
+        btree
+            .try_insert(
+                VAddr(0x0),
+                MappingInfo::new(Frame::empty(), MapAction::none()),
+            )
+            .expect("fail");
 
         Ok(VSpace {
             mappings: BTreeMap::new(),
diff --git a/kernel/src/arch/x86_64/vspace/page_table.rs b/kernel/src/arch/x86_64/vspace/page_table.rs
index 1429a6c02..824f60da9 100644
--- a/kernel/src/arch/x86_64/vspace/page_table.rs
+++ b/kernel/src/arch/x86_64/vspace/page_table.rs
@@ -41,7 +41,7 @@ impl Clone for PageTable {
             let frame_ptr = unsafe {
                 let ptr = alloc::alloc::alloc_zeroed(PT_LAYOUT);
                 debug_assert!(!ptr.is_null());
-    
+
                 let nptr = NonNull::new_unchecked(ptr);
                 NonNull::slice_from_raw_parts(nptr, PT_LAYOUT.size())
             };
@@ -51,17 +51,17 @@ impl Clone for PageTable {
             unsafe { frame.zero() };
             frame
         }
-    
+
         fn new_pt() -> PDEntry {
             let frame = alloc_frame();
             return PDEntry::new(frame.base, PDFlags::P | PDFlags::RW | PDFlags::US);
         }
-    
+
         fn new_pd() -> PDPTEntry {
             let frame = alloc_frame();
             return PDPTEntry::new(frame.base, PDPTFlags::P | PDPTFlags::RW | PDPTFlags::US);
         }
-    
+
         fn new_pdpt() -> PML4Entry {
             let frame = alloc_frame();
             return PML4Entry::new(frame.base, PML4Flags::P | PML4Flags::RW | PML4Flags::US);
@@ -84,7 +84,6 @@ impl Clone for PageTable {
                             let cloned_pdpt_entry = cloned_pdpt[pdpt_idx];
                             drop(cloned_pdpt);
 
-
                             for pd_idx in 0..PAGE_SIZE_ENTRIES {
                                 let pd = self.get_pd(pdpt[pdpt_idx]);
                                 let cloned_pd = cloned_pt.get_pd_mut(cloned_pdpt_entry);
@@ -107,7 +106,6 @@ impl Clone for PageTable {
                                 } else {
                                     // Encountered a 2 MiB mapping
                                     cloned_pd[pd_idx] = pd[pd_idx];
-
                                 }
                             }
                         } else {
diff --git a/kernel/src/environment.rs b/kernel/src/environment.rs
index 805f0822f..b724f78ba 100644
--- a/kernel/src/environment.rs
+++ b/kernel/src/environment.rs
@@ -16,16 +16,15 @@ pub(crate) static CORE_ID: Lazy<usize> =
 #[thread_local]
 pub(crate) static MT_ID: Lazy<usize> = Lazy::new(|| {
     #[cfg(feature = "rackscale")]
-    return (*crate::environment::MACHINE_ID - 1) * (atopology::MACHINE_TOPOLOGY.num_threads()) + atopology::MACHINE_TOPOLOGY.current_thread().id;
+    return (*crate::environment::MACHINE_ID - 1) * (atopology::MACHINE_TOPOLOGY.num_threads())
+        + atopology::MACHINE_TOPOLOGY.current_thread().id;
     #[cfg(not(feature = "rackscale"))]
     *CORE_ID
 });
 
 /// Number of nodes in the current deployment.
 #[allow(unused)]
-pub(crate) static NUM_NODES: Lazy<usize> =
-    Lazy::new(|| atopology::MACHINE_TOPOLOGY.num_nodes());
-
+pub(crate) static NUM_NODES: Lazy<usize> = Lazy::new(|| atopology::MACHINE_TOPOLOGY.num_nodes());
 
 /// The NUMA node id of the current core (hardware thread).
 #[thread_local]
diff --git a/kernel/src/memory/vspace_model.rs b/kernel/src/memory/vspace_model.rs
index bd5e2230d..22b5df4dc 100644
--- a/kernel/src/memory/vspace_model.rs
+++ b/kernel/src/memory/vspace_model.rs
@@ -51,6 +51,11 @@ impl Default for ModelAddressSpace {
 }
 
 impl AddressSpace for ModelAddressSpace {
+    // TODO(correctness): placeholder root; unverified whether `PAddr::zero()` is correct for the model
+    fn root(&self) -> PAddr {
+        PAddr::zero()
+    }
+
     fn map_frame(&mut self, base: VAddr, frame: Frame, action: MapAction) -> Result<(), KError> {
         // Don't allow mapping of zero-sized frames
         if frame.size() == 0 {
diff --git a/kernel/src/nr.rs b/kernel/src/nr.rs
index fb62458b8..263115cfa 100644
--- a/kernel/src/nr.rs
+++ b/kernel/src/nr.rs
@@ -4,12 +4,12 @@
 use crate::prelude::*;
 use core::fmt::Debug;
 
+use crate::arch::kcb;
 use alloc::sync::Arc;
 use hashbrown::HashMap;
 use log::{error, trace};
 use nr2::nr::{Dispatch, NodeReplicated, ThreadToken};
 use spin::Once;
-use crate::arch::kcb;
 
 use lazy_static::lazy_static;
 
@@ -121,7 +121,6 @@ lazy_static! {
     };
 }
 
-
 #[derive(PartialEq, Clone, Copy, Debug)]
 pub(crate) enum ReadOps {
     CurrentProcess(kpi::system::GlobalThreadId),
@@ -168,7 +167,7 @@ pub(crate) struct KernelNode {
 impl Default for KernelNode {
     fn default() -> KernelNode {
         let k = KernelNode {
-            process_map: HashMap::with_capacity(MAX_PROCESSES),   // with_capacity(MAX_PROCESSES),
+            process_map: HashMap::with_capacity(MAX_PROCESSES), // with_capacity(MAX_PROCESSES),
             scheduler_map: HashMap::with_capacity(24), // with_capacity(MAX_CORES), or, for rackscale, with_capacity(MAX_CORES * MAX_MACHINES)
         };
         k
@@ -188,10 +187,12 @@ impl KernelNode {
         gtid: Option<kpi::system::GlobalThreadId>,
     ) -> Result<kpi::system::GlobalThreadId, KError> {
         // todo node id
-        crate::nr::NR_REPLICA_REGISTRATION.call_once(|| crate::nr::KERNEL_NODE_INSTANCE.register(0).unwrap());
+        crate::nr::NR_REPLICA_REGISTRATION
+            .call_once(|| crate::nr::KERNEL_NODE_INSTANCE.register(0).unwrap());
 
         let op = Op::SchedAllocateCore(pid, affinity, gtid, entry_point);
-        let response = KERNEL_NODE_INSTANCE.execute_mut(op, *NR_REPLICA_REGISTRATION.get().unwrap());
+        let response =
+            KERNEL_NODE_INSTANCE.execute_mut(op, *NR_REPLICA_REGISTRATION.get().unwrap());
 
         match response {
             Ok(NodeResult::CoreAllocated(rgtid)) => Ok(rgtid),
@@ -206,8 +207,8 @@ impl KernelNode {
         gtid: kpi::system::GlobalThreadId,
     ) -> Result<(), KError> {
         let op = Op::SchedReleaseCore(pid, affinity, gtid);
-        let response = KERNEL_NODE_INSTANCE
-            .execute_mut(op,*NR_REPLICA_REGISTRATION.get().unwrap());
+        let response =
+            KERNEL_NODE_INSTANCE.execute_mut(op, *NR_REPLICA_REGISTRATION.get().unwrap());
         match response {
             Ok(NodeResult::CoreReleased) => Ok(()),
             Err(e) => Err(e),
diff --git a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs
index 8a0512474..8c4c9e4a2 100644
--- a/kernel/src/nrproc.rs
+++ b/kernel/src/nrproc.rs
@@ -11,7 +11,7 @@ use arrayvec::ArrayVec;
 use fallible_collections::vec::FallibleVec;
 use kpi::process::{FrameId, ProcessInfo};
 use kpi::MemType;
-use nr2::nr::{Dispatch, NodeReplicated, ThreadToken, rwlock::RwLock};
+use nr2::nr::{rwlock::RwLock, Dispatch, NodeReplicated, ThreadToken};
 use spin::Once;
 
 use crate::arch::process::PROCESS_TABLE;
@@ -34,7 +34,7 @@ pub(crate) fn register_thread_with_process_replicas() {
     let node = *crate::environment::NODE_ID;
     #[cfg(feature = "rackscale")]
     let node = 0; //*crate::environment::MACHINE_ID
-    
+
     debug_assert!(PROCESS_TABLE.len() > node, "Invalid Node ID");
 
     PROCESS_TOKEN.call_once(|| {
@@ -42,8 +42,13 @@ pub(crate) fn register_thread_with_process_replicas() {
         for pid in 0..MAX_PROCESSES {
             debug_assert!(PROCESS_TABLE.len() > pid, "Invalid PID");
 
-            let token = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).register(node);
-            log::info!("MT_ID is {}: {node} registered {pid} {token:?}", *crate::environment::MT_ID);
+            let token = PROCESS_TABLE[pid]
+                .read(*crate::environment::MT_ID)
+                .register(node);
+            log::info!(
+                "MT_ID is {}: {node} registered {pid} {token:?}",
+                *crate::environment::MT_ID
+            );
             tokens.push(token.expect("Need to be able to register"));
         }
 
@@ -90,10 +95,10 @@ pub(crate) enum ProcessOpMut {
 
     #[cfg(feature = "rackscale")]
     DispatcherAllocation(Frame, kpi::system::MachineId),
-    
+
     #[cfg(not(feature = "rackscale"))]
     DispatcherAllocation(Frame),
-    
+
     MemMapFrame(VAddr, Frame, MapAction),
     MemMapDevice(Frame, MapAction),
     MemMapFrameId(VAddr, FrameId, MapAction),
@@ -157,11 +162,14 @@ impl<P: Process> NrProcess<P> {
         log::info!("add_replica {pid} {rid}");
         // we use unmap of 0x0 to get a snapshot of where the core is running on
         let handle = NrProcess::<P>::unmap(pid, VAddr::from(0x0));
-        if !handle.is_ok() {
+        if handle.is_err() {
             panic!("couldn't get snapshot");
         }
 
-        PROCESS_TABLE[pid].write(*crate::environment::MT_ID).add_replica(rid).expect("add_replica failed");
+        PROCESS_TABLE[pid]
+            .write(*crate::environment::MT_ID)
+            .add_replica(rid)
+            .expect("add_replica failed");
         log::info!("added_replica {pid} {rid}");
 
         handle
@@ -174,14 +182,20 @@ impl<P: Process> NrProcess<P> {
         #[cfg(not(feature = "rackscale"))]
         let max_nodes = *crate::environment::NUM_NODES;
 
-        debug_assert!(rid < max_nodes, "Invalid Node ID {rid} max_nodes {max_nodes}");
+        debug_assert!(
+            rid < max_nodes,
+            "Invalid Node ID {rid} max_nodes {max_nodes}"
+        );
 
         // we use unmap of 0x0 to get a snapshot of where the core is running on
         let handle = NrProcess::<P>::unmap(pid, VAddr::from(0x0));
-        if !handle.is_ok() {
+        if handle.is_err() {
             panic!("couldn't get snapshot");
         }
-        PROCESS_TABLE[pid].write(*crate::environment::MT_ID).remove_replica(rid).expect("remove_replica failed");
+        PROCESS_TABLE[pid]
+            .write(*crate::environment::MT_ID)
+            .remove_replica(rid)
+            .expect("remove_replica failed");
 
         handle
     }
@@ -192,10 +206,12 @@ impl<P: Process> NrProcess<P> {
         writeable_sections: Vec<Frame>,
     ) -> Result<(), KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-        let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut(
-            ProcessOpMut::Load(pid, module_name, writeable_sections),
-            PROCESS_TOKEN.get().unwrap()[pid],
-        );
+        let response = PROCESS_TABLE[pid]
+            .read(*crate::environment::MT_ID)
+            .execute_mut(
+                ProcessOpMut::Load(pid, module_name, writeable_sections),
+                PROCESS_TOKEN.get().unwrap()[pid],
+            );
         match response {
             Ok(ProcessResult::Ok) => Ok(()),
             Err(e) => Err(e),
@@ -219,7 +235,9 @@ impl<P: Process> NrProcess<P> {
 
     pub(crate) fn synchronize(pid: Pid) {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-        PROCESS_TABLE[pid].read(*crate::environment::MT_ID).sync(PROCESS_TOKEN.get().unwrap()[pid]);
+        PROCESS_TABLE[pid]
+            .read(*crate::environment::MT_ID)
+            .sync(PROCESS_TOKEN.get().unwrap()[pid]);
     }
 
     pub(crate) fn map_device_frame(
@@ -228,10 +246,12 @@ impl<P: Process> NrProcess<P> {
         action: MapAction,
     ) -> Result<(u64, u64), KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-        let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut(
-            ProcessOpMut::MemMapDevice(frame, action),
-            PROCESS_TOKEN.get().unwrap()[pid],
-        );
+        let response = PROCESS_TABLE[pid]
+            .read(*crate::environment::MT_ID)
+            .execute_mut(
+                ProcessOpMut::MemMapDevice(frame, action),
+                PROCESS_TOKEN.get().unwrap()[pid],
+            );
         match response {
             Ok(ProcessResult::Ok) => Ok((frame.base.as_u64(), frame.size() as u64)),
             Err(e) => Err(e),
@@ -241,10 +261,12 @@ impl<P: Process> NrProcess<P> {
 
     pub(crate) fn unmap(pid: Pid, base: VAddr) -> Result<Vec<TlbFlushHandle>, KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-        let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut(
-            ProcessOpMut::MemUnmap(base),
-            PROCESS_TOKEN.get().unwrap()[pid],
-        );
+        let response = PROCESS_TABLE[pid]
+            .read(*crate::environment::MT_ID)
+            .execute_mut(
+                ProcessOpMut::MemUnmap(base),
+                PROCESS_TOKEN.get().unwrap()[pid],
+            );
         match response {
             Ok(ProcessResult::Unmapped(handle)) => Ok(handle),
             Err(e) => Err(e),
@@ -260,10 +282,12 @@ impl<P: Process> NrProcess<P> {
     ) -> Result<(PAddr, usize), KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
         //action.multiple_mappings(true);
-        let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut(
-            ProcessOpMut::MemMapFrameId(base, frame_id, action),
-            PROCESS_TOKEN.get().unwrap()[pid],
-        );
+        let response = PROCESS_TABLE[pid]
+            .read(*crate::environment::MT_ID)
+            .execute_mut(
+                ProcessOpMut::MemMapFrameId(base, frame_id, action),
+                PROCESS_TOKEN.get().unwrap()[pid],
+            );
         match response {
             Ok(ProcessResult::MappedFrameId(paddr, size)) => Ok((paddr, size)),
             Err(e) => Err(e),
@@ -280,10 +304,12 @@ impl<P: Process> NrProcess<P> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
         let mut virtual_offset = 0;
         for frame in frames {
-            let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut(
-                ProcessOpMut::MemMapFrame(base + virtual_offset, frame, action),
-                PROCESS_TOKEN.get().unwrap()[pid],
-            );
+            let response = PROCESS_TABLE[pid]
+                .read(*crate::environment::MT_ID)
+                .execute_mut(
+                    ProcessOpMut::MemMapFrame(base + virtual_offset, frame, action),
+                    PROCESS_TOKEN.get().unwrap()[pid],
+                );
             match response {
                 Ok(ProcessResult::Ok) => {}
                 e => unreachable!(
@@ -301,11 +327,11 @@ impl<P: Process> NrProcess<P> {
         Ok((base.as_u64(), virtual_offset as u64))
     }
 
-
     pub(crate) fn ptroot(pid: Pid) -> Result<PAddr, KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-        let response =
-            PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute(ProcessOp::GetPtRoot, PROCESS_TOKEN.get().unwrap()[pid]);
+        let response = PROCESS_TABLE[pid]
+            .read(*crate::environment::MT_ID)
+            .execute(ProcessOp::GetPtRoot, PROCESS_TOKEN.get().unwrap()[pid]);
         match response {
             Ok(ProcessResult::PtRoot(paddr)) => Ok(paddr),
             Err(e) => Err(e),
@@ -315,8 +341,9 @@ impl<P: Process> NrProcess<P> {
 
     pub(crate) fn pinfo(pid: Pid) -> Result<ProcessInfo, KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-        let response =
-            PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute(ProcessOp::ProcessInfo, PROCESS_TOKEN.get().unwrap()[pid]);
+        let response = PROCESS_TABLE[pid]
+            .read(*crate::environment::MT_ID)
+            .execute(ProcessOp::ProcessInfo, PROCESS_TOKEN.get().unwrap()[pid]);
         match response {
             Ok(ProcessResult::ProcessInfo(pinfo)) => Ok(pinfo),
             Err(e) => Err(e),
@@ -332,10 +359,12 @@ impl<P: Process> NrProcess<P> {
         let gtid = *crate::environment::CORE_ID;
         let node = *crate::environment::NODE_ID;
 
-        let response = pm.process_table()[pid].read(*crate::environment::MT_ID).execute_mut(
-            ProcessOpMut::AssignExecutor(gtid, node),
-            PROCESS_TOKEN.get().unwrap()[pid],
-        );
+        let response = pm.process_table()[pid]
+            .read(*crate::environment::MT_ID)
+            .execute_mut(
+                ProcessOpMut::AssignExecutor(gtid, node),
+                PROCESS_TOKEN.get().unwrap()[pid],
+            );
         match response {
             Ok(ProcessResult::Executor(executor)) => Ok(executor),
             Err(e) => Err(e),
@@ -363,10 +392,12 @@ impl<P: Process> NrProcess<P> {
 
     pub(crate) fn allocate_frame_to_process(pid: Pid, frame: Frame) -> Result<FrameId, KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
-        let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut(
-            ProcessOpMut::AllocateFrameToProcess(frame),
-            PROCESS_TOKEN.get().unwrap()[pid],
-        );
+        let response = PROCESS_TABLE[pid]
+            .read(*crate::environment::MT_ID)
+            .execute_mut(
+                ProcessOpMut::AllocateFrameToProcess(frame),
+                PROCESS_TOKEN.get().unwrap()[pid],
+            );
         match response {
             Ok(ProcessResult::FrameId(fid)) => Ok(fid),
             Err(e) => Err(e),
@@ -377,10 +408,12 @@ impl<P: Process> NrProcess<P> {
     pub(crate) fn release_frame_from_process(pid: Pid, fid: FrameId) -> Result<Frame, KError> {
         debug_assert!(pid < MAX_PROCESSES, "Invalid PID");
         debug_assert!(fid < MAX_FRAMES_PER_PROCESS, "Invalid FID");
-        let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut(
-            ProcessOpMut::ReleaseFrameFromProcess(fid),
-            PROCESS_TOKEN.get().unwrap()[pid],
-        );
+        let response = PROCESS_TABLE[pid]
+            .read(*crate::environment::MT_ID)
+            .execute_mut(
+                ProcessOpMut::ReleaseFrameFromProcess(fid),
+                PROCESS_TOKEN.get().unwrap()[pid],
+            );
         match response {
             Ok(ProcessResult::Frame(f)) => Ok(f),
             Err(e) => Err(e),
@@ -393,14 +426,16 @@ impl<P: Process> NrProcess<P> {
 
         #[cfg(feature = "rackscale")]
         let mid = *crate::environment::MACHINE_ID;
-        let response = PROCESS_TABLE[pid].read(*crate::environment::MT_ID).execute_mut(
-        #[cfg(not(feature = "rackscale"))]
-            ProcessOpMut::DispatcherAllocation(frame),
-        #[cfg(feature = "rackscale")]
-            ProcessOpMut::DispatcherAllocation(frame, mid),
-            PROCESS_TOKEN.get().unwrap()[pid],
-        );
-        
+        let response = PROCESS_TABLE[pid]
+            .read(*crate::environment::MT_ID)
+            .execute_mut(
+                #[cfg(not(feature = "rackscale"))]
+                ProcessOpMut::DispatcherAllocation(frame),
+                #[cfg(feature = "rackscale")]
+                ProcessOpMut::DispatcherAllocation(frame, mid),
+                PROCESS_TOKEN.get().unwrap()[pid],
+            );
+
         match response {
             Ok(ProcessResult::ExecutorsCreated(how_many)) => Ok(how_many),
             Err(e) => Err(e),
@@ -409,10 +444,12 @@ impl<P: Process> NrProcess<P> {
     }
 
     pub(crate) fn userslice_to_arc_slice(from: UserSlice) -> Result<Arc<[u8]>, KError> {
-        let response = PROCESS_TABLE[from.pid].read(*crate::environment::MT_ID).execute(
-            ProcessOp::ReadSlice(from),
-            PROCESS_TOKEN.get().unwrap()[from.pid],
-        );
+        let response = PROCESS_TABLE[from.pid]
+            .read(*crate::environment::MT_ID)
+            .execute(
+                ProcessOp::ReadSlice(from),
+                PROCESS_TOKEN.get().unwrap()[from.pid],
+            );
         match response {
             Ok(ProcessResult::ReadSlice(v)) => Ok(v),
             Err(e) => Err(e),
@@ -421,10 +458,12 @@ impl<P: Process> NrProcess<P> {
     }
 
     pub(crate) fn read_string_from_userspace(from: UserSlice) -> Result<String, KError> {
-        let response = PROCESS_TABLE[from.pid].read(*crate::environment::MT_ID).execute(
-            ProcessOp::ReadString(from),
-            PROCESS_TOKEN.get().unwrap()[from.pid],
-        );
+        let response = PROCESS_TABLE[from.pid]
+            .read(*crate::environment::MT_ID)
+            .execute(
+                ProcessOp::ReadString(from),
+                PROCESS_TOKEN.get().unwrap()[from.pid],
+            );
         match response {
             Ok(ProcessResult::ReadString(s)) => Ok(s),
             Err(e) => Err(e),
@@ -451,10 +490,12 @@ impl<P: Process> NrProcess<P> {
         on: UserSlice,
         f: Box<dyn Fn(&mut [u8]) -> KResult<(u64, u64)>>,
     ) -> Result<(u64, u64), KError> {
-        let response = PROCESS_TABLE[on.pid].read(*crate::environment::MT_ID).execute(
-            ProcessOp::ExecSliceMut(on, f),
-            PROCESS_TOKEN.get().unwrap()[on.pid],
-        );
+        let response = PROCESS_TABLE[on.pid]
+            .read(*crate::environment::MT_ID)
+            .execute(
+                ProcessOp::ExecSliceMut(on, f),
+                PROCESS_TOKEN.get().unwrap()[on.pid],
+            );
         match response {
             Ok(ProcessResult::SysRetOk((a, b))) => Ok((a, b)),
             Err(e) => Err(e),
@@ -466,10 +507,12 @@ impl<P: Process> NrProcess<P> {
         on: &'a UserSlice,
         f: Box<dyn Fn(&'a [u8]) -> KResult<()>>,
     ) -> Result<(), KError> {
-        let response = PROCESS_TABLE[on.pid].read(*crate::environment::MT_ID).execute(
-            ProcessOp::ExecSlice(on, f),
-            PROCESS_TOKEN.get().unwrap()[on.pid],
-        );
+        let response = PROCESS_TABLE[on.pid]
+            .read(*crate::environment::MT_ID)
+            .execute(
+                ProcessOp::ExecSlice(on, f),
+                PROCESS_TOKEN.get().unwrap()[on.pid],
+            );
         match response {
             Ok(ProcessResult::Ok) => Ok(()),
             Err(e) => Err(e),
@@ -489,9 +532,7 @@ where
 
     fn dispatch<'buf>(&self, op: Self::ReadOperation<'_>) -> Self::Response {
         match op {
-            ProcessOp::GetPtRoot => {
-                Ok(ProcessResult::PtRoot(self.process.vspace().root()))
-            }
+            ProcessOp::GetPtRoot => Ok(ProcessResult::PtRoot(self.process.vspace().root())),
             ProcessOp::ProcessInfo => Ok(ProcessResult::ProcessInfo(*self.process.pinfo())),
             ProcessOp::MemResolve(base) => {
                 let (paddr, rights) = self.process.vspace().resolve(base)?;
@@ -595,8 +636,7 @@ where
                             .expect("is_aliasable implies this op can't fail");
                     }
                     shootdown_handle
-                }
-                else {
+                } else {
                     TlbFlushHandle::new(0x0.into(), 0x0.into(), 0x0, MapAction::none())
                 };
 
diff --git a/kernel/src/process.rs b/kernel/src/process.rs
index 5fe7a0f20..0693832e8 100644
--- a/kernel/src/process.rs
+++ b/kernel/src/process.rs
@@ -79,7 +79,11 @@ pub(crate) trait Process: FrameManagement + Clone {
         affinity: atopology::NodeId,
     ) -> Result<(), alloc::collections::TryReserveError>;
 
-    fn allocate_executors(&mut self, frame: Frame, #[cfg(feature = "rackscale")] mid: kpi::system::MachineId) -> Result<usize, KError>;
+    fn allocate_executors(
+        &mut self,
+        frame: Frame,
+        #[cfg(feature = "rackscale")] mid: kpi::system::MachineId,
+    ) -> Result<usize, KError>;
 
     fn vspace_mut(&mut self) -> &mut Self::A;
 
@@ -463,7 +467,10 @@ impl elfloader::ElfLoader for DataSecAllocator {
 pub(crate) fn make_process<P: Process>(binary: &'static str) -> Result<Pid, KError> {
     // Allocate a new process
     let pid = {
-        let response = crate::nr::KERNEL_NODE_INSTANCE.execute_mut(crate::nr::Op::AllocatePid, *crate::nr::NR_REPLICA_REGISTRATION.get().unwrap())?;
+        let response = crate::nr::KERNEL_NODE_INSTANCE.execute_mut(
+            crate::nr::Op::AllocatePid,
+            *crate::nr::NR_REPLICA_REGISTRATION.get().unwrap(),
+        )?;
         if let crate::nr::NodeResult::PidAllocated(pid) = response {
             Ok(pid)
         } else {
@@ -582,11 +589,13 @@ pub(crate) fn allocate_dispatchers<P: Process>(pid: Pid, affinity: NodeId) -> Re
             let pcm = crate::arch::kcb::per_core_mem();
 
             #[cfg(feature = "rackscale")]
-            pcm.set_mem_affinity(affinity).expect("Can't change affinity");
+            pcm.set_mem_affinity(affinity)
+                .expect("Can't change affinity");
             let frame = pcm.mem_manager().allocate_large_page()?;
-            
+
             #[cfg(feature = "rackscale")]
-            pcm.set_mem_affinity(crate::memory::shmem_affinity::local_shmem_affinity()).expect("Can't reset affinity");
+            pcm.set_mem_affinity(crate::memory::shmem_affinity::local_shmem_affinity())
+                .expect("Can't reset affinity");
             frame
         };
 
diff --git a/kernel/src/syscalls.rs b/kernel/src/syscalls.rs
index f736a160a..5051cb564 100644
--- a/kernel/src/syscalls.rs
+++ b/kernel/src/syscalls.rs
@@ -128,7 +128,7 @@ enum ProcessOperationArgs<W> {
     ReleaseCore(W),
     AllocatePhysical(W, W),
     ReleasePhysical(W),
-    SetReplicas(W, W)
+    SetReplicas(W, W),
 }
 
 impl<W: Into<u64> + LowerHex + Debug + Copy + Clone> ProcessOperationArgs<W> {
diff --git a/kernel/src/transport/shmem.rs b/kernel/src/transport/shmem.rs
index 60b178262..31b854c5d 100644
--- a/kernel/src/transport/shmem.rs
+++ b/kernel/src/transport/shmem.rs
@@ -177,19 +177,25 @@ impl ShmemDevice {
             .expect("Failed to write potential shmem memory region addresses");
 
         // Note: leaving this code as a comment as a way to test if all shmem is writeable.
-        
+
         #[cfg(feature = "rackscale")]
         match crate::CMDLINE.get().map_or(Mode::Native, |c| c.mode) {
             Mode::Controller => {
                 //let mut f=  Frame::new(PAddr::from(0x70003f600000u64),2*1024*1024,12);
                 //log::info!("zeroing out shmem");
                 //unsafe {f.zero()};
-                log::info!("zeroing out shmem {:x} -- {:x}", mem_region.address, mem_region.address + mem_region.size);
+                log::info!(
+                    "zeroing out shmem {:x} -- {:x}",
+                    mem_region.address,
+                    mem_region.address + mem_region.size
+                );
                 for offset in 1..512 {
-                    let myptr: *mut u8 = 
-                            (KERNEL_BASE + mem_region.address + (offset*4096)) as *mut u8;
+                    let myptr: *mut u8 =
+                        (KERNEL_BASE + mem_region.address + (offset * 4096)) as *mut u8;
                     log::info!("at addr {:x}", (myptr as u64 - KERNEL_BASE));
-                    unsafe { *myptr = 0x0; }
+                    unsafe {
+                        *myptr = 0x0;
+                    }
                 }
                 /*
                 let mymemslice = unsafe {
@@ -197,13 +203,16 @@ impl ShmemDevice {
                         (KERNEL_BASE + mem_region.address) as *mut u8,
                         mem_region.size as usize,
                     )
-                }; 
+                };
                 mymemslice.fill(0);*/
-                log::info!("after out shmem {:x} -- {:x}", mem_region.address, mem_region.address + mem_region.size);
+                log::info!(
+                    "after out shmem {:x} -- {:x}",
+                    mem_region.address,
+                    mem_region.address + mem_region.size
+                );
             }
             _ => {}
         }
-         
 
         // Map the MSI-X table into kernel space
         kvspace
@@ -356,7 +365,9 @@ pub(crate) fn create_shmem_transport(mid: MachineId) -> KResult<ShmemTransport<'
     assert!(region_size as u64 >= SHMEM_TRANSPORT_SIZE);
 
     let allocator = ShmemAllocator::new(base_addr.as_u64(), SHMEM_TRANSPORT_SIZE);
-    crate::CMDLINE.get().map(|c| log::info!("c.mode is {:?}", c.mode));
+    crate::CMDLINE
+        .get()
+        .map(|c| log::info!("c.mode is {:?}", c.mode));
 
     match crate::CMDLINE.get().map_or(Mode::Native, |c| c.mode) {
         Mode::Controller => {
diff --git a/kernel/tests/s06_rackscale_tests.rs b/kernel/tests/s06_rackscale_tests.rs
index 25e9518af..fcc8871c6 100644
--- a/kernel/tests/s06_rackscale_tests.rs
+++ b/kernel/tests/s06_rackscale_tests.rs
@@ -39,7 +39,7 @@ fn rackscale_userspace_smoke_test(transport: RackscaleTransport) {
             "test-upcall",
             "test-scheduler",
             "test-syscalls",
-            "test-dynamic-replication"
+            "test-dynamic-replication",
         ])
         .set_rackscale(true)
         .release()
diff --git a/kernel/tests/s10_benchmarks.rs b/kernel/tests/s10_benchmarks.rs
index c39001adc..9e063edbc 100644
--- a/kernel/tests/s10_benchmarks.rs
+++ b/kernel/tests/s10_benchmarks.rs
@@ -493,7 +493,10 @@ fn s10_fxmark_benchmark() {
                 if cfg!(feature = "smoke") && cores > 2 {
                     cmdline = cmdline.nodes(std::cmp::max(machine.max_cores() / 16, 1));
                 } else {
-                    cmdline = cmdline.nodes(std::cmp::max(machine.max_cores() / 16, machine.max_numa_nodes()));
+                    cmdline = cmdline.nodes(std::cmp::max(
+                        machine.max_cores() / 16,
+                        machine.max_numa_nodes(),
+                    ));
                 }
 
                 let mut output = String::new();
diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs
index 22130ea90..f863af806 100644
--- a/kernel/tests/s11_rackscale_benchmarks.rs
+++ b/kernel/tests/s11_rackscale_benchmarks.rs
@@ -1016,7 +1016,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
     } else {
         MemcachedInternalConfig {
             num_queries: 100_000_000, // TODO(rackscale): should be 100_000_000,
-            mem_size: 16,           // TODO(rackscale): should be 32_000,
+            mem_size: 16,             // TODO(rackscale): should be 32_000,
         }
     };
 
@@ -1032,7 +1032,9 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
     test.cores_per_client = 4;
     test.cmd = format!(
         r#"init=memcachedbench.bin initargs={} appcmd='--x-benchmark-mem={} --x-benchmark-queries={}'"#,
-        2*4, config.mem_size, config.num_queries
+        2 * 4,
+        config.mem_size,
+        config.num_queries
     );
     test.arg = Some(config);
     test.run_rackscale();
diff --git a/kernel/testutils/src/rackscale_runner.rs b/kernel/testutils/src/rackscale_runner.rs
index 3b8af826c..ee17ddf8d 100644
--- a/kernel/testutils/src/rackscale_runner.rs
+++ b/kernel/testutils/src/rackscale_runner.rs
@@ -513,7 +513,11 @@ impl<T: Clone + Send + 'static> RackscaleBench<T> {
         // Find max cores, max numa, and max cores per node
         let machine = Machine::determine();
         let max_cores = if is_smoke { 8 } else { machine.max_cores() };
-        let max_numa =  if is_smoke { 4 } else { machine.max_numa_nodes() };
+        let max_numa = if is_smoke {
+            4
+        } else {
+            machine.max_numa_nodes()
+        };
         let total_cores_per_node = core::cmp::max(1, max_cores / max_numa);
 
         // Do initial network configuration
diff --git a/lib/kpi/src/lib.rs b/lib/kpi/src/lib.rs
index 6ebd2000d..3397e0495 100644
--- a/lib/kpi/src/lib.rs
+++ b/lib/kpi/src/lib.rs
@@ -105,7 +105,7 @@ pub enum ProcessOperation {
     /// Release a physical memory page from the process.
     ReleasePhysical = 10,
     /// Set Replicas
-    SetReplicas = 11
+    SetReplicas = 11,
 }
 
 impl ProcessOperation {
diff --git a/lib/kpi/src/syscalls/process.rs b/lib/kpi/src/syscalls/process.rs
index 4110a6500..6d2e2ab27 100644
--- a/lib/kpi/src/syscalls/process.rs
+++ b/lib/kpi/src/syscalls/process.rs
@@ -14,7 +14,7 @@ use x86::bits64::paging::VAddr;
 pub struct Process;
 
 impl Process {
-    pub fn set_replicas(add: bool, replica_idx: usize) -> Result<(), SystemCallError>{
+    pub fn set_replicas(add: bool, replica_idx: usize) -> Result<(), SystemCallError> {
         let r = unsafe {
             syscall!(
                 SystemCall::Process as u64,
diff --git a/usr/init/src/init.rs b/usr/init/src/init.rs
index 17cb706fd..b2d08bfa5 100644
--- a/usr/init/src/init.rs
+++ b/usr/init/src/init.rs
@@ -989,7 +989,7 @@ pub fn upcall_test() {
 
 pub fn dynamic_replication_test() {
     info!("dynamic_replication_test START");
-    
+
     vibrio::syscalls::Process::set_replicas(false, 0).expect("Can't remove replica 0.");
 
     //vibrio::syscalls::Process::set_replicas(false, 1).expect("Can't remove replica 1.");
@@ -1079,7 +1079,7 @@ pub extern "C" fn _start() -> ! {
 
     #[cfg(feature = "test-core-alloc")]
     core_alloc_test();
-    
+
     #[cfg(feature = "test-dynamic-replication")]
     dynamic_replication_test();
 

From 652d5761941e17c5fa88400af04acb646da7a5ab Mon Sep 17 00:00:00 2001
From: Erika Hunhoff <hunhoff.erika@gmail.com>
Date: Tue, 28 Nov 2023 21:53:46 +0000
Subject: [PATCH 18/32] Add logging, remove logging, fix kernel binary to
 specific address

---
 bootloader/src/kernel.rs                  | 35 ++++++++++++++++++++++-
 bootloader/src/main.rs                    |  2 +-
 bootloader/src/vspace.rs                  | 31 ++++++++++++++------
 kernel/src/arch/x86_64/process.rs         |  5 +++-
 kernel/src/memory/mod.rs                  |  2 +-
 kernel/src/nr.rs                          |  5 ++++
 kernel/src/transport/shmem.rs             |  2 ++
 kernel/tests/s11_rackscale_benchmarks.rs  | 10 +++----
 lib/rpc/src/transport/shmem/queue_mpmc.rs | 11 +------
 9 files changed, 75 insertions(+), 28 deletions(-)

diff --git a/bootloader/src/kernel.rs b/bootloader/src/kernel.rs
index 18cbe4f0b..de5d746b0 100644
--- a/bootloader/src/kernel.rs
+++ b/bootloader/src/kernel.rs
@@ -5,7 +5,9 @@
 use crate::alloc::vec::Vec;
 
 use bootloader_shared::TlsInfo;
+use core::sync::atomic::{AtomicUsize, Ordering};
 use elfloader::{self, ElfLoaderErr};
+use uefi::table::boot::AllocateType;
 use x86::bits64::paging::*;
 
 use crate::vspace::*;
@@ -59,6 +61,11 @@ pub(crate) fn paddr_to_kernel_vaddr(paddr: PAddr) -> VAddr {
 /// displacement.
 pub const KERNEL_OFFSET: usize = 1 << 46;
 
+static calls_to_alloc: AtomicUsize = AtomicUsize::new(0);
+
+const NUM_ALLOC_ADDRESSES: usize = 1;
+static ALLOC_ADDRESSES: [usize; NUM_ALLOC_ADDRESSES] = [0x1dc0e000];
+
 /// This struct stores meta-data required to construct
 /// an address space for the kernel and relocate the
 /// kernel ELF binary into it.
@@ -162,14 +169,40 @@ impl<'a> elfloader::ElfLoader for Kernel<'a> {
             is_page_aligned!(max_end),
             "max end is not aligned to page-size"
         );
+
+        let alloc_call_num = calls_to_alloc.fetch_add(1, Ordering::SeqCst);
+        let alloc_type = if alloc_call_num < NUM_ALLOC_ADDRESSES {
+            let alloc_addr = AllocateType::Address(ALLOC_ADDRESSES[alloc_call_num]);
+            info!(
+                "Attempting to allocate index {:?} at {:?}",
+                alloc_call_num, alloc_addr
+            );
+            alloc_addr
+        } else {
+            warn!(
+                "Attempting to allocate index {:?} at AnyPage - this may not work for rackscale",
+                alloc_call_num
+            );
+            AllocateType::AnyPages
+        };
+
         let pbase = VSpace::allocate_pages_aligned(
+            alloc_type,
             ((max_end - min_base) >> BASE_PAGE_SHIFT) as usize,
             uefi::table::boot::MemoryType(KERNEL_ELF),
             max_alignment,
         );
 
         self.offset = VAddr::from(KERNEL_OFFSET + pbase.as_usize());
-        info!("Kernel loaded at address: {:#x}", self.offset);
+        info!(
+            "Kernel loaded at address: {:#x} (paddr={:#x})",
+            self.offset,
+            pbase.as_usize()
+        );
+
+        if alloc_call_num < NUM_ALLOC_ADDRESSES {
+            assert!(pbase.as_usize() == ALLOC_ADDRESSES[alloc_call_num]);
+        }
 
         // Do the mappings:
         for (base, size, _alignment, action) in self.mapping.iter() {
diff --git a/bootloader/src/main.rs b/bootloader/src/main.rs
index 4e8eee06c..c28e64715 100644
--- a/bootloader/src/main.rs
+++ b/bootloader/src/main.rs
@@ -370,7 +370,7 @@ pub extern "C" fn uefi_start(handle: uefi::Handle, mut st: SystemTable<Boot>) ->
 
     // Next create an address space for our kernel
     trace!("Allocate a PML4 (page-table root)");
-    let pml4: PAddr = VSpace::allocate_one_page();
+    let pml4: PAddr = VSpace::allocate_one_page(AllocateType::AnyPages);
     let pml4_table = unsafe { &mut *paddr_to_uefi_vaddr(pml4).as_mut_ptr::<PML4>() };
 
     let mut kernel = Kernel {
diff --git a/bootloader/src/vspace.rs b/bootloader/src/vspace.rs
index 688acd8ef..e37614816 100644
--- a/bootloader/src/vspace.rs
+++ b/bootloader/src/vspace.rs
@@ -348,14 +348,15 @@ impl<'a> VSpace<'a> {
     }
 
     /// A simple wrapper function for allocating just oen page.
-    pub(crate) fn allocate_one_page() -> PAddr {
-        let paddr = VSpace::allocate_pages(1, uefi::table::boot::MemoryType(KERNEL_PT));
+    pub(crate) fn allocate_one_page(alloc_type: AllocateType) -> PAddr {
+        let paddr = VSpace::allocate_pages(alloc_type, 1, uefi::table::boot::MemoryType(KERNEL_PT));
         trace!("allocate_one_page {:#x}", paddr);
         paddr
     }
 
     /// Does an allocation of physical memory where the base-address is a multiple of `align_to`.
     pub(crate) fn allocate_pages_aligned(
+        alloc_type: AllocateType,
         how_many: usize,
         typ: uefi::table::boot::MemoryType,
         align_to: u64,
@@ -371,7 +372,7 @@ impl<'a> VSpace<'a> {
         assert!(actual_how_many >= how_many);
 
         // The region we allocated
-        let paddr = VSpace::allocate_pages(actual_how_many, typ);
+        let paddr = VSpace::allocate_pages(alloc_type, actual_how_many, typ);
         let end = paddr + (actual_how_many * BASE_PAGE_SIZE);
 
         // The region within the allocated one we actually want
@@ -431,13 +432,17 @@ impl<'a> VSpace<'a> {
     ///
     /// Zeroes the memory we allocate (TODO: I'm not sure if this is already done by UEFI).
     /// Returns a `u64` containing the base to that.
-    pub(crate) fn allocate_pages(how_many: usize, typ: uefi::table::boot::MemoryType) -> PAddr {
+    pub(crate) fn allocate_pages(
+        alloc_type: AllocateType,
+        how_many: usize,
+        typ: uefi::table::boot::MemoryType,
+    ) -> PAddr {
         let st = system_table();
         unsafe {
             match st
                 .as_ref()
                 .boot_services()
-                .allocate_pages(AllocateType::AnyPages, typ, how_many)
+                .allocate_pages(alloc_type, typ, how_many)
             {
                 Ok(num) => {
                     st.as_ref().boot_services().set_mem(
@@ -453,17 +458,17 @@ impl<'a> VSpace<'a> {
     }
 
     fn new_pt(&mut self) -> PDEntry {
-        let paddr: PAddr = VSpace::allocate_one_page();
+        let paddr: PAddr = VSpace::allocate_one_page(AllocateType::AnyPages);
         return PDEntry::new(paddr, PDFlags::P | PDFlags::RW);
     }
 
     fn new_pd(&mut self) -> PDPTEntry {
-        let paddr: PAddr = VSpace::allocate_one_page();
+        let paddr: PAddr = VSpace::allocate_one_page(AllocateType::AnyPages);
         return PDPTEntry::new(paddr, PDPTFlags::P | PDPTFlags::RW);
     }
 
     fn new_pdpt(&mut self) -> PML4Entry {
-        let paddr: PAddr = VSpace::allocate_one_page();
+        let paddr: PAddr = VSpace::allocate_one_page(AllocateType::AnyPages);
         return PML4Entry::new(paddr, PML4Flags::P | PML4Flags::RW);
     }
 
@@ -521,10 +526,18 @@ impl<'a> VSpace<'a> {
     ///  * The base should be a multiple of `BASE_PAGE_SIZE`.
     ///  * The size should be a multiple of `BASE_PAGE_SIZE`.
     #[allow(unused)]
-    pub fn map(&mut self, base: VAddr, size: usize, rights: MapAction, palignment: u64) {
+    pub fn map(
+        &mut self,
+        alloc_type: AllocateType,
+        base: VAddr,
+        size: usize,
+        rights: MapAction,
+        palignment: u64,
+    ) {
         assert!(base.is_base_page_aligned(), "base is not page-aligned");
         assert_eq!(size % BASE_PAGE_SIZE, 0, "size is not page-aligned");
         let paddr = VSpace::allocate_pages_aligned(
+            alloc_type,
             size / BASE_PAGE_SIZE,
             uefi::table::boot::MemoryType(KERNEL_ELF),
             palignment,
diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs
index a077c8667..540cc3b91 100644
--- a/kernel/src/arch/x86_64/process.rs
+++ b/kernel/src/arch/x86_64/process.rs
@@ -971,8 +971,12 @@ impl Executor for Ring3Executor {
             "Run on remote replica?"
         );
 
+        // THIS IS THE PROBLEM
+        log::info!("Before maybe switch vspace 2");
         self.maybe_switch_vspace();
+        log::info!("After maybe switch vspace 2");
         let entry_point = unsafe { (*self.vcpu_kernel()).resume_with_upcall };
+        log::info!("Entry point is: {:?}", entry_point);
 
         if entry_point == INVALID_EXECUTOR_START {
             Ring3Resumer::new_start(self.entry_point, self.stack_top())
@@ -980,7 +984,6 @@ impl Executor for Ring3Executor {
             // This is similar to `upcall` as it starts executing the defined upcall
             // handler, but on the regular stack (for that dispatcher) and not
             // the upcall stack. It's used to add a new core to a process.
-
             let entry_point = unsafe { (*self.vcpu_kernel()).resume_with_upcall };
             trace!("Added core entry point is at {:#x}", entry_point);
             let cpu_ctl = self.vcpu_addr().as_u64();
diff --git a/kernel/src/memory/mod.rs b/kernel/src/memory/mod.rs
index 38e38c7e0..dddebacd6 100644
--- a/kernel/src/memory/mod.rs
+++ b/kernel/src/memory/mod.rs
@@ -438,7 +438,7 @@ impl KernelAllocator {
             }
             frames
         } else {
-            log::info!("try-refill-shmem needed_base_pages={needed_base_pages} needed_large_pages={needed_large_pages} total_needed_base_pages={total_needed_base_pages} total_needed_large_pages={total_needed_large_pages}");
+            log::trace!("try-refill-shmem needed_base_pages={needed_base_pages} needed_large_pages={needed_large_pages} total_needed_base_pages={total_needed_base_pages} total_needed_large_pages={total_needed_large_pages}");
             rpc_get_shmem_frames(None, total_needed_large_pages)?
         };
 
diff --git a/kernel/src/nr.rs b/kernel/src/nr.rs
index 263115cfa..ae4eb0b86 100644
--- a/kernel/src/nr.rs
+++ b/kernel/src/nr.rs
@@ -49,6 +49,7 @@ lazy_static! {
 
             let nr  = Arc::try_new(
                 NodeReplicated::new(num_replicas, |afc: AffinityChange| {
+                    log::info!("Got AffinityChange 1: {:?}", afc);
                     let pcm = kcb::per_core_mem();
                     //log::info!("Got AffinityChange: {:?}", afc);
                     match afc {
@@ -239,11 +240,15 @@ impl Dispatch for KernelNode {
             Op::AllocatePid => {
                 // TODO(performance): O(n) scan probably not what we really
                 // want, fine for now, MAX_PROCESSES is tiny
+                log::info!("in op alloc pid");
                 for i in 0..MAX_PROCESSES {
                     if !self.process_map.contains_key(&i) {
+                        log::info!("in op alloc pid 1");
                         self.process_map.try_reserve(1)?;
+                        log::info!("in op alloc pid 2");
                         let r = self.process_map.insert(i, ());
                         assert!(r.is_none(), "!contains_key");
+                        log::info!("in op alloc pid 3");
                         return Ok(NodeResult::PidAllocated(i));
                     }
                 }
diff --git a/kernel/src/transport/shmem.rs b/kernel/src/transport/shmem.rs
index 31b854c5d..72e78d898 100644
--- a/kernel/src/transport/shmem.rs
+++ b/kernel/src/transport/shmem.rs
@@ -178,6 +178,7 @@ impl ShmemDevice {
 
         // Note: leaving this code as a comment as a way to test if all shmem is writeable.
 
+        /*
         #[cfg(feature = "rackscale")]
         match crate::CMDLINE.get().map_or(Mode::Native, |c| c.mode) {
             Mode::Controller => {
@@ -213,6 +214,7 @@ impl ShmemDevice {
             }
             _ => {}
         }
+        */
 
         // Map the MSI-X table into kernel space
         kvspace
diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs
index f863af806..40e11d702 100644
--- a/kernel/tests/s11_rackscale_benchmarks.rs
+++ b/kernel/tests/s11_rackscale_benchmarks.rs
@@ -897,9 +897,9 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
         .user_feature("rkapps:memcached-bench")
         .set_rackscale(true)
         .kernel_feature("pages-4k")
-        .kernel_feature("dynrep")
         .release()
         .build();
+    //.kernel_feature("dynrep")
 
     fn controller_match_fn(
         proc: &mut PtySession,
@@ -1010,13 +1010,13 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
 
     let config = if is_smoke {
         MemcachedInternalConfig {
-            num_queries: 100_000_000,
+            num_queries: 100_000,
             mem_size: 16,
         }
     } else {
         MemcachedInternalConfig {
-            num_queries: 100_000_000, // TODO(rackscale): should be 100_000_000,
-            mem_size: 16,             // TODO(rackscale): should be 32_000,
+            num_queries: 100_000, // TODO(rackscale): should be 100_000_000,
+            mem_size: 16,         // TODO(rackscale): should be 32_000,
         }
     };
 
@@ -1032,7 +1032,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
     test.cores_per_client = 4;
     test.cmd = format!(
         r#"init=memcachedbench.bin initargs={} appcmd='--x-benchmark-mem={} --x-benchmark-queries={}'"#,
-        2 * 4,
+        test.num_clients * test.cores_per_client,
         config.mem_size,
         config.num_queries
     );
diff --git a/lib/rpc/src/transport/shmem/queue_mpmc.rs b/lib/rpc/src/transport/shmem/queue_mpmc.rs
index 7febd1ca9..7a8fdb45a 100644
--- a/lib/rpc/src/transport/shmem/queue_mpmc.rs
+++ b/lib/rpc/src/transport/shmem/queue_mpmc.rs
@@ -154,24 +154,17 @@ impl<'a> State<'a> {
         // Calculate and check total data to push
         let push_data_len = values.iter().fold(0, |acc, x| acc + x.len());
         assert!(push_data_len <= QUEUE_ENTRY_SIZE);
-        log::info!("Attempt to push {:?} bytes", push_data_len);
 
         if push_data_len == 0 {
             return true;
         }
 
         let mask = self.mask;
-        log::info!("before enq");
         let mut pos = self.enqueue_pos(Relaxed);
-        log::info!("after enq");
         loop {
-            log::info!("before node");
             let node = &self.buffer[pos & mask];
-            log::info!("after node");
             let seq = (*node.get()).sequence.load(Acquire);
-            log::info!("after seq");
             let diff: isize = seq as isize - pos as isize;
-            log::info!("seq {:#x} diff {:#x}", seq, diff);
 
             match diff {
                 0 => {
@@ -184,8 +177,6 @@ impl<'a> State<'a> {
                             // Copy each value into the queue
                             let mut offset = 0;
                             for d in values.iter() {
-                                log::info!("d is at {:#x}", d.as_ptr() as usize);
-
                                 (*node.get()).value[offset..offset + d.len()].copy_from_slice(d);
                                 offset += d.len();
                             }
@@ -200,7 +191,7 @@ impl<'a> State<'a> {
                 _ => pos = self.enqueue_pos(Relaxed),
             }
         }
-        log::info!("pushed {:?} bytes", push_data_len);
+        log::trace!("pushed {:?} bytes", push_data_len);
 
         true
     }

From 95efcc4dcce8587a7dbb1731f614dff57145d545 Mon Sep 17 00:00:00 2001
From: Erika Hunhoff <hunhoff.erika@gmail.com>
Date: Tue, 28 Nov 2023 22:18:39 +0000
Subject: [PATCH 19/32] Note in process.rs about maybe switch vspace

---
 kernel/src/arch/x86_64/process.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs
index 540cc3b91..cbfce3790 100644
--- a/kernel/src/arch/x86_64/process.rs
+++ b/kernel/src/arch/x86_64/process.rs
@@ -1038,7 +1038,8 @@ impl Executor for Ring3Executor {
                     "Switching from 0x{:x} to 0x{:x}",
                     current_pml4, replica_pml4
                 );
-                controlregs::cr3_write(self.pml4.into());
+                //  TODO: if the replica changes, maybe this sometimes needs to be self.pml4.into()?
+                controlregs::cr3_write(replica_pml4.into());
             }
         }
     }

From 6e6bfaa05bf317cca26717970348019ffe3daa27 Mon Sep 17 00:00:00 2001
From: Erika Hunhoff <hunhoff.erika@gmail.com>
Date: Wed, 29 Nov 2023 02:12:28 +0000
Subject: [PATCH 20/32] Saving debugging state - may need to revert these
 changes later

---
 kernel/src/arch/x86_64/process.rs           | 58 ++++++++++++++++-
 kernel/src/arch/x86_64/vspace/page_table.rs | 71 ++++++++++++++++++++-
 kernel/src/memory/mod.rs                    | 11 +++-
 kernel/src/nr.rs                            |  2 +-
 kernel/src/nrproc.rs                        |  2 +-
 kernel/tests/s11_rackscale_benchmarks.rs    |  8 ++-
 6 files changed, 143 insertions(+), 9 deletions(-)

diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs
index cbfce3790..e3b3b5a40 100644
--- a/kernel/src/arch/x86_64/process.rs
+++ b/kernel/src/arch/x86_64/process.rs
@@ -1030,15 +1030,52 @@ impl Executor for Ring3Executor {
     }
 
     fn maybe_switch_vspace(&self) {
+        //use crate::arch::vspace::page_table::ReadOnlyPageTable;
+
         let replica_pml4 = NrProcess::<Ring3Process>::ptroot(self.pid).expect("Can't read pml4");
         unsafe {
             let current_pml4 = PAddr::from(controlregs::cr3());
+            /*
+            let pml4_entry = paddr_to_kernel_vaddr(current_pml4).as_ptr::<PML4Entry>();
+            let pml4_slice = core::slice::from_raw_parts(pml4_entry, 512);
+            let current_ropt = ReadOnlyPageTable {
+                pml4: pml4_slice.try_into().unwrap(),
+            };
+            log::info!("Printing current read only page table");
+            let current_walk = current_ropt.walk();
+            log::info!("current len = {:?}", current_walk.len());
+
+            let pml4_entry_rep = paddr_to_kernel_vaddr(replica_pml4).as_ptr::<PML4Entry>();
+            let pml4_slice_rep = core::slice::from_raw_parts(pml4_entry_rep, 512);
+            let rep_ropt = ReadOnlyPageTable {
+                pml4: pml4_slice_rep.try_into().unwrap(),
+            };
+            log::info!("Printing replica read only page table");
+            let rep_walk = rep_ropt.walk();
+
+            //assert!(current_walk == rep_walk);
+            let max = core::cmp::min(current_walk.len(), rep_walk.len());
+            log::info!("current len = {:?} rep len = {:?}", current_walk.len(), rep_walk.len());
+            for i in 0..max {
+                if current_walk[i] != rep_walk[i] {
+                    panic!("index {:?} current={:#x} replica={:#x}", i, current_walk[i], rep_walk[i]);
+                }
+            }
+            */
+
+            /*
+            for i in 0..512 {
+                if pml4_slice[i] != pml4_slice_rep[i] {
+                    log::info!("index={:?} current={:?} replica={:?}", i, pml4_slice[i], pml4_slice_rep[i]);
+                }
+            }
+            */
+
             if current_pml4 != replica_pml4 {
                 info!(
                     "Switching from 0x{:x} to 0x{:x}",
                     current_pml4, replica_pml4
                 );
-                //  TODO: if the replica changes, maybe this sometimes needs to be self.pml4.into()?
                 controlregs::cr3_write(replica_pml4.into());
             }
         }
@@ -1427,6 +1464,8 @@ impl Process for Ring3Process {
         module_name: String,
         writeable_sections: Vec<Frame>,
     ) -> Result<(), KError> {
+        info!("IN PROCESS LOAD");
+
         self.pid = pid;
         // TODO(error-handling): properly unwind on error
         self.writeable_sections.clear();
@@ -1466,11 +1505,26 @@ impl Process for Ring3Process {
         // TODO(broken): Big (>= 2 MiB) allocations should be inserted here too
         // TODO(ugly): Find a better way to express this mess
         let kvspace = super::vspace::INITIAL_VSPACE.lock();
+
+        use crate::arch::vspace::page_table::ReadOnlyPageTable;
+        let pt = ReadOnlyPageTable {
+            pml4: &kvspace.pml4,
+        };
+        let walk = pt.walk();
+        info!("Walk is len: {:?}", walk.len());
+        for (addr_idx, frame, action) in walk {
+            self.vspace
+                .map_frame(addr_idx, frame, action)
+                .expect("failed map");
+        }
+
+        /*
         for i in 128..=510 {
             let kernel_pml_entry = kvspace.pml4[i];
-            trace!("Patched in kernel mappings at {:?}", kernel_pml_entry);
+            info!("Patched in kernel mappings at {:?}", kernel_pml_entry);
             self.vspace.page_table.pml4[i] = kernel_pml_entry;
         }
+        */
 
         Ok(())
     }
diff --git a/kernel/src/arch/x86_64/vspace/page_table.rs b/kernel/src/arch/x86_64/vspace/page_table.rs
index 824f60da9..d05de6d12 100644
--- a/kernel/src/arch/x86_64/vspace/page_table.rs
+++ b/kernel/src/arch/x86_64/vspace/page_table.rs
@@ -978,9 +978,11 @@ impl PageTable {
 }
 
 pub(crate) struct ReadOnlyPageTable<'a> {
-    pml4: &'a PML4,
+    pub pml4: &'a PML4,
 }
 
+use alloc::vec::Vec;
+
 impl<'a> ReadOnlyPageTable<'a> {
     /// Get read-only access to the current page-table.
     ///
@@ -1021,6 +1023,73 @@ impl<'a> ReadOnlyPageTable<'a> {
         assert_ne!(entry.address(), PAddr::zero());
         unsafe { transmute::<VAddr, &mut PDPT>(paddr_to_kernel_vaddr(entry.address())) }
     }
+
+    pub fn walk(&self) -> Vec<(VAddr, Frame, MapAction)> {
+        log::info!("calling walk in PageTable");
+        let mut my_walk = Vec::with_capacity(1024);
+
+        // Do a DFS over PML4 slots 128 and up and record every present leaf
+        // mapping (4 KiB, 2 MiB or 1 GiB) as a (vaddr, frame, action) tuple
+        for pml4_idx in 128..PAGE_SIZE_ENTRIES {
+            if self.pml4[pml4_idx].is_present() {
+                for pdpt_idx in 0..PAGE_SIZE_ENTRIES {
+                    let pdpt = self.get_pdpt(self.pml4[pml4_idx]);
+                    if pdpt[pdpt_idx].is_present() {
+                        if !pdpt[pdpt_idx].is_page() {
+                            for pd_idx in 0..PAGE_SIZE_ENTRIES {
+                                let pd = self.get_pd(pdpt[pdpt_idx]);
+                                if pd[pd_idx].is_present() {
+                                    if !pd[pd_idx].is_page() {
+                                        for pt_idx in 0..PAGE_SIZE_ENTRIES {
+                                            let pt = self.get_pt(pd[pd_idx]);
+                                            if pt[pt_idx].is_present() {
+                                                let addr = pt[pt_idx].address();
+                                                let flags = pt[pt_idx].flags();
+                                                let frame = Frame::new(addr, BASE_PAGE_SIZE, 0);
+                                                let vaddr_pos: VAddr = VAddr::from(
+                                                    PML4_SLOT_SIZE * pml4_idx
+                                                        + HUGE_PAGE_SIZE * pdpt_idx
+                                                        + LARGE_PAGE_SIZE * pd_idx
+                                                        + pt_idx * BASE_PAGE_SIZE,
+                                                );
+                                                //let vaddr = paddr_to_kernel_vaddr(addr);
+                                                //log::info!("4K mapping addr={:?} vaddr={:?}", addr, vaddr);
+                                                my_walk.push((vaddr_pos, frame, flags.into()));
+                                            }
+                                        }
+                                    } else {
+                                        // Entry is a 2 MiB mapping: record it directly
+                                        let addr = pd[pd_idx].address();
+                                        let flags = pd[pd_idx].flags();
+                                        let frame = Frame::new(addr, LARGE_PAGE_SIZE, 0);
+                                        let vaddr_pos: VAddr = VAddr::from(
+                                            PML4_SLOT_SIZE * pml4_idx
+                                                + HUGE_PAGE_SIZE * pdpt_idx
+                                                + LARGE_PAGE_SIZE * pd_idx,
+                                        );
+                                        //let vaddr = paddr_to_kernel_vaddr(addr);
+                                        //log::info!("2 MB mapping addr={:?} vaddr={:?}", addr, vaddr);
+                                        my_walk.push((vaddr_pos, frame, flags.into()));
+                                    }
+                                }
+                            }
+                        } else {
+                            // Entry is a 1 GiB mapping: record it directly, no lower levels to walk
+                            let addr = pdpt[pdpt_idx].address();
+                            let flags = pdpt[pdpt_idx].flags();
+                            let frame = Frame::new(addr, HUGE_PAGE_SIZE, 0); // TODO: size is wrong
+                            let vaddr_pos: VAddr =
+                                VAddr::from(PML4_SLOT_SIZE * pml4_idx + HUGE_PAGE_SIZE * pdpt_idx);
+                            //let vaddr = paddr_to_kernel_vaddr(addr);
+                            //log::info!("1 GiB mapping addr={:?} vaddr={:?}", addr, vaddr);
+                            my_walk.push((vaddr_pos, frame, flags.into()));
+                        }
+                    }
+                }
+            }
+        }
+        my_walk
+    }
 }
 
 impl<'a> AddressSpace for ReadOnlyPageTable<'a> {
diff --git a/kernel/src/memory/mod.rs b/kernel/src/memory/mod.rs
index dddebacd6..a0bb4ed54 100644
--- a/kernel/src/memory/mod.rs
+++ b/kernel/src/memory/mod.rs
@@ -761,6 +761,8 @@ unsafe impl GlobalAlloc for KernelAllocator {
     }
 
     unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
+        #[cfg(feature = "rackscale")]
+        let mut drop = true;
         try_per_core_mem().map_or_else(
             || {
                 unreachable!("Trying to reallocate {:p} {:?} without a KCB.", ptr, layout);
@@ -788,7 +790,8 @@ unsafe impl GlobalAlloc for KernelAllocator {
                             } else if is_shmem_affinity(affinity) && !is_shmem_addr_with_affinity(ptr as u64, affinity, true) {
                                 // TODO(rackscale): should switch to non-shmem affinity for alloc below.
                                 // TODO(rackscale): check if shmem is a match for id?
-                                //panic!("Trying to realloc shmem to wrong or non- shmem allocator");
+                                warn!("Trying to realloc shmem to wrong or non- shmem allocator");
+                                drop = false;
                             } else if !is_shmem_affinity(affinity) && is_shmem_addr(ptr as u64, false, true) {
                                 // TODO(rackscale): should switch to use shmem affinity for alloc below.
                                 // TODO(rackscale): check if shmem is a match for id?
@@ -806,6 +809,12 @@ unsafe impl GlobalAlloc for KernelAllocator {
                             new_ptr,
                             core::cmp::min(layout.size(), new_size),
                         );
+                        #[cfg(feature = "rackscale")]
+                        if drop {
+                            self.dealloc(ptr, layout);
+                        }
+
+                        #[cfg(not(feature = "rackscale"))]
                         self.dealloc(ptr, layout);
                     }
                     new_ptr
diff --git a/kernel/src/nr.rs b/kernel/src/nr.rs
index ae4eb0b86..7d8f3df19 100644
--- a/kernel/src/nr.rs
+++ b/kernel/src/nr.rs
@@ -49,7 +49,7 @@ lazy_static! {
 
             let nr  = Arc::try_new(
                 NodeReplicated::new(num_replicas, |afc: AffinityChange| {
-                    log::info!("Got AffinityChange 1: {:?}", afc);
+                    log::trace!("Got AffinityChange 1: {:?}", afc);
                     let pcm = kcb::per_core_mem();
                     //log::info!("Got AffinityChange: {:?}", afc);
                     match afc {
diff --git a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs
index 8c4c9e4a2..ab366a514 100644
--- a/kernel/src/nrproc.rs
+++ b/kernel/src/nrproc.rs
@@ -33,7 +33,7 @@ pub(crate) fn register_thread_with_process_replicas() {
     #[cfg(not(feature = "rackscale"))]
     let node = *crate::environment::NODE_ID;
     #[cfg(feature = "rackscale")]
-    let node = 0; //*crate::environment::MACHINE_ID
+    let node = *crate::environment::MACHINE_ID; // node = 0;
 
     debug_assert!(PROCESS_TABLE.len() > node, "Invalid Node ID");
 
diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs
index 40e11d702..9f52e2671 100644
--- a/kernel/tests/s11_rackscale_benchmarks.rs
+++ b/kernel/tests/s11_rackscale_benchmarks.rs
@@ -1011,7 +1011,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
     let config = if is_smoke {
         MemcachedInternalConfig {
             num_queries: 100_000,
-            mem_size: 16,
+            mem_size: 16, //4 * 1024,
         }
     } else {
         MemcachedInternalConfig {
@@ -1023,12 +1023,14 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
     let mut test = RackscaleRun::new("userspace-smp".to_string(), built);
     test.controller_match_fn = controller_match_fn;
     test.transport = transport;
-    test.shmem_size *= 2;
+    test.controller_timeout *= 2; //*= 8;
+                                  //test.client_timeout *= 8;
+    test.shmem_size = 1024 * 64;
     test.use_affinity_shmem = cfg!(feature = "affinity-shmem");
     test.use_qemu_huge_pages = cfg!(feature = "affinity-shmem");
     test.file_name = file_name.to_string();
     test.run_dhcpd_for_baseline = true;
-    test.num_clients = 2;
+    test.num_clients = 3;
     test.cores_per_client = 4;
     test.cmd = format!(
         r#"init=memcachedbench.bin initargs={} appcmd='--x-benchmark-mem={} --x-benchmark-queries={}'"#,

From 84dc6291bea4c88bdf7c0b58e607839897212431 Mon Sep 17 00:00:00 2001
From: Erika Hunhoff <hunhoff.erika@gmail.com>
Date: Wed, 29 Nov 2023 18:07:30 +0000
Subject: [PATCH 21/32] pushing current debugging state

---
 kernel/src/arch/x86_64/irq.rs            | 27 ++++++++++++++++--------
 kernel/src/memory/mod.rs                 |  3 ++-
 kernel/tests/s11_rackscale_benchmarks.rs |  5 +++--
 kernel/testutils/src/rackscale_runner.rs |  4 +++-
 4 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/kernel/src/arch/x86_64/irq.rs b/kernel/src/arch/x86_64/irq.rs
index 3fe5c75fc..23db52a5b 100644
--- a/kernel/src/arch/x86_64/irq.rs
+++ b/kernel/src/arch/x86_64/irq.rs
@@ -533,36 +533,45 @@ unsafe fn timer_handler(_a: &ExceptionArguments) {
         use crate::arch::process::current_pid;
         let pid = current_pid().expect("dont have a pid?");
 
-        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(10)
+        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(33)
             && *REPLICA_STATE == 0
         {
-            info!("got a timer after 10s, remove rid 1");
+            warn!("got a timer after 10s, remove rid 1");
             let handles =
                 nrproc::NrProcess::<Ring3Process>::remove_replica(pid, 1).expect("removed");
             #[cfg(not(feature = "rackscale"))]
             super::tlb::shootdown(handles[0].clone());
             unsafe { *REPLICA_STATE.as_mut_ptr() = 1 };
         }
-        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(20)
+        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(36)
             && *REPLICA_STATE == 1
         {
-            info!("got a timer after 20s, add rid 1");
-            let handles = nrproc::NrProcess::<Ring3Process>::add_replica(pid, 1).expect("added");
+            warn!("got a timer after 20s, add rid 1");
+            let handles =
+            nrproc::NrProcess::<Ring3Process>::remove_replica(pid, 2).expect("removed");
             #[cfg(not(feature = "rackscale"))]
             super::tlb::shootdown(handles[0].clone());
             unsafe { *REPLICA_STATE.as_mut_ptr() = 2 };
         }
-        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(30)
+        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(39)
             && *REPLICA_STATE == 2
         {
-            info!("got a timer after 30s");
+            warn!("got a timer after 30s");
             unsafe { *REPLICA_STATE.as_mut_ptr() = 3 };
+
+            let handles = nrproc::NrProcess::<Ring3Process>::add_replica(pid, 1).expect("added");
+            #[cfg(not(feature = "rackscale"))]
+            super::tlb::shootdown(handles[0].clone());
         }
-        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(40)
+        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(32)
             && *REPLICA_STATE == 3
         {
-            info!("got a timer after 40s");
+            warn!("got a timer after 40s");
             unsafe { *REPLICA_STATE.as_mut_ptr() = 4 };
+
+            let handles = nrproc::NrProcess::<Ring3Process>::add_replica(pid, 2).expect("added");
+            #[cfg(not(feature = "rackscale"))]
+            super::tlb::shootdown(handles[0].clone());
         }
     }
     let kcb = get_kcb();
diff --git a/kernel/src/memory/mod.rs b/kernel/src/memory/mod.rs
index a0bb4ed54..5869868c0 100644
--- a/kernel/src/memory/mod.rs
+++ b/kernel/src/memory/mod.rs
@@ -795,7 +795,8 @@ unsafe impl GlobalAlloc for KernelAllocator {
                             } else if !is_shmem_affinity(affinity) && is_shmem_addr(ptr as u64, false, true) {
                                 // TODO(rackscale): should switch to use shmem affinity for alloc below.
                                 // TODO(rackscale): check if shmem is a match for id?
-                                panic!("Trying to realloc shmem using non-shmem allocator");
+                                warn!("Trying to realloc shmem using non-shmem allocator");
+                                drop = false;
                             }
                         }
                     }
diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs
index 9f52e2671..946f71f88 100644
--- a/kernel/tests/s11_rackscale_benchmarks.rs
+++ b/kernel/tests/s11_rackscale_benchmarks.rs
@@ -897,6 +897,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
         .user_feature("rkapps:memcached-bench")
         .set_rackscale(true)
         .kernel_feature("pages-4k")
+        .kernel_feature("dynrep")
         .release()
         .build();
     //.kernel_feature("dynrep")
@@ -1010,7 +1011,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
 
     let config = if is_smoke {
         MemcachedInternalConfig {
-            num_queries: 100_000,
+            num_queries: 100_000_000,
             mem_size: 16, //4 * 1024,
         }
     } else {
@@ -1024,7 +1025,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
     test.controller_match_fn = controller_match_fn;
     test.transport = transport;
     test.controller_timeout *= 2; //*= 8;
-                                  //test.client_timeout *= 8;
+    test.client_timeout *= 2;
     test.shmem_size = 1024 * 64;
     test.use_affinity_shmem = cfg!(feature = "affinity-shmem");
     test.use_qemu_huge_pages = cfg!(feature = "affinity-shmem");
diff --git a/kernel/testutils/src/rackscale_runner.rs b/kernel/testutils/src/rackscale_runner.rs
index ee17ddf8d..c5de20c53 100644
--- a/kernel/testutils/src/rackscale_runner.rs
+++ b/kernel/testutils/src/rackscale_runner.rs
@@ -397,11 +397,12 @@ impl<T: Clone + Send + 'static> RackscaleRun<T> {
         let _ignore = dcm.process.kill(SIGKILL);
 
         // If there's been an error, print everything
-        if controller_ret.is_err() || (&client_rets).into_iter().any(|ret| ret.is_err()) {
+        if true { //controller_ret.is_err() || (&client_rets).into_iter().any(|ret| ret.is_err()) {
             let outputs = all_outputs.lock().expect("Failed to get output lock");
             for (name, output) in outputs.iter() {
                 log_qemu_out_with_name(None, name.to_string(), output.to_string());
             }
+            /*
             if controller_ret.is_err() {
                 let dcm_log = dcm.exp_eof();
                 if dcm_log.is_ok() {
@@ -410,6 +411,7 @@ impl<T: Clone + Send + 'static> RackscaleRun<T> {
                     eprintln!("Failed to print DCM log.");
                 }
             }
+            */
         }
 
         for client_ret in client_rets {

From 02b71d823dc5b1feb66c57b9f7cb67bec0208864 Mon Sep 17 00:00:00 2001
From: Erika Hunhoff <hunhoff.erika@gmail.com>
Date: Wed, 29 Nov 2023 21:11:04 +0000
Subject: [PATCH 22/32] Updated parsing for per-thread throughput

---
 kernel/tests/s11_rackscale_benchmarks.rs | 43 ++++++++++++++++++++++--
 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs
index 946f71f88..28db715c3 100644
--- a/kernel/tests/s11_rackscale_benchmarks.rs
+++ b/kernel/tests/s11_rackscale_benchmarks.rs
@@ -905,7 +905,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
     fn controller_match_fn(
         proc: &mut PtySession,
         output: &mut String,
-        _cores_per_client: usize,
+        cores_per_client: usize,
         num_clients: usize,
         file_name: &str,
         is_baseline: bool,
@@ -948,6 +948,30 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
         *output += prev.as_str();
         *output += matched.as_str();
 
+        let mut thread_results = Vec::new();
+        let mut num_not_finished = num_clients * cores_per_client;
+        while num_not_finished > 0 {
+            let (prev, matched) = proc.exp_regex(r#"thread.(\d+).*\r\r"#)?;
+            *output += prev.as_str();
+            *output += matched.as_str();
+
+            if matched.contains("done") {
+                println!("> Thread done: {:?}", matched);
+                num_not_finished -= 1;
+            } else if matched.contains("executed") {
+                let matched = matched.replace("thread.", "");
+                let tokens = matched.split(" ").collect::<Vec<&str>>();
+                let thread_id = tokens[0].to_string();
+                let queries = tokens[2].to_string();
+                let time = tokens[5].to_string();
+                println!(
+                    "> thread {:?} performed {:?} queries in {:?} us",
+                    thread_id, queries, time
+                );
+                thread_results.push((thread_id, queries, time));
+            }
+        }
+
         // benchmark took 129 seconds
         let (prev, matched) = proc.exp_regex(r#"benchmark took (\d+) ms"#)?;
         println!("> {}", matched);
@@ -986,7 +1010,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
             .open(file_name)
             .expect("Can't open file");
         if write_headers {
-            let row = "git_rev,benchmark,nthreads,mem,queries,time,thpt,num_clients,num_replicas\n";
+            let row = "git_rev,benchmark,nthreads,mem,queries,time,thpt,num_clients,num_replicas,thead_num\n";
             let r = csv_file.write(row.as_bytes());
             assert!(r.is_ok());
         }
@@ -996,14 +1020,27 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
         let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes());
         assert!(r.is_ok());
         let out = format!(
-            "memcached,{},{},{},{},{},{},{}",
+            "memcached,{},{},{},{},{},{},{},aggregate",
             b_threads, b_mem, b_queries, b_time, b_thpt, actual_num_clients, num_clients
         );
+
         let r = csv_file.write(out.as_bytes());
         assert!(r.is_ok());
         let r = csv_file.write("\n".as_bytes());
         assert!(r.is_ok());
 
+        for (thread_id, queries, time) in thread_results {
+            let out = format!(
+                "memcached,{},{},{},{},,{},{},{}",
+                b_threads, b_mem, queries, time, actual_num_clients, num_clients, thread_id
+            );
+
+            let r = csv_file.write(out.as_bytes());
+            assert!(r.is_ok());
+            let r = csv_file.write("\n".as_bytes());
+            assert!(r.is_ok());
+        }
+
         println!("> {}", output);
 
         Ok(())

From 57187f9203c715d1fafb7e070f4175b48c8557dc Mon Sep 17 00:00:00 2001
From: Erika Hunhoff <hunhoff.erika@gmail.com>
Date: Wed, 29 Nov 2023 21:29:59 +0000
Subject: [PATCH 23/32] Update librettos commit

---
 usr/rkapps/build.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/usr/rkapps/build.rs b/usr/rkapps/build.rs
index 064c22d36..2e3dd2dbd 100644
--- a/usr/rkapps/build.rs
+++ b/usr/rkapps/build.rs
@@ -138,11 +138,11 @@ fn main() {
             .unwrap();
 
         println!(
-            "CHECKOUT 1f36e49a3c5fc529cd80278a1bd73b9941c813e6 {:?}",
+            "CHECKOUT eece690294fbfed418f43034b5dc77290865f8cf {:?}",
             out_dir
         );
         Command::new("git")
-            .args(&["checkout", "1f36e49a3c5fc529cd80278a1bd73b9941c813e6"])
+            .args(&["checkout", "eece690294fbfed418f43034b5dc77290865f8cf"])
             .current_dir(&Path::new(&out_dir))
             .status()
             .unwrap();

From f81763ef19afad030da046bfd0656bc86d003366 Mon Sep 17 00:00:00 2001
From: Gerd Zellweger <mail@gerdzellweger.com>
Date: Wed, 29 Nov 2023 22:11:47 -0800
Subject: [PATCH 24/32] Modifications for dynrep+new bios code.

Signed-off-by: Gerd Zellweger <mail@gerdzellweger.com>
---
 kernel/src/arch/x86_64/irq.rs               | 63 +++++++++++++++------
 kernel/src/arch/x86_64/process.rs           | 50 +++-------------
 kernel/src/arch/x86_64/syscall.rs           | 26 +++++++--
 kernel/src/arch/x86_64/tlb.rs               | 11 ++++
 kernel/src/arch/x86_64/vspace/page_table.rs |  8 ++-
 kernel/src/nrproc.rs                        |  4 +-
 kernel/src/process.rs                       |  2 +-
 lib/node-replication2                       |  2 +-
 lib/vibrio/src/rumprt/crt/mod.rs            |  7 +++
 9 files changed, 104 insertions(+), 69 deletions(-)

diff --git a/kernel/src/arch/x86_64/irq.rs b/kernel/src/arch/x86_64/irq.rs
index 23db52a5b..4318710c6 100644
--- a/kernel/src/arch/x86_64/irq.rs
+++ b/kernel/src/arch/x86_64/irq.rs
@@ -509,6 +509,14 @@ unsafe fn bkp_handler(a: &ExceptionArguments) {
 #[thread_local]
 pub(crate) static REPLICA_STATE: Lazy<usize> = Lazy::new(|| 0);
 
+
+pub static DYNREP_ENABLED: core::sync::atomic::AtomicBool = core::sync::atomic::AtomicBool::new(false);
+
+use lazy_static::lazy_static;
+lazy_static! {
+    pub static ref DYNREP_TIME_ANCHOR: rawtime::Instant = rawtime::Instant::now();
+}
+
 /// Handler for the timer exception.
 ///
 /// We currently use it to periodically make sure that a replica
@@ -529,51 +537,74 @@ unsafe fn timer_handler(_a: &ExceptionArguments) {
     }
 
     #[cfg(feature = "dynrep")]
-    if *crate::environment::MT_ID == 4 {
+    if *crate::environment::MT_ID == 0 && DYNREP_ENABLED.load(core::sync::atomic::Ordering::SeqCst) {
         use crate::arch::process::current_pid;
         let pid = current_pid().expect("dont have a pid?");
 
-        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(33)
+        if DYNREP_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(0)
             && *REPLICA_STATE == 0
         {
-            warn!("got a timer after 10s, remove rid 1");
+            warn!("PHASE 1: remove rid 1");
+
             let handles =
                 nrproc::NrProcess::<Ring3Process>::remove_replica(pid, 1).expect("removed");
+
             #[cfg(not(feature = "rackscale"))]
             super::tlb::shootdown(handles[0].clone());
+            #[cfg(feature = "rackscale")]
+            super::tlb::remote_shootdown(handles);
+
             unsafe { *REPLICA_STATE.as_mut_ptr() = 1 };
         }
-        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(36)
+        if DYNREP_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(3)
             && *REPLICA_STATE == 1
         {
-            warn!("got a timer after 20s, add rid 1");
+            warn!("PHASE 2: remove rid 2");
+
             let handles =
-            nrproc::NrProcess::<Ring3Process>::remove_replica(pid, 2).expect("removed");
+                nrproc::NrProcess::<Ring3Process>::remove_replica(pid, 2).expect("removed");
+
             #[cfg(not(feature = "rackscale"))]
             super::tlb::shootdown(handles[0].clone());
+            #[cfg(feature = "rackscale")]
+            super::tlb::remote_shootdown(handles);
+
             unsafe { *REPLICA_STATE.as_mut_ptr() = 2 };
         }
-        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(39)
+        if DYNREP_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(6)
             && *REPLICA_STATE == 2
         {
-            warn!("got a timer after 30s");
-            unsafe { *REPLICA_STATE.as_mut_ptr() = 3 };
+            warn!("PHASE 3: add rid 1");
 
             let handles = nrproc::NrProcess::<Ring3Process>::add_replica(pid, 1).expect("added");
+
             #[cfg(not(feature = "rackscale"))]
             super::tlb::shootdown(handles[0].clone());
+            #[cfg(feature = "rackscale")]
+            super::tlb::remote_shootdown(handles);
+
+            unsafe { *REPLICA_STATE.as_mut_ptr() = 3 };
+
         }
-        if rawtime::BOOT_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(32)
+        if DYNREP_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(9)
             && *REPLICA_STATE == 3
         {
-            warn!("got a timer after 40s");
-            unsafe { *REPLICA_STATE.as_mut_ptr() = 4 };
+            warn!("PHASE 4: add rid 2");
 
             let handles = nrproc::NrProcess::<Ring3Process>::add_replica(pid, 2).expect("added");
+
             #[cfg(not(feature = "rackscale"))]
             super::tlb::shootdown(handles[0].clone());
+            #[cfg(feature = "rackscale")]
+            super::tlb::remote_shootdown(handles);
+
+            unsafe { *REPLICA_STATE.as_mut_ptr() = 4 };
         }
     }
+    else {
+        //info!("dynrep not enabled MT_ID={} DYNREP_ENABLED.load(core::sync::atomic::Ordering::SeqCst)={}", *crate::environment::MT_ID, DYNREP_ENABLED.load(core::sync::atomic::Ordering::SeqCst));
+    }
+
     let kcb = get_kcb();
 
     if super::process::has_executor() {
@@ -798,10 +829,10 @@ pub extern "C" fn handle_generic_exception(a: ExceptionArguments) -> ! {
                 // Return immediately
                 TLB_TIME.update(|t| t + x86::time::rdtsc() - start);
 
-                let mut pborrow = super::process::CURRENT_EXECUTOR.borrow_mut();
-                let p = pborrow.as_ref().unwrap();
-                p.maybe_switch_vspace();
-                drop(pborrow);
+                //let mut pborrow = super::process::CURRENT_EXECUTOR.borrow_mut();
+                //let p = pborrow.as_ref().unwrap();
+                //p.maybe_switch_vspace();
+                //drop(pborrow);
 
                 kcb_iret_handle(kcb).resume()
             } else {
diff --git a/kernel/src/arch/x86_64/process.rs b/kernel/src/arch/x86_64/process.rs
index e3b3b5a40..7d0f66a9b 100644
--- a/kernel/src/arch/x86_64/process.rs
+++ b/kernel/src/arch/x86_64/process.rs
@@ -76,7 +76,7 @@ lazy_static! {
     pub(crate) static ref PROCESS_TABLE: ArrayVec<Arc<RwLock<NodeReplicated<NrProcess<Ring3Process>>>>, MAX_PROCESSES> = {
         use crate::memory::shmem_affinity::mid_to_shmem_affinity;
         use crate::arch::kcb::per_core_mem;
-        use crate::environment::NUM_MACHINES;
+        //use crate::environment::NUM_MACHINES;
 
         if !crate::CMDLINE
             .get()
@@ -107,7 +107,7 @@ lazy_static! {
 
         // Want at least one replica...
         let num_replicas =
-            NonZeroUsize::new(core::cmp::max(1, atopology::MACHINE_TOPOLOGY.num_nodes() * (*NUM_MACHINES))).unwrap();
+            NonZeroUsize::new(3).unwrap();
         let mut processes = ArrayVec::new();
 
         for _pid in 0..MAX_PROCESSES {
@@ -191,6 +191,7 @@ lazy_static! {
 }
 
 #[cfg(not(feature = "rackscale"))]
+#[allow(unused_variables)]
 fn create_process_table(
 ) -> ArrayVec<Arc<RwLock<NodeReplicated<NrProcess<Ring3Process>>>>, MAX_PROCESSES> {
     // Want at least one replica...
@@ -213,6 +214,7 @@ fn create_process_table(
                     match afc {
                         AffinityChange::Replica(r) => {
                             let affinity = { pcm.physical_memory.borrow().affinity };
+                            #[cfg(feature = "rackscale")]
                             pcm.set_mem_affinity(
                                 crate::memory::shmem_affinity::mid_to_shmem_affinity(r),
                             )
@@ -972,9 +974,7 @@ impl Executor for Ring3Executor {
         );
 
         // THIS IS THE PROBLEM
-        log::info!("Before maybe switch vspace 2");
         self.maybe_switch_vspace();
-        log::info!("After maybe switch vspace 2");
         let entry_point = unsafe { (*self.vcpu_kernel()).resume_with_upcall };
         log::info!("Entry point is: {:?}", entry_point);
 
@@ -1035,48 +1035,16 @@ impl Executor for Ring3Executor {
         let replica_pml4 = NrProcess::<Ring3Process>::ptroot(self.pid).expect("Can't read pml4");
         unsafe {
             let current_pml4 = PAddr::from(controlregs::cr3());
-            /*
-            let pml4_entry = paddr_to_kernel_vaddr(current_pml4).as_ptr::<PML4Entry>();
-            let pml4_slice = core::slice::from_raw_parts(pml4_entry, 512);
-            let current_ropt = ReadOnlyPageTable {
-                pml4: pml4_slice.try_into().unwrap(),
-            };
-            log::info!("Printing current read only page table");
-            let current_walk = current_ropt.walk();
-            log::info!("current len = {:?}", current_walk.len());
-
-            let pml4_entry_rep = paddr_to_kernel_vaddr(replica_pml4).as_ptr::<PML4Entry>();
-            let pml4_slice_rep = core::slice::from_raw_parts(pml4_entry_rep, 512);
-            let rep_ropt = ReadOnlyPageTable {
-                pml4: pml4_slice_rep.try_into().unwrap(),
-            };
-            log::info!("Printing replica read only page table");
-            let rep_walk = rep_ropt.walk();
-
-            //assert!(current_walk == rep_walk);
-            let max = core::cmp::min(current_walk.len(), rep_walk.len());
-            log::info!("current len = {:?} rep len = {:?}", current_walk.len(), rep_walk.len());
-            for i in 0..max {
-                if current_walk[i] != rep_walk[i] {
-                    panic!("index {:?} current={:#x} replica={:#x}", i, current_walk[i], rep_walk[i]);
-                }
-            }
-            */
-
-            /*
-            for i in 0..512 {
-                if pml4_slice[i] != pml4_slice_rep[i] {
-                    log::info!("index={:?} current={:?} replica={:?}", i, pml4_slice[i], pml4_slice_rep[i]);
-                }
-            }
-            */
-
             if current_pml4 != replica_pml4 {
-                info!(
+                debug!(
                     "Switching from 0x{:x} to 0x{:x}",
                     current_pml4, replica_pml4
                 );
                 controlregs::cr3_write(replica_pml4.into());
+                debug!("switched");
+            }
+            else {
+                debug!("not switched, the same");
             }
         }
     }
diff --git a/kernel/src/arch/x86_64/syscall.rs b/kernel/src/arch/x86_64/syscall.rs
index d73cd398f..0137b0610 100644
--- a/kernel/src/arch/x86_64/syscall.rs
+++ b/kernel/src/arch/x86_64/syscall.rs
@@ -294,18 +294,34 @@ impl<T: Arch86ProcessDispatch> ProcessDispatch<u64> for T {
 
     fn set_replicas(&self, add: u64, rid: u64) -> crate::error::KResult<(u64, u64)> {
         let pid = current_pid()?;
+
+        if rid == 99 {
+            log::info!("call from memcached");
+            lazy_static::initialize(&super::irq::DYNREP_TIME_ANCHOR);
+            super::irq::DYNREP_ENABLED.store(true, core::sync::atomic::Ordering::SeqCst);
+            info!("set_replicas: MT_ID={} DYNREP_ENABLED={} DYNREP_TIME_ANCHOR={}", 
+                *crate::environment::MT_ID, 
+                super::irq::DYNREP_ENABLED.load(core::sync::atomic::Ordering::SeqCst),
+                super::irq::DYNREP_TIME_ANCHOR.elapsed().as_nanos(),
+            );
+
+            return Ok((0, 0));
+        }
+
         let handles = if add > 0 {
             NrProcess::<Ring3Process>::add_replica(pid, rid as usize).expect("add_replica")
         } else {
             NrProcess::<Ring3Process>::remove_replica(pid, rid as usize).expect("remove_replica")
         };
 
-        #[cfg(feature = "rackscale")]
-        super::tlb::remote_shootdown(handles);
+        if handles.len() > 0 {
+            #[cfg(feature = "rackscale")]
+            super::tlb::remote_shootdown(handles);
 
-        // There will only be one handle in non-rackscale build
-        #[cfg(not(feature = "rackscale"))]
-        super::tlb::shootdown(handles[0].clone());
+            // There will only be one handle in non-rackscale build
+            #[cfg(not(feature = "rackscale"))]
+            super::tlb::shootdown(handles[0].clone());
+        }
 
         Ok((0, 0))
     }
diff --git a/kernel/src/arch/x86_64/tlb.rs b/kernel/src/arch/x86_64/tlb.rs
index 3a6e40ff9..fd5595be1 100644
--- a/kernel/src/arch/x86_64/tlb.rs
+++ b/kernel/src/arch/x86_64/tlb.rs
@@ -172,6 +172,10 @@ impl Shootdown {
 
         if self.vregion.start == 0u64 && self.vregion.end == 0u64 {
             log::info!("got special unmap for 0..0, skipping TLB flush");
+            use crate::process::Executor;
+            let pborrow = super::process::CURRENT_EXECUTOR.borrow_mut();
+            let p = pborrow.as_ref().unwrap();
+            p.maybe_switch_vspace();
             return;
         }
 
@@ -237,6 +241,13 @@ pub(crate) fn remote_dequeue(mid: kpi::system::MachineId) {
             // Process locally, then mark as complete
             shootdown(h);
             s.acknowledge();
+
+            //use crate::process::Executor;
+            //let pborrow = super::process::CURRENT_EXECUTOR.borrow_mut();
+            //let p = pborrow.as_ref().unwrap();
+            //p.maybe_switch_vspace();
+            //drop(pborrow);
+
         }
         None => return,
     }
diff --git a/kernel/src/arch/x86_64/vspace/page_table.rs b/kernel/src/arch/x86_64/vspace/page_table.rs
index d05de6d12..3b2680402 100644
--- a/kernel/src/arch/x86_64/vspace/page_table.rs
+++ b/kernel/src/arch/x86_64/vspace/page_table.rs
@@ -37,6 +37,8 @@ pub(crate) struct PageTable {
 
 impl Clone for PageTable {
     fn clone(&self) -> Self {
+        let start = rawtime::Instant::now();
+
         fn alloc_frame() -> Frame {
             let frame_ptr = unsafe {
                 let ptr = alloc::alloc::alloc_zeroed(PT_LAYOUT);
@@ -71,7 +73,7 @@ impl Clone for PageTable {
 
         // Do a DFS and find all mapped entries and replicate them in the new `pt`
         for pml4_idx in 0..PAGE_SIZE_ENTRIES {
-            if pml4_idx < pml4_index(KERNEL_BASE.into()) && self.pml4[pml4_idx].is_present() {
+            if self.pml4[pml4_idx].is_present() {
                 cloned_pt.pml4[pml4_idx] = new_pdpt();
 
                 for pdpt_idx in 0..PAGE_SIZE_ENTRIES {
@@ -115,7 +117,7 @@ impl Clone for PageTable {
                 }
             }
         }
-
+        log::debug!("PageTable::clone() completed in {:?}. {:#x}", start.elapsed(), cloned_pt.pml4_address());
         cloned_pt
     }
 }
@@ -123,7 +125,7 @@ impl Clone for PageTable {
 impl Drop for PageTable {
     #[allow(unreachable_code)]
     fn drop(&mut self) {
-        log::info!("calling drop in PageTable, skipping for now");
+        log::debug!("calling drop in PageTable, skipping for now");
         return;
 
         use alloc::alloc::dealloc;
diff --git a/kernel/src/nrproc.rs b/kernel/src/nrproc.rs
index ab366a514..b9b2a291a 100644
--- a/kernel/src/nrproc.rs
+++ b/kernel/src/nrproc.rs
@@ -33,7 +33,7 @@ pub(crate) fn register_thread_with_process_replicas() {
     #[cfg(not(feature = "rackscale"))]
     let node = *crate::environment::NODE_ID;
     #[cfg(feature = "rackscale")]
-    let node = *crate::environment::MACHINE_ID; // node = 0;
+    let node = *crate::environment::MACHINE_ID - 1; // node = 0;
 
     debug_assert!(PROCESS_TABLE.len() > node, "Invalid Node ID");
 
@@ -170,7 +170,7 @@ impl<P: Process> NrProcess<P> {
             .write(*crate::environment::MT_ID)
             .add_replica(rid)
             .expect("add_replica failed");
-        log::info!("added_replica {pid} {rid}");
+        log::debug!("added_replica {pid} {rid}");
 
         handle
     }
diff --git a/kernel/src/process.rs b/kernel/src/process.rs
index 0693832e8..6dca6eee0 100644
--- a/kernel/src/process.rs
+++ b/kernel/src/process.rs
@@ -49,7 +49,7 @@ pub(crate) type Pid = usize;
 pub(crate) type Eid = usize;
 
 /// How many (concurrent) processes the systems supports.
-pub(crate) const MAX_PROCESSES: usize = 12;
+pub(crate) const MAX_PROCESSES: usize = 1;
 
 /// How many registered "named" frames a process can have.
 pub(crate) const MAX_FRAMES_PER_PROCESS: usize = MAX_CORES;
diff --git a/lib/node-replication2 b/lib/node-replication2
index eba8f64b5..23d793890 160000
--- a/lib/node-replication2
+++ b/lib/node-replication2
@@ -1 +1 @@
-Subproject commit eba8f64b55cbde8e510565a382465f77472c8238
+Subproject commit 23d79389079659a202abc03a4d9753130acce22d
diff --git a/lib/vibrio/src/rumprt/crt/mod.rs b/lib/vibrio/src/rumprt/crt/mod.rs
index bb004d9c1..6036a632e 100644
--- a/lib/vibrio/src/rumprt/crt/mod.rs
+++ b/lib/vibrio/src/rumprt/crt/mod.rs
@@ -114,6 +114,13 @@ static mut INIT_INFO: InitInfo = InitInfo {
     ],
 };
 
+/// C hook to start dynrep; the kernel's `set_replicas` treats rid 99 (decimal, not 0x99) as the trigger.
+#[no_mangle]
+pub unsafe extern "C" fn rs_start_dynrep_protocol() {
+    log::info!("start_dynrep_protocol");
+    crate::syscalls::Process::set_replicas(true, 99).expect("start dynrep failed");
+}
+
 /// Sets up ps strings.
 pub unsafe fn netbsd_userlevel_init() {
     extern "C" {

From 4ae9f3fcff7120999d05837560b6eab85fd9eb0c Mon Sep 17 00:00:00 2001
From: Gerd Zellweger <mail@gerdzellweger.com>
Date: Wed, 29 Nov 2023 22:23:40 -0800
Subject: [PATCH 25/32] Update memcached build.

Signed-off-by: Gerd Zellweger <mail@gerdzellweger.com>
---
 kernel/tests/s11_rackscale_benchmarks.rs | 4 ++--
 usr/rkapps/build.rs                      | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs
index 28db715c3..52bf82eb6 100644
--- a/kernel/tests/s11_rackscale_benchmarks.rs
+++ b/kernel/tests/s11_rackscale_benchmarks.rs
@@ -1063,13 +1063,13 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
     test.transport = transport;
     test.controller_timeout *= 2; //*= 8;
     test.client_timeout *= 2;
-    test.shmem_size = 1024 * 64;
+    test.shmem_size = 1024 * 2;
     test.use_affinity_shmem = cfg!(feature = "affinity-shmem");
     test.use_qemu_huge_pages = cfg!(feature = "affinity-shmem");
     test.file_name = file_name.to_string();
     test.run_dhcpd_for_baseline = true;
     test.num_clients = 3;
-    test.cores_per_client = 4;
+    test.cores_per_client = 1;
     test.cmd = format!(
         r#"init=memcachedbench.bin initargs={} appcmd='--x-benchmark-mem={} --x-benchmark-queries={}'"#,
         test.num_clients * test.cores_per_client,
diff --git a/usr/rkapps/build.rs b/usr/rkapps/build.rs
index 2e3dd2dbd..ecd269b1e 100644
--- a/usr/rkapps/build.rs
+++ b/usr/rkapps/build.rs
@@ -138,11 +138,11 @@ fn main() {
             .unwrap();
 
         println!(
-            "CHECKOUT eece690294fbfed418f43034b5dc77290865f8cf {:?}",
+            "CHECKOUT d999a1f02b6fe0797f27ea109c028e6ae278012f {:?}",
             out_dir
         );
         Command::new("git")
-            .args(&["checkout", "eece690294fbfed418f43034b5dc77290865f8cf"])
+            .args(&["checkout", "d999a1f02b6fe0797f27ea109c028e6ae278012f"])
             .current_dir(&Path::new(&out_dir))
             .status()
             .unwrap();

From 7c139d24159a5995d1925643364568142fcda642 Mon Sep 17 00:00:00 2001
From: Gerd Zellweger <mail@gerdzellweger.com>
Date: Wed, 29 Nov 2023 22:41:30 -0800
Subject: [PATCH 26/32] remove depth limit on clone.

Signed-off-by: Gerd Zellweger <mail@gerdzellweger.com>
---
 usr/rkapps/build.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/usr/rkapps/build.rs b/usr/rkapps/build.rs
index ecd269b1e..755768c94 100644
--- a/usr/rkapps/build.rs
+++ b/usr/rkapps/build.rs
@@ -133,7 +133,7 @@ fn main() {
         println!("CLONE {:?}", out_dir);
         let url = "https://github.com/gz/librettos-packages.git";
         Command::new("git")
-            .args(&["clone", "--depth=1", url, out_dir.as_str()])
+            .args(&["clone", url, out_dir.as_str()])
             .status()
             .unwrap();
 

From c4923ec2750000c5533df6d00933c9d76141610d Mon Sep 17 00:00:00 2001
From: Erika Hunhoff <hunhoff.erika@gmail.com>
Date: Thu, 30 Nov 2023 07:10:30 +0000
Subject: [PATCH 27/32] Some config for dynrep and rump thread pinning hack for
 memcached

---
 kernel/tests/s11_rackscale_benchmarks.rs | 30 +++++++++++++++++-------
 lib/vibrio/src/rumprt/prt/mod.rs         | 20 +++++++++++++++-
 2 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs
index 52bf82eb6..5586ea15a 100644
--- a/kernel/tests/s11_rackscale_benchmarks.rs
+++ b/kernel/tests/s11_rackscale_benchmarks.rs
@@ -1010,7 +1010,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
             .open(file_name)
             .expect("Can't open file");
         if write_headers {
-            let row = "git_rev,benchmark,nthreads,mem,queries,time,thpt,num_clients,num_replicas,thead_num\n";
+            let row = "git_rev,benchmark,nthreads,mem,queries,time,thpt,num_clients,num_replicas,thread_num\n";
             let r = csv_file.write(row.as_bytes());
             assert!(r.is_ok());
         }
@@ -1020,8 +1020,15 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
         let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes());
         assert!(r.is_ok());
         let out = format!(
-            "memcached,{},{},{},{},{},{},{},aggregate",
-            b_threads, b_mem, b_queries, b_time, b_thpt, actual_num_clients, num_clients
+            "memcached,{},{},{},{},{},{},{},{}",
+            b_threads,
+            b_mem,
+            b_queries,
+            b_time,
+            b_thpt,
+            actual_num_clients,
+            num_clients,
+            "aggregate"
         );
 
         let r = csv_file.write(out.as_bytes());
@@ -1030,9 +1037,12 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
         assert!(r.is_ok());
 
         for (thread_id, queries, time) in thread_results {
+            let r = csv_file.write(format!("{},", env!("GIT_HASH")).as_bytes());
+            assert!(r.is_ok());
+
             let out = format!(
-                "memcached,{},{},{},{},,{},{},{}",
-                b_threads, b_mem, queries, time, actual_num_clients, num_clients, thread_id
+                "memcached,{},{},{},{},{},{},{},{}",
+                b_threads, b_mem, queries, time, " ", actual_num_clients, num_clients, thread_id
             );
 
             let r = csv_file.write(out.as_bytes());
@@ -1050,11 +1060,15 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
         MemcachedInternalConfig {
             num_queries: 100_000_000,
             mem_size: 16, //4 * 1024,
+                          //num_queries: 1_000_000_000,
+                          //mem_size: 512,
         }
     } else {
         MemcachedInternalConfig {
-            num_queries: 100_000, // TODO(rackscale): should be 100_000_000,
-            mem_size: 16,         // TODO(rackscale): should be 32_000,
+            num_queries: 100_000_000,
+            mem_size: 16, //4 * 1024,
+                          //num_queries: 1_000_000_000, // 1_000_000_000, // TODO(rackscale): should be 100_000_000,
+                          //mem_size: 512,              // TODO(rackscale): should be 32_000,
         }
     };
 
@@ -1069,7 +1083,7 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
     test.file_name = file_name.to_string();
     test.run_dhcpd_for_baseline = true;
     test.num_clients = 3;
-    test.cores_per_client = 1;
+    test.cores_per_client = 1; // 2
     test.cmd = format!(
         r#"init=memcachedbench.bin initargs={} appcmd='--x-benchmark-mem={} --x-benchmark-queries={}'"#,
         test.num_clients * test.cores_per_client,
diff --git a/lib/vibrio/src/rumprt/prt/mod.rs b/lib/vibrio/src/rumprt/prt/mod.rs
index 5582e8e23..685e520a0 100644
--- a/lib/vibrio/src/rumprt/prt/mod.rs
+++ b/lib/vibrio/src/rumprt/prt/mod.rs
@@ -191,7 +191,16 @@ pub unsafe extern "C" fn rumprun_makelwp(
     rump_pub_lwproc_switch(curlwp);
 
     let coreid = (rlid as usize) % AVAILABLE_CORES.load(Ordering::Relaxed);
-    let gtid = crate::rumprt::CPUIDX_TO_GTID.lock()[coreid];
+    let hacky_coreid = match rlid as usize {
+        2 => 0,
+        11 => 1,
+        12 => 2,
+        13 => 3,
+        14 => 4,
+        15 => 5,
+        _ => coreid,
+    };
+    let gtid = crate::rumprt::CPUIDX_TO_GTID.lock()[hacky_coreid];
     let tid = Environment::thread().spawn_with_args(
         stack,
         Some(rumprun_makelwp_tramp),
@@ -200,6 +209,15 @@ pub unsafe extern "C" fn rumprun_makelwp(
         None,
         tls_private,
     );
+    trace!(
+        "rlid={:?}, available_cores={:?} rump_core_id={:?} gtid={:?}, tid={:?}",
+        rlid,
+        AVAILABLE_CORES.load(Ordering::Relaxed),
+        coreid,
+        gtid,
+        tid
+    );
+
     debug!(
         "rumprun_makelwp spawned {:?} on core {} (gtid={:?})",
         tid, coreid, gtid

From 5748e0c959765973212f2973e3f44fb98ec9b693 Mon Sep 17 00:00:00 2001
From: Erika Hunhoff <54562339+hunhoffe@users.noreply.github.com>
Date: Thu, 30 Nov 2023 11:00:33 -0700
Subject: [PATCH 28/32] change dynrep intervals to 5s

---
 kernel/src/arch/x86_64/irq.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/src/arch/x86_64/irq.rs b/kernel/src/arch/x86_64/irq.rs
index 4318710c6..4adb1386e 100644
--- a/kernel/src/arch/x86_64/irq.rs
+++ b/kernel/src/arch/x86_64/irq.rs
@@ -556,7 +556,7 @@ unsafe fn timer_handler(_a: &ExceptionArguments) {
 
             unsafe { *REPLICA_STATE.as_mut_ptr() = 1 };
         }
-        if DYNREP_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(3)
+        if DYNREP_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(5)
             && *REPLICA_STATE == 1
         {
             warn!("PHASE 2: remove rid 2");
@@ -571,7 +571,7 @@ unsafe fn timer_handler(_a: &ExceptionArguments) {
 
             unsafe { *REPLICA_STATE.as_mut_ptr() = 2 };
         }
-        if DYNREP_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(6)
+        if DYNREP_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(10)
             && *REPLICA_STATE == 2
         {
             warn!("PHASE 3: add rid 1");
@@ -586,7 +586,7 @@ unsafe fn timer_handler(_a: &ExceptionArguments) {
             unsafe { *REPLICA_STATE.as_mut_ptr() = 3 };
 
         }
-        if DYNREP_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(9)
+        if DYNREP_TIME_ANCHOR.elapsed() > rawtime::Duration::from_secs(15)
             && *REPLICA_STATE == 3
         {
             warn!("PHASE 4: add rid 2");

From ac0965f5d48d70c3523502fffca4467a676d8d79 Mon Sep 17 00:00:00 2001
From: Reto Achermann <achreto@gmail.com>
Date: Thu, 30 Nov 2023 10:55:57 -0800
Subject: [PATCH 29/32] working 64GB config

Signed-off-by: Reto Achermann <achreto@gmail.com>
---
 kernel/tests/s11_rackscale_benchmarks.rs | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/kernel/tests/s11_rackscale_benchmarks.rs b/kernel/tests/s11_rackscale_benchmarks.rs
index 5586ea15a..db0291fe6 100644
--- a/kernel/tests/s11_rackscale_benchmarks.rs
+++ b/kernel/tests/s11_rackscale_benchmarks.rs
@@ -1059,14 +1059,14 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
     let config = if is_smoke {
         MemcachedInternalConfig {
             num_queries: 100_000_000,
-            mem_size: 16, //4 * 1024,
+            mem_size: 64*1024, //4 * 1024,
                           //num_queries: 1_000_000_000,
                           //mem_size: 512,
         }
     } else {
         MemcachedInternalConfig {
             num_queries: 100_000_000,
-            mem_size: 16, //4 * 1024,
+            mem_size: 64*1024, //4 * 1024,
                           //num_queries: 1_000_000_000, // 1_000_000_000, // TODO(rackscale): should be 100_000_000,
                           //mem_size: 512,              // TODO(rackscale): should be 32_000,
         }
@@ -1075,14 +1075,15 @@ fn s11_rackscale_memcached_dynrep_benchmark_internal() {
     let mut test = RackscaleRun::new("userspace-smp".to_string(), built);
     test.controller_match_fn = controller_match_fn;
     test.transport = transport;
-    test.controller_timeout *= 2; //*= 8;
-    test.client_timeout *= 2;
-    test.shmem_size = 1024 * 2;
+    test.controller_timeout *= 100; //*= 8; TODO: make this dependent on the memory size
+    test.client_timeout *= 100;
+    test.shmem_size = std::cmp::max(1024 * 2, 2*config.mem_size);
     test.use_affinity_shmem = cfg!(feature = "affinity-shmem");
     test.use_qemu_huge_pages = cfg!(feature = "affinity-shmem");
     test.file_name = file_name.to_string();
     test.run_dhcpd_for_baseline = true;
     test.num_clients = 3;
+    test.memory = 2*4096;
     test.cores_per_client = 1; // 2
     test.cmd = format!(
         r#"init=memcachedbench.bin initargs={} appcmd='--x-benchmark-mem={} --x-benchmark-queries={}'"#,

From 9dbf884e15ac56f940b6cc281de084bdc2bc2aff Mon Sep 17 00:00:00 2001
From: zmckevitt <zack.mckevitt@gmail.com>
Date: Tue, 5 Dec 2023 22:26:38 -0500
Subject: [PATCH 30/32] Added metrics for page allocation/deallocation to page
 table replication

---
 kernel/src/arch/x86_64/vspace/page_table.rs | 50 +++++++++++++++++++--
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/kernel/src/arch/x86_64/vspace/page_table.rs b/kernel/src/arch/x86_64/vspace/page_table.rs
index 3b2680402..259e7c928 100644
--- a/kernel/src/arch/x86_64/vspace/page_table.rs
+++ b/kernel/src/arch/x86_64/vspace/page_table.rs
@@ -15,6 +15,9 @@ use crate::error::KError;
 use crate::memory::vspace::*;
 use crate::memory::{kernel_vaddr_to_paddr, paddr_to_kernel_vaddr, Frame, PAddr, VAddr};
 
+use lazy_static::lazy_static;
+use spin::Mutex;
+
 /// Describes a potential modification operation on existing page tables.
 pub(super) const PT_LAYOUT: Layout =
     unsafe { Layout::from_size_align_unchecked(BASE_PAGE_SIZE, BASE_PAGE_SIZE) };
@@ -30,6 +33,12 @@ enum Modify {
     Unmap,
 }
 
+// Stats for dynamic replication
+lazy_static! {
+    pub static ref ALLOCS: Mutex<u32> = Mutex::new(0);
+    pub static ref DEALLOCS: Mutex<u32> = Mutex::new(0);
+}
+
 /// The actual page-table. We allocate the PML4 upfront.
 pub(crate) struct PageTable {
     pub pml4: Pin<Box<PML4>>,
@@ -39,6 +48,13 @@ impl Clone for PageTable {
     fn clone(&self) -> Self {
         let start = rawtime::Instant::now();
 
+        *ALLOCS.lock() += 1;
+        log::info!(
+            "PageTable::Allocations: {} (pml4 addr: {:?})",
+            *ALLOCS.lock(),
+            self.pml4_address()
+        );
+
         fn alloc_frame() -> Frame {
             let frame_ptr = unsafe {
                 let ptr = alloc::alloc::alloc_zeroed(PT_LAYOUT);
@@ -70,11 +86,16 @@ impl Clone for PageTable {
         }
 
         let mut cloned_pt = PageTable::new().expect("Can't clone PT");
-
+        let mut p_allocs = 0;
         // Do a DFS and find all mapped entries and replicate them in the new `pt`
         for pml4_idx in 0..PAGE_SIZE_ENTRIES {
+
+            let reached_kernel = pml4_idx >= pml4_index(KERNEL_BASE.into());
             if self.pml4[pml4_idx].is_present() {
                 cloned_pt.pml4[pml4_idx] = new_pdpt();
+                if !reached_kernel {
+                    p_allocs += 1;
+                }
 
                 for pdpt_idx in 0..PAGE_SIZE_ENTRIES {
                     let pdpt = self.get_pdpt(self.pml4[pml4_idx]);
@@ -83,6 +104,9 @@ impl Clone for PageTable {
                     if pdpt[pdpt_idx].is_present() {
                         if !pdpt[pdpt_idx].is_page() {
                             cloned_pdpt[pdpt_idx] = new_pd();
+                            if !reached_kernel {
+                                p_allocs += 1;
+                            }
                             let cloned_pdpt_entry = cloned_pdpt[pdpt_idx];
                             drop(cloned_pdpt);
 
@@ -93,6 +117,9 @@ impl Clone for PageTable {
                                 if pd[pd_idx].is_present() {
                                     if !pd[pd_idx].is_page() {
                                         cloned_pd[pd_idx] = new_pt();
+                                        if !reached_kernel {
+                                            p_allocs += 1;
+                                        }
                                         let cloned_pd_entry = cloned_pd[pd_idx];
                                         drop(cloned_pd);
 
@@ -117,7 +144,12 @@ impl Clone for PageTable {
                 }
             }
         }
-        log::debug!("PageTable::clone() completed in {:?}. {:#x}", start.elapsed(), cloned_pt.pml4_address());
+        log::info!("PageTable::Page Allocations: {}", p_allocs);
+        log::debug!(
+            "PageTable::clone() completed in {:?}. {:#x}",
+            start.elapsed(),
+            cloned_pt.pml4_address()
+        );
         cloned_pt
     }
 }
@@ -126,8 +158,14 @@ impl Drop for PageTable {
     #[allow(unreachable_code)]
     fn drop(&mut self) {
         log::debug!("calling drop in PageTable, skipping for now");
-        return;
-
+        *DEALLOCS.lock() += 1;
+        log::info!(
+            "PageTable::Deallocations: {} (pml4 addr: {:?})",
+            *DEALLOCS.lock(),
+            self.pml4_address()
+        );
+        // return;
+        let mut p_deallocs = 0;
         use alloc::alloc::dealloc;
         // Do a DFS and free all page-table memory allocated below kernel-base,
         // don't free the mapped frames -- we return them later through NR
@@ -149,6 +187,7 @@ impl Drop for PageTable {
                                         let addr = pd[pd_idx].address();
                                         let vaddr = paddr_to_kernel_vaddr(addr);
                                         unsafe { dealloc(vaddr.as_mut_ptr(), PT_LAYOUT) };
+                                        p_deallocs += 1;
                                     }
                                 } else {
                                     // Encountered a 2 MiB mapping, nothing to free
@@ -158,6 +197,7 @@ impl Drop for PageTable {
                             let addr = pdpt[pdpt_idx].address();
                             let vaddr = paddr_to_kernel_vaddr(addr);
                             unsafe { dealloc(vaddr.as_mut_ptr(), PT_LAYOUT) };
+                            p_deallocs += 1;
                         } else {
                             // Encountered Page is a 1 GiB mapping, nothing to free
                         }
@@ -168,9 +208,11 @@ impl Drop for PageTable {
                 let addr = self.pml4[pml4_idx].address();
                 let vaddr = paddr_to_kernel_vaddr(addr);
                 unsafe { dealloc(vaddr.as_mut_ptr(), PT_LAYOUT) };
+                p_deallocs += 1;
                 self.pml4[pml4_idx] = PML4Entry(0x0);
             }
         }
+        log::info!("PageTable::Page Deallocations: {}", p_deallocs);
     }
 }
 

From cb513936e8701f1729867382310f4df4f23f45d9 Mon Sep 17 00:00:00 2001
From: zmckevitt <zack.mckevitt@gmail.com>
Date: Tue, 5 Dec 2023 22:29:23 -0500
Subject: [PATCH 31/32] Fixed formatting for page table metrics

---
 kernel/src/arch/x86_64/vspace/page_table.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/src/arch/x86_64/vspace/page_table.rs b/kernel/src/arch/x86_64/vspace/page_table.rs
index 259e7c928..fccae3fb1 100644
--- a/kernel/src/arch/x86_64/vspace/page_table.rs
+++ b/kernel/src/arch/x86_64/vspace/page_table.rs
@@ -89,7 +89,6 @@ impl Clone for PageTable {
         let mut p_allocs = 0;
         // Do a DFS and find all mapped entries and replicate them in the new `pt`
         for pml4_idx in 0..PAGE_SIZE_ENTRIES {
-
             let reached_kernel = pml4_idx >= pml4_index(KERNEL_BASE.into());
             if self.pml4[pml4_idx].is_present() {
                 cloned_pt.pml4[pml4_idx] = new_pdpt();

From 68bb6ac939632951f9366c104b148fe301475cda Mon Sep 17 00:00:00 2001
From: Erika Hunhoff <hunhoff.erika@gmail.com>
Date: Wed, 6 Dec 2023 19:15:49 -0700
Subject: [PATCH 32/32] update memcached

---
 usr/rkapps/build.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/usr/rkapps/build.rs b/usr/rkapps/build.rs
index 755768c94..baa777371 100644
--- a/usr/rkapps/build.rs
+++ b/usr/rkapps/build.rs
@@ -138,11 +138,11 @@ fn main() {
             .unwrap();
 
         println!(
-            "CHECKOUT d999a1f02b6fe0797f27ea109c028e6ae278012f {:?}",
+            "CHECKOUT 5c42c159f781fa62a269dd99fcb8ded6389049e0 {:?}",
             out_dir
         );
         Command::new("git")
-            .args(&["checkout", "d999a1f02b6fe0797f27ea109c028e6ae278012f"])
+            .args(&["checkout", "5c42c159f781fa62a269dd99fcb8ded6389049e0"])
             .current_dir(&Path::new(&out_dir))
             .status()
             .unwrap();