Skip to content

Commit 6bc5e60

Browse files
faithanalog and iliana authored
RandomWithDistinctSleds region allocation strategy (#3858)
PR #3650 introduced the Random region allocation strategy to allocate regions randomly across the rack. This expands on that with the addition of the RandomWithDistinctSleds region allocation strategy. This strategy is the same, but requires that the 3 crucible regions be allocated on 3 different sleds to improve resiliency against a whole-sled failure. The Random strategy still exists, and does not require 3 distinct sleds. This is useful in one-sled environments such as the integration tests and lab setups. This also fixes a shortcoming of #3650 whereby multiple datasets on a single zpool could be selected. That fix applies to both the old Random strategy and the new RandomWithDistinctSleds strategy. At present, I have unit tests that verify the allocation behavior works correctly with cockroachdb, and we can try it out on dogfood. Adds the `-r` / `--rack-topology` command line argument to `omicron-package target create`. Use this to specify whether you are packaging for a single-sled or multi-sled environment. Under single-sled environments, the requirement for 3 distinct sleds is removed. Fixes #3702 --------- Co-authored-by: iliana etaoin <[email protected]>
1 parent e86579c commit 6bc5e60

File tree

24 files changed

+617
-204
lines changed

24 files changed

+617
-204
lines changed

.github/buildomat/jobs/deploy.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ cd /opt/oxide/work
143143

144144
ptime -m tar xvzf /input/package/work/package.tar.gz
145145
cp /input/package/work/zones/* out/
146+
mv out/omicron-nexus-single-sled.tar.gz out/omicron-nexus.tar.gz
146147
mkdir tests
147148
for p in /input/ci-tools/work/end-to-end-tests/*.gz; do
148149
ptime -m gunzip < "$p" > "tests/$(basename "${p%.gz}")"

.github/buildomat/jobs/package.sh

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ ptime -m ./tools/ci_download_softnpu_machinery
4545

4646
# Build the test target
4747
ptime -m cargo run --locked --release --bin omicron-package -- \
48-
-t test target create -i standard -m non-gimlet -s softnpu
48+
-t test target create -i standard -m non-gimlet -s softnpu -r single-sled
4949
ptime -m cargo run --locked --release --bin omicron-package -- \
5050
-t test package
5151

@@ -81,9 +81,13 @@ stamp_packages() {
8181
done
8282
}
8383

84+
# Keep the single-sled Nexus zone around for the deploy job. (The global zone
85+
# build below overwrites the file.)
86+
mv out/omicron-nexus.tar.gz out/omicron-nexus-single-sled.tar.gz
87+
8488
# Build necessary for the global zone
8589
ptime -m cargo run --locked --release --bin omicron-package -- \
86-
-t host target create -i standard -m gimlet -s asic
90+
-t host target create -i standard -m gimlet -s asic -r multi-sled
8791
ptime -m cargo run --locked --release --bin omicron-package -- \
8892
-t host package
8993
stamp_packages omicron-sled-agent maghemite propolis-server overlay
@@ -111,6 +115,7 @@ zones=(
111115
out/external-dns.tar.gz
112116
out/internal-dns.tar.gz
113117
out/omicron-nexus.tar.gz
118+
out/omicron-nexus-single-sled.tar.gz
114119
out/oximeter-collector.tar.gz
115120
out/propolis-server.tar.gz
116121
out/switch-*.tar.gz

.github/buildomat/jobs/tuf-repo.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,11 @@ done
7777
mkdir /work/package
7878
pushd /work/package
7979
tar xf /input/package/work/package.tar.gz out package-manifest.toml target/release/omicron-package
80-
target/release/omicron-package -t default target create -i standard -m gimlet -s asic
80+
target/release/omicron-package -t default target create -i standard -m gimlet -s asic -r multi-sled
8181
ln -s /input/package/work/zones/* out/
8282
rm out/switch-softnpu.tar.gz # not used when target switch=asic
8383
rm out/omicron-gateway-softnpu.tar.gz # not used when target switch=asic
84+
rm out/omicron-nexus-single-sled.tar.gz # only used for deploy tests
8485
for zone in out/*.tar.gz; do
8586
target/release/omicron-package stamp "$(basename "${zone%.tar.gz}")" "$VERSION"
8687
done

.github/workflows/rust.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ jobs:
4141
- name: Install Pre-Requisites
4242
run: ./tools/install_builder_prerequisites.sh -y
4343
- name: Set default target
44-
run: cargo run --bin omicron-package -- -t default target create
44+
run: cargo run --bin omicron-package -- -t default target create -r single-sled
4545
- name: Check build of deployed Omicron packages
4646
run: cargo run --bin omicron-package -- -t default check
4747

common/src/nexus_config.rs

Lines changed: 59 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,8 @@ pub struct PackageConfig {
372372
pub dendrite: HashMap<SwitchLocation, DpdConfig>,
373373
/// Background task configuration
374374
pub background_tasks: BackgroundTaskConfig,
375+
/// Default Crucible region allocation strategy
376+
pub default_region_allocation_strategy: RegionAllocationStrategy,
375377
}
376378

377379
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
@@ -594,6 +596,9 @@ mod test {
594596
dns_external.period_secs_propagation = 7
595597
dns_external.max_concurrent_server_updates = 8
596598
external_endpoints.period_secs = 9
599+
[default_region_allocation_strategy]
600+
type = "random"
601+
seed = 0
597602
"##,
598603
)
599604
.unwrap();
@@ -677,6 +682,10 @@ mod test {
677682
period_secs: Duration::from_secs(9),
678683
}
679684
},
685+
default_region_allocation_strategy:
686+
crate::nexus_config::RegionAllocationStrategy::Random {
687+
seed: Some(0)
688+
}
680689
},
681690
}
682691
);
@@ -724,6 +733,8 @@ mod test {
724733
dns_external.period_secs_propagation = 7
725734
dns_external.max_concurrent_server_updates = 8
726735
external_endpoints.period_secs = 9
736+
[default_region_allocation_strategy]
737+
type = "random"
727738
"##,
728739
)
729740
.unwrap();
@@ -864,25 +875,31 @@ mod test {
864875
struct DummyConfig {
865876
deployment: DeploymentConfig,
866877
}
867-
let config_path = "../smf/nexus/config-partial.toml";
868-
println!(
869-
"checking {:?} with example deployment section added",
870-
config_path
871-
);
872-
let mut contents = std::fs::read_to_string(config_path)
873-
.expect("failed to read Nexus SMF config file");
874-
contents.push_str(
875-
"\n\n\n \
876-
# !! content below added by test_repo_configs_are_valid()\n\
877-
\n\n\n",
878-
);
879878
let example_deployment = toml::to_string_pretty(&DummyConfig {
880879
deployment: example_config.deployment,
881880
})
882881
.unwrap();
883-
contents.push_str(&example_deployment);
884-
let _: Config = toml::from_str(&contents)
885-
.expect("Nexus SMF config file is not valid");
882+
883+
let nexus_config_paths = [
884+
"../smf/nexus/single-sled/config-partial.toml",
885+
"../smf/nexus/multi-sled/config-partial.toml",
886+
];
887+
for config_path in nexus_config_paths {
888+
println!(
889+
"checking {:?} with example deployment section added",
890+
config_path
891+
);
892+
let mut contents = std::fs::read_to_string(config_path)
893+
.expect("failed to read Nexus SMF config file");
894+
contents.push_str(
895+
"\n\n\n \
896+
# !! content below added by test_repo_configs_are_valid()\n\
897+
\n\n\n",
898+
);
899+
contents.push_str(&example_deployment);
900+
let _: Config = toml::from_str(&contents)
901+
.expect("Nexus SMF config file is not valid");
902+
}
886903
}
887904

888905
#[test]
@@ -894,3 +911,30 @@ mod test {
894911
);
895912
}
896913
}
914+
915+
/// Defines a strategy for choosing what physical disks to use when allocating
916+
/// new crucible regions.
917+
///
918+
/// NOTE: More strategies can - and should! - be added.
919+
///
920+
/// See <https://rfd.shared.oxide.computer/rfd/0205> for a more
921+
/// complete discussion.
922+
///
923+
/// Longer-term, we should consider:
924+
/// - Storage size + remaining free space
925+
/// - Sled placement of datasets
926+
/// - What sort of loads we'd like to create (even split across all disks
927+
/// may not be preferable, especially if maintenance is expected)
928+
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
929+
#[serde(tag = "type", rename_all = "snake_case")]
930+
pub enum RegionAllocationStrategy {
931+
/// Choose disks pseudo-randomly. An optional seed may be provided to make
932+
/// the ordering deterministic, otherwise the current time in nanoseconds
933+
/// will be used. Ordering is based on sorting the output of `md5(UUID of
934+
/// candidate dataset + seed)`. The seed does not need to come from a
935+
/// cryptographically secure source.
936+
Random { seed: Option<u64> },
937+
938+
/// Like Random, but ensures that each region is allocated on its own sled.
939+
RandomWithDistinctSleds { seed: Option<u64> },
940+
}

docs/how-to-run.adoc

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -321,20 +321,42 @@ Error: Creates a new build target, and sets it as "active"
321321
Usage: omicron-package target create [OPTIONS]
322322
323323
Options:
324-
-i, --image <IMAGE> [default: standard] [possible values: standard, trampoline]
325-
-m, --machine <MACHINE> [possible values: gimlet, gimlet-standalone, non-gimlet]
326-
-s, --switch <SWITCH> [possible values: asic, stub, softnpu]
327-
-h, --help Print help (see more with '--help')
324+
-i, --image <IMAGE>
325+
[default: standard]
326+
327+
Possible values:
328+
- standard: A typical host OS image
329+
- trampoline: A recovery host OS image, intended to bootstrap a Standard image
330+
331+
-m, --machine <MACHINE>
332+
Possible values:
333+
- gimlet: Use sled agent configuration for a Gimlet
334+
- gimlet-standalone: Use sled agent configuration for a Gimlet running in isolation
335+
- non-gimlet: Use sled agent configuration for a device emulating a Gimlet
336+
337+
-s, --switch <SWITCH>
338+
Possible values:
339+
- asic: Use the "real" Dendrite, that attempts to interact with the Tofino
340+
- stub: Use a "stub" Dendrite that does not require any real hardware
341+
- softnpu: Use a "softnpu" Dendrite that uses the SoftNPU asic emulator
342+
343+
-r, --rack-topology <RACK_TOPOLOGY>
344+
Possible values:
345+
- multi-sled: Use configurations suitable for a multi-sled deployment, such as dogfood and production racks
346+
- single-sled: Use configurations suitable for a single-sled deployment, such as CI and dev machines
347+
348+
-h, --help
349+
Print help (see a summary with '-h')
328350
329351
----
330352

331353
To set up a build target for a non-Gimlet machine with simulated (but fully functional) external networking, you would run:
332354

333355
[source,console]
334356
----
335-
$ cargo run --release --bin omicron-package -- -t default target create -i standard -m non-gimlet -s softnpu
357+
$ cargo run --release --bin omicron-package -- -t default target create -i standard -m non-gimlet -s softnpu -r single-sled
336358
Finished release [optimized] target(s) in 0.66s
337-
Running `target/release/omicron-package -t default target create -i standard -m non-gimlet -s softnpu`
359+
Running `target/release/omicron-package -t default target create -i standard -m non-gimlet -s softnpu -r single-sled`
338360
Created new build target 'default' and set it as active
339361
----
340362

installinator/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,4 @@ tokio-stream.workspace = true
5757
[features]
5858
image-standard = []
5959
image-trampoline = []
60+
rack-topology-single-sled = []

nexus/db-model/src/queries/region_allocation.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,13 @@ table! {
4747
}
4848
}
4949

50+
table! {
51+
shuffled_candidate_datasets {
52+
id -> Uuid,
53+
pool_id -> Uuid,
54+
}
55+
}
56+
5057
table! {
5158
candidate_regions {
5259
id -> Uuid,
@@ -89,6 +96,19 @@ table! {
8996
}
9097
}
9198

99+
table! {
100+
one_zpool_per_sled (pool_id) {
101+
pool_id -> Uuid
102+
}
103+
}
104+
105+
table! {
106+
one_dataset_per_zpool {
107+
id -> Uuid,
108+
pool_id -> Uuid
109+
}
110+
}
111+
92112
table! {
93113
inserted_regions {
94114
id -> Uuid,
@@ -141,6 +161,7 @@ diesel::allow_tables_to_appear_in_same_query!(
141161
);
142162

143163
diesel::allow_tables_to_appear_in_same_query!(old_regions, dataset,);
164+
diesel::allow_tables_to_appear_in_same_query!(old_regions, zpool,);
144165

145166
diesel::allow_tables_to_appear_in_same_query!(
146167
inserted_regions,
@@ -149,6 +170,7 @@ diesel::allow_tables_to_appear_in_same_query!(
149170

150171
diesel::allow_tables_to_appear_in_same_query!(candidate_zpools, dataset,);
151172
diesel::allow_tables_to_appear_in_same_query!(candidate_zpools, zpool,);
173+
diesel::allow_tables_to_appear_in_same_query!(candidate_datasets, dataset);
152174

153175
// == Needed for random region allocation ==
154176

0 commit comments

Comments
 (0)