Skip to content

Commit

Permalink
fix claim queue size (#6257)
Browse files Browse the repository at this point in the history
Reported in
#6161 (comment)

Fixes a bug introduced in
#5461, where the claim
queue would contain entries even if the validator groups storage is
empty (which happens during the first session).

This PR sets the claim queue core count to be the minimum between the
num_cores param and the number of validator groups

TODO:
- [x] prdoc
- [x] unit test
  • Loading branch information
alindima authored Nov 4, 2024
1 parent 9353a28 commit f4133b0
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 14 deletions.
36 changes: 23 additions & 13 deletions polkadot/runtime/parachains/src/scheduler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ use frame_support::{pallet_prelude::*, traits::Defensive};
use frame_system::pallet_prelude::BlockNumberFor;
pub use polkadot_core_primitives::v2::BlockNumber;
use polkadot_primitives::{
CoreIndex, GroupIndex, GroupRotationInfo, Id as ParaId, ScheduledCore, ValidatorIndex,
CoreIndex, GroupIndex, GroupRotationInfo, Id as ParaId, ScheduledCore, SchedulerParams,
ValidatorIndex,
};
use sp_runtime::traits::One;

Expand Down Expand Up @@ -131,7 +132,7 @@ impl<T: Config> Pallet<T> {
pub(crate) fn initializer_on_new_session(
notification: &SessionChangeNotification<BlockNumberFor<T>>,
) {
let SessionChangeNotification { validators, new_config, prev_config, .. } = notification;
let SessionChangeNotification { validators, new_config, .. } = notification;
let config = new_config;
let assigner_cores = config.scheduler_params.num_cores;

Expand Down Expand Up @@ -186,7 +187,7 @@ impl<T: Config> Pallet<T> {
}

// Resize and populate claim queue.
Self::maybe_resize_claim_queue(prev_config.scheduler_params.num_cores, assigner_cores);
Self::maybe_resize_claim_queue();
Self::populate_claim_queue_after_session_change();

let now = frame_system::Pallet::<T>::block_number() + One::one();
Expand All @@ -203,6 +204,12 @@ impl<T: Config> Pallet<T> {
ValidatorGroups::<T>::decode_len().unwrap_or(0)
}

/// Expected claim queue len. Can be different than the real length if for example we don't have
/// assignments for a core.
fn expected_claim_queue_len(config: &SchedulerParams<BlockNumberFor<T>>) -> u32 {
core::cmp::min(config.num_cores, Self::num_availability_cores() as u32)
}

/// Get the group assigned to a specific core by index at the current block number. Result
/// undefined if the core index is unknown or the block number is less than the session start
/// index.
Expand Down Expand Up @@ -325,11 +332,11 @@ impl<T: Config> Pallet<T> {
/// and fill the queue from the assignment provider.
pub(crate) fn advance_claim_queue(except_for: &BTreeSet<CoreIndex>) {
let config = configuration::ActiveConfig::<T>::get();
let num_assigner_cores = config.scheduler_params.num_cores;
let expected_claim_queue_len = Self::expected_claim_queue_len(&config.scheduler_params);
// Extra sanity, config should already never be smaller than 1:
let n_lookahead = config.scheduler_params.lookahead.max(1);

for core_idx in 0..num_assigner_cores {
for core_idx in 0..expected_claim_queue_len {
let core_idx = CoreIndex::from(core_idx);

if !except_for.contains(&core_idx) {
Expand All @@ -345,13 +352,16 @@ impl<T: Config> Pallet<T> {
}

// on new session
fn maybe_resize_claim_queue(old_core_count: u32, new_core_count: u32) {
if new_core_count < old_core_count {
fn maybe_resize_claim_queue() {
let cq = ClaimQueue::<T>::get();
let Some((old_max_core, _)) = cq.last_key_value() else { return };
let config = configuration::ActiveConfig::<T>::get();
let new_core_count = Self::expected_claim_queue_len(&config.scheduler_params);

if new_core_count < (old_max_core.0 + 1) {
ClaimQueue::<T>::mutate(|cq| {
let to_remove: Vec<_> = cq
.range(CoreIndex(new_core_count)..CoreIndex(old_core_count))
.map(|(k, _)| *k)
.collect();
let to_remove: Vec<_> =
cq.range(CoreIndex(new_core_count)..=*old_max_core).map(|(k, _)| *k).collect();
for key in to_remove {
if let Some(dropped_assignments) = cq.remove(&key) {
Self::push_back_to_assignment_provider(dropped_assignments.into_iter());
Expand All @@ -367,9 +377,9 @@ impl<T: Config> Pallet<T> {
let config = configuration::ActiveConfig::<T>::get();
// Extra sanity, config should already never be smaller than 1:
let n_lookahead = config.scheduler_params.lookahead.max(1);
let new_core_count = config.scheduler_params.num_cores;
let expected_claim_queue_len = Self::expected_claim_queue_len(&config.scheduler_params);

for core_idx in 0..new_core_count {
for core_idx in 0..expected_claim_queue_len {
let core_idx = CoreIndex::from(core_idx);
Self::fill_claim_queue(core_idx, n_lookahead);
}
Expand Down
20 changes: 19 additions & 1 deletion polkadot/runtime/parachains/src/scheduler/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -726,8 +726,26 @@ fn session_change_decreasing_number_of_cores() {
[assignment_b.clone()].into_iter().collect::<VecDeque<_>>()
);

// No more assignments now.
Scheduler::advance_claim_queue(&Default::default());
// No more assignments now.
assert_eq!(Scheduler::claim_queue_len(), 0);

// Retain number of cores to 1 but remove all validator groups. The claim queue length
// should be the minimum of these two.

// Add an assignment.
MockAssigner::add_test_assignment(assignment_b.clone());

run_to_block(4, |number| match number {
4 => Some(SessionChangeNotification {
new_config: new_config.clone(),
prev_config: new_config.clone(),
validators: vec![],
..Default::default()
}),
_ => None,
});

assert_eq!(Scheduler::claim_queue_len(), 0);
});
}
Expand Down
10 changes: 10 additions & 0 deletions prdoc/pr_6257.prdoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
title: 'fix claim queue size when validator groups count is smaller'
doc:
- audience: Runtime Dev
description: 'Fixes a bug introduced in https://github.com/paritytech/polkadot-sdk/pull/5461, where the claim queue
would contain entries even if the validator groups storage is empty (which happens during the first session).
This PR sets the claim queue core count to be the minimum between the num_cores param and the number of validator groups.'

crates:
- name: polkadot-runtime-parachains
bump: patch

0 comments on commit f4133b0

Please sign in to comment.