Skip to content

Commit

Permalink
hrtimers: Push pending hrtimers away from outgoing CPU earlier
Browse files Browse the repository at this point in the history
2b8272f ("cpu/hotplug: Prevent self deadlock on CPU hot-unplug")
solved the straightforward CPU hotplug deadlock vs. the scheduler
bandwidth timer. Yu discovered a more involved variant where a task which
has a bandwidth timer started on the outgoing CPU holds a lock and then
gets throttled. If the lock is required by one of the CPU hotplug callbacks,
the hotplug operation deadlocks because the unthrottling timer event is not
handled on the dying CPU and can only be recovered once the control CPU
reaches the hotplug state which pulls the pending hrtimers from the dead
CPU.

Solve this by pushing the hrtimers away from the dying CPU in the dying
callbacks. Nothing can queue a hrtimer on the dying CPU at that point because
all other CPUs spin in stop_machine() with interrupts disabled and once the
operation is finished the CPU is marked offline.

Reported-by: Yu Liao <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Liu Tie <[email protected]>
Link: https://lore.kernel.org/r/87a5rphara.ffs@tglx
  • Loading branch information
KAGA-KOKO committed Nov 11, 2023
1 parent ffc2532 commit 5c0930c
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 24 deletions.
1 change: 1 addition & 0 deletions include/linux/cpuhotplug.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ enum cpuhp_state {
CPUHP_AP_ARM_CORESIGHT_CTI_STARTING,
CPUHP_AP_ARM64_ISNDEP_STARTING,
CPUHP_AP_SMPCFD_DYING,
CPUHP_AP_HRTIMERS_DYING,
CPUHP_AP_X86_TBOOT_DYING,
CPUHP_AP_ARM_CACHE_B15_RAC_DYING,
CPUHP_AP_ONLINE,
Expand Down
4 changes: 2 additions & 2 deletions include/linux/hrtimer.h
Original file line number Diff line number Diff line change
Expand Up @@ -531,9 +531,9 @@ extern void sysrq_timer_list_show(void);

int hrtimers_prepare_cpu(unsigned int cpu);
#ifdef CONFIG_HOTPLUG_CPU
int hrtimers_dead_cpu(unsigned int cpu);
int hrtimers_cpu_dying(unsigned int cpu);
#else
#define hrtimers_dead_cpu NULL
#define hrtimers_cpu_dying NULL
#endif

#endif
8 changes: 7 additions & 1 deletion kernel/cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -2098,7 +2098,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
[CPUHP_HRTIMERS_PREPARE] = {
.name = "hrtimers:prepare",
.startup.single = hrtimers_prepare_cpu,
.teardown.single = hrtimers_dead_cpu,
.teardown.single = NULL,
},
[CPUHP_SMPCFD_PREPARE] = {
.name = "smpcfd:prepare",
Expand Down Expand Up @@ -2190,6 +2190,12 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.startup.single = NULL,
.teardown.single = smpcfd_dying_cpu,
},
[CPUHP_AP_HRTIMERS_DYING] = {
.name = "hrtimers:dying",
.startup.single = NULL,
.teardown.single = hrtimers_cpu_dying,
},

/* Entry state on starting. Interrupts enabled from here on. Transient
 * state for synchronization */
[CPUHP_AP_ONLINE] = {
Expand Down
33 changes: 12 additions & 21 deletions kernel/time/hrtimer.c
Original file line number Diff line number Diff line change
Expand Up @@ -2219,29 +2219,22 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
}
}

int hrtimers_dead_cpu(unsigned int scpu)
int hrtimers_cpu_dying(unsigned int dying_cpu)
{
struct hrtimer_cpu_base *old_base, *new_base;
int i;
int i, ncpu = cpumask_first(cpu_active_mask);

BUG_ON(cpu_online(scpu));
tick_cancel_sched_timer(scpu);
tick_cancel_sched_timer(dying_cpu);

old_base = this_cpu_ptr(&hrtimer_bases);
new_base = &per_cpu(hrtimer_bases, ncpu);

/*
* this BH disable ensures that raise_softirq_irqoff() does
* not wakeup ksoftirqd (and acquire the pi-lock) while
* holding the cpu_base lock
*/
local_bh_disable();
local_irq_disable();
old_base = &per_cpu(hrtimer_bases, scpu);
new_base = this_cpu_ptr(&hrtimer_bases);
/*
* The caller is globally serialized and nobody else
* takes two locks at once, deadlock is not possible.
*/
raw_spin_lock(&new_base->lock);
raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
raw_spin_lock(&old_base->lock);
raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING);

for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
migrate_hrtimer_list(&old_base->clock_base[i],
Expand All @@ -2252,15 +2245,13 @@ int hrtimers_dead_cpu(unsigned int scpu)
* The migration might have changed the first expiring softirq
* timer on this CPU. Update it.
*/
hrtimer_update_softirq_timer(new_base, false);
__hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT);
/* Tell the other CPU to retrigger the next event */
smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);

raw_spin_unlock(&old_base->lock);
raw_spin_unlock(&new_base->lock);
raw_spin_unlock(&old_base->lock);

/* Check, if we got expired work to do */
__hrtimer_peek_ahead_timers();
local_irq_enable();
local_bh_enable();
return 0;
}

Expand Down

0 comments on commit 5c0930c

Please sign in to comment.