From 20a1dafe1dbd879841637355e0b074b272b358a1 Mon Sep 17 00:00:00 2001 From: Qi Date: Sat, 19 Jul 2025 21:14:18 +0800 Subject: [PATCH 001/100] add macro `cfg_rt_and_time` and `cfg_rt_or_time` Signed-off-by: ADD-SP --- tokio/src/macros/cfg.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tokio/src/macros/cfg.rs b/tokio/src/macros/cfg.rs index 7c1dcc612de..6ed616cfa11 100644 --- a/tokio/src/macros/cfg.rs +++ b/tokio/src/macros/cfg.rs @@ -704,3 +704,27 @@ macro_rules! cfg_tokio_uring { )* }; } + +macro_rules! cfg_rt_and_time{ + ($($item:item)*) => { + $( + #[cfg(all( + feature = "rt", + feature = "time", + ))] + $item + )* + }; +} + +macro_rules! cfg_rt_or_time{ + ($($item:item)*) => { + $( + #[cfg(any( + feature = "rt", + feature = "time", + ))] + $item + )* + }; +} From 3e65b072cc23ab92ea0659625b8a3f00e0ce6fad Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 13 Jul 2025 14:59:42 +0800 Subject: [PATCH 002/100] impl `wheel::Entry` Signed-off-by: ADD-SP --- tokio/src/runtime/time/mod.rs | 4 - tokio/src/runtime/time/wheel/entry.rs | 234 ++++++++++++++++++++++++++ tokio/src/runtime/time/wheel/level.rs | 21 ++- tokio/src/runtime/time/wheel/mod.rs | 117 +++++++------ 4 files changed, 314 insertions(+), 62 deletions(-) create mode 100644 tokio/src/runtime/time/wheel/entry.rs diff --git a/tokio/src/runtime/time/mod.rs b/tokio/src/runtime/time/mod.rs index 3250dce97f6..f3d696fc469 100644 --- a/tokio/src/runtime/time/mod.rs +++ b/tokio/src/runtime/time/mod.rs @@ -6,10 +6,6 @@ //! Time driver. 
-mod entry; -pub(crate) use entry::TimerEntry; -use entry::{EntryList, TimerHandle, TimerShared, MAX_SAFE_MILLIS_DURATION}; - mod handle; pub(crate) use self::handle::Handle; diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs new file mode 100644 index 00000000000..7e280989c2b --- /dev/null +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -0,0 +1,234 @@ +use crate::loom::sync::atomic::{AtomicU8, Ordering::*}; +use crate::loom::sync::{Arc, Mutex}; +use crate::{sync::AtomicWaker, util::linked_list}; +use std::marker::PhantomPinned; +use std::ptr::NonNull; +use std::sync::mpsc::Sender; +use std::task::Waker; + +pub(crate) type EntryList = linked_list::LinkedList; + +/// A pure new entry, no any changes to the state. +const STATE_UNREGISTERED: u8 = 0; + +/// The entry is registered to the timer wheel, +/// but not in the pending queue of the timer wheel. +const STATE_REGISTERED: u8 = 2; + +/// The entry is in the pending queue of the timer wheel, +/// and not in any wheel level. +const STATE_PENDING: u8 = 3; + +/// The waker has been called, and the entry is no longer in the timer wheel +/// (both each wheel level and the pending queue). +const STATE_FIRED: u8 = 4;/// The entry in the timer wheel. + +#[derive(Debug)] +struct Inner { + /// The tick when this entry is scheduled to expire. + deadline: u64, + + /// The currently registered waker. + waker: AtomicWaker, + + /// The mpsc channel used to cancel the entry. + // Since the race is very unlikely, we use `Mutex` here + // for lower complexity. + cancel_tx: Mutex>>, + + state: AtomicU8, + + _pin: PhantomPinned, +} + +/// The entry in the timer wheel. +pub(crate) struct Entry { + /// The pointers used by the intrusive linked list. + pointers: linked_list::Pointers, + + inner: Arc, + + _pin: PhantomPinned, +} + +generate_addr_of_methods! 
{ + impl<> Entry { + unsafe fn addr_of_pointers(self: NonNull) -> NonNull> { + &self.pointers + } + } +} + +unsafe impl linked_list::Link for Entry { + type Handle = RawHandle; + type Target = Entry; + + fn as_raw(hdl: &Self::Handle) -> NonNull { + hdl.ptr + } + + unsafe fn from_raw(ptr: NonNull) -> Self::Handle { + RawHandle { ptr } + } + + unsafe fn pointers( + target: NonNull, + ) -> NonNull> { + Entry::addr_of_pointers(target) + } +} + +/// Another version of [`Handle`] which doesn't [`Arc::clone`] +/// the [`Inner`], this is used for intrusive linked list. +pub(crate) struct RawHandle { + ptr: NonNull, +} + +impl RawHandle { + /// # Safety + /// + /// [`Self::ptr`] must be a valid pointer to an [`Entry`]. + pub(crate) unsafe fn upgrade(self) -> Handle { + let inner = Arc::clone(&self.ptr.as_ref().inner); + Handle { + ptr: self.ptr, + inner, + } + } +} + +#[derive(Debug, Clone)] +pub(crate) struct Handle { + /// A pointer to the entry in the timer wheel. + ptr: NonNull, + + inner: Arc, +} + +/// Safety: [`Inner`] is protected by atomic variables and [`Mutex`], +unsafe impl Send for Handle {} +unsafe impl Sync for Handle {} + +impl Handle { + pub(crate) fn new(deadline: u64, waker: &Waker) -> Self { + let inner = Arc::new(Inner { + deadline, + waker: AtomicWaker::new(), + cancel_tx: Mutex::new(None), + state: AtomicU8::new(STATE_UNREGISTERED), + _pin: PhantomPinned, + }); + inner.waker.register_by_ref(waker); + + let ptr = Box::into_raw(Box::new(Entry { + pointers: linked_list::Pointers::new(), + inner: Arc::clone(&inner), + _pin: PhantomPinned, + })); + let ptr = unsafe { NonNull::new_unchecked(ptr) }; + + Handle { ptr, inner } + } + + /// Wake the entry if it is already in the pending queue of the timer wheel. + /// + /// # Panic + /// + /// Panics if the entry is not transitioned to the pending state. 
+ pub(crate) fn wake(&self) { + let old = self.inner.state.swap(STATE_FIRED, SeqCst); + assert!(old == STATE_PENDING); + self.inner.waker.wake(); + } + + /// Wake the entry if it has already elapsed before registering to the timer wheel. + /// + /// # Panic + /// + /// Panics if the entry is not in the unregistered state. + pub(crate) fn wake_unregistered(&self) { + let old = self.inner.state.swap(STATE_FIRED, SeqCst); + assert!(old == STATE_UNREGISTERED); + self.inner.waker.wake(); + } + + pub(crate) fn register_waker(&self, waker: &Waker) { + self.inner.waker.register_by_ref(waker); + } + + /// # Panic + /// + /// Panics if the entry is not in the unregistered state. + pub(crate) fn transition_to_registered(&self, cancel_tx: Sender) { + { + let mut maybe_tx = self.inner.cancel_tx.lock(); + assert!(maybe_tx.is_none(), "cancel sender already set"); + *maybe_tx = Some(cancel_tx); + // lock is dropped here + } + let old = self.inner.state.swap(STATE_REGISTERED, SeqCst); + assert_eq!(old, STATE_UNREGISTERED, "Entry not unregistered"); + } + + /// # Panic + /// + /// Panics if the entry is not in the registered state. + pub(crate) fn transition_to_pending(&self, not_after: u64) -> Result<(), u64> { + if self.inner.deadline > not_after { + return Err(self.inner.deadline); + } + let old = self.inner.state.swap(STATE_PENDING, SeqCst); + assert_eq!(old, STATE_REGISTERED, "Entry not registered"); + Ok(()) + } + + /// # Panic + /// + /// Panics if receiver side is closed, this is usually caused by + /// the shutdown logic dropping the receiver side too early. 
+ pub(crate) fn cancel(&self) { + let state = self.inner.state.fetch_or(0, SeqCst); + if state & STATE_REGISTERED != 0 { + let maybe_tx = { + let mut lock = self.inner.cancel_tx.lock(); + lock.take() + // lock is dropped here to avoid poisoning the Mutex + }; + if let Some(tx) = maybe_tx { + tx.send(self.clone()) + .expect("cancel sender should not be closed"); + } + } + } + + pub(crate) fn deadline(&self) -> u64 { + self.inner.deadline + } + + pub(crate) fn is_registered(&self) -> bool { + self.inner.state.fetch_or(0, SeqCst) == STATE_REGISTERED + } + + pub(crate) fn is_pending(&self) -> bool { + self.inner.state.fetch_or(0, SeqCst) == STATE_PENDING + } + + pub(crate) fn is_fired(&self) -> bool { + self.inner.state.fetch_or(0, SeqCst) == STATE_FIRED + } + + pub(crate) fn as_raw(&self) -> RawHandle { + RawHandle { ptr: self.ptr } + } + + pub(crate) fn as_entry_ptr(&self) -> NonNull { + self.ptr + } + + /// # Safety + /// + /// [`Self::ptr`] must be a valid pointer to an [`Entry`]. + pub(crate) unsafe fn drop_entry(&self) { + drop(Box::from_raw(self.ptr.as_ptr())); + } +} diff --git a/tokio/src/runtime/time/wheel/level.rs b/tokio/src/runtime/time/wheel/level.rs index 754e638bf57..99b1d41c71d 100644 --- a/tokio/src/runtime/time/wheel/level.rs +++ b/tokio/src/runtime/time/wheel/level.rs @@ -1,6 +1,5 @@ -use crate::runtime::time::{EntryList, TimerHandle, TimerShared}; - -use std::{array, fmt, ptr::NonNull}; +use super::{EntryHandle, EntryList}; +use std::{array, fmt}; /// Wheel for a single level in the timer. This wheel contains 64 slots. pub(crate) struct Level { @@ -119,18 +118,22 @@ impl Level { Some(slot) } - pub(crate) unsafe fn add_entry(&mut self, item: TimerHandle) { - let slot = slot_for(item.registered_when(), self.level); + pub(crate) unsafe fn add_entry(&mut self, hdl: EntryHandle) { + // Safety: the associated entry must be valid. 
+ let deadline = unsafe { hdl.deadline() }; + let slot = slot_for(deadline, self.level); - self.slot[slot].push_front(item); + self.slot[slot].push_front(hdl.as_raw()); self.occupied |= occupied_bit(slot); } - pub(crate) unsafe fn remove_entry(&mut self, item: NonNull) { - let slot = slot_for(unsafe { item.as_ref().registered_when() }, self.level); + pub(crate) unsafe fn remove_entry(&mut self, hdl: EntryHandle) { + // Safety: the associated entry must be valid. + let deadline = unsafe { hdl.deadline() }; + let slot = slot_for(deadline, self.level); - unsafe { self.slot[slot].remove(item) }; + unsafe { self.slot[slot].remove(hdl.as_entry_ptr()) }; if self.slot[slot].is_empty() { // The bit is currently set debug_assert!(self.occupied & occupied_bit(slot) != 0); diff --git a/tokio/src/runtime/time/wheel/mod.rs b/tokio/src/runtime/time/wheel/mod.rs index 8d94303544c..570b06f942b 100644 --- a/tokio/src/runtime/time/wheel/mod.rs +++ b/tokio/src/runtime/time/wheel/mod.rs @@ -1,14 +1,13 @@ -use crate::runtime::time::{TimerHandle, TimerShared}; -use crate::time::error::InsertError; - mod level; pub(crate) use self::level::Expiration; use self::level::Level; -use std::{array, ptr::NonNull}; +mod entry; +use entry::EntryList; +pub(crate) use entry::Entry; +pub(crate) use entry::Handle as EntryHandle; -use super::entry::STATE_DEREGISTERED; -use super::EntryList; +use std::{array, sync::mpsc}; /// Timing wheel implementation. /// @@ -70,38 +69,41 @@ impl Wheel { /// /// # Arguments /// - /// * `item`: The item to insert into the wheel. + /// * `hdl`: The entry handle to insert into the wheel. /// /// # Return /// - /// Returns `Ok` when the item is successfully inserted, `Err` otherwise. - /// - /// `Err(Elapsed)` indicates that `when` represents an instant that has - /// already passed. In this case, the caller should fire the timeout - /// immediately. - /// - /// `Err(Invalid)` indicates an invalid `when` argument as been supplied. 
+ /// * `true`: The entry was successfully inserted. + /// * `false`: the entry has already expired, in this case, + /// the entry is not inserted into the wheel. /// /// # Safety /// - /// This function registers item into an intrusive linked list. The caller - /// must ensure that `item` is pinned and will not be dropped without first - /// being deregistered. + /// The caller must ensure: + /// + /// * The associated entry is valid. + /// * AND the entry is not already registered in the wheel. pub(crate) unsafe fn insert( &mut self, - item: TimerHandle, - ) -> Result { - let when = item.sync_when(); - - if when <= self.elapsed { - return Err((item, InsertError::Elapsed)); + hdl: EntryHandle, + cancel_tx: mpsc::Sender, + ) -> bool { + // Safety: the associated entry must be valid. + let deadline = hdl.deadline(); + + if deadline <= self.elapsed { + unsafe { + hdl.drop_entry(); + } + return false; } // Get the level at which the entry should be stored - let level = self.level_for(when); + let level = self.level_for(deadline); + hdl.transition_to_registered(cancel_tx); unsafe { - self.levels[level].add_entry(item); + self.levels[level].add_entry(hdl); } debug_assert!({ @@ -111,39 +113,51 @@ impl Wheel { .unwrap_or(true) }); - Ok(when) + true } /// Removes `item` from the timing wheel. - pub(crate) unsafe fn remove(&mut self, item: NonNull) { + /// + /// # Safety + /// + /// The caller must ensure: + /// + /// * The associated entry is valid. + /// * AND the entry is already registered in the wheel. 
+ pub(crate) unsafe fn remove(&mut self, hdl: EntryHandle) { unsafe { - let when = item.as_ref().registered_when(); - if when == STATE_DEREGISTERED { - self.pending.remove(item); + if hdl.is_pending() { + self.pending.remove(hdl.as_entry_ptr()); + unsafe { + hdl.drop_entry(); + } } else { + let deadline = hdl.deadline(); debug_assert!( - self.elapsed <= when, - "elapsed={}; when={}", + self.elapsed <= deadline, + "elapsed={}; deadline={}", self.elapsed, - when + deadline ); - let level = self.level_for(when); - self.levels[level].remove_entry(item); + let level = self.level_for(deadline); + self.levels[level].remove_entry(hdl.clone()); + unsafe { + hdl.drop_entry(); + } } } } - /// Instant at which to poll. - pub(crate) fn poll_at(&self) -> Option { - self.next_expiration().map(|expiration| expiration.deadline) - } - /// Advances the timer up to the instant represented by `now`. - pub(crate) fn poll(&mut self, now: u64) -> Option { + pub(crate) fn poll(&mut self, now: u64) -> Option { loop { - if let Some(handle) = self.pending.pop_back() { - return Some(handle); + if let Some(raw_hdl) = self.pending.pop_back() { + let hdl = unsafe { raw_hdl.upgrade() }; + unsafe { + hdl.drop_entry(); + } + return Some(hdl); } match self.next_expiration() { @@ -163,7 +177,10 @@ impl Wheel { } } - self.pending.pop_back() + self.pending.pop_back().map(|raw_hdl| { + // Safety: the handle is valid as it was just popped from the pending list. + unsafe { raw_hdl.upgrade() } + }) } /// Returns the instant at which the next timeout expires. @@ -229,22 +246,24 @@ impl Wheel { // those entries again or we'll end up in an infinite loop. 
let mut entries = self.take_entries(expiration); - while let Some(item) = entries.pop_back() { + while let Some(raw_hdl) = entries.pop_back() { + let hdl = unsafe { raw_hdl.upgrade() }; + if expiration.level == 0 { - debug_assert_eq!(unsafe { item.registered_when() }, expiration.deadline); + debug_assert_eq!(hdl.deadline(), expiration.deadline); } // Try to expire the entry; this is cheap (doesn't synchronize) if // the timer is not expired, and updates registered_when. - match unsafe { item.mark_pending(expiration.deadline) } { + match unsafe { hdl.transition_to_pending(expiration.deadline) } { Ok(()) => { // Item was expired - self.pending.push_front(item); + self.pending.push_front(hdl.as_raw()); } Err(expiration_tick) => { let level = level_for(expiration.deadline, expiration_tick); unsafe { - self.levels[level].add_entry(item); + self.levels[level].add_entry(hdl); } } } From 7618162d4917bbf67261d4a30381213b97863556 Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 13 Jul 2025 15:19:09 +0800 Subject: [PATCH 003/100] remove global timer wheel Signed-off-by: ADD-SP --- tokio/src/runtime/time/handle.rs | 53 +++++- tokio/src/runtime/time/mod.rs | 308 ++----------------------------- tokio/src/runtime/time/source.rs | 7 +- 3 files changed, 67 insertions(+), 301 deletions(-) diff --git a/tokio/src/runtime/time/handle.rs b/tokio/src/runtime/time/handle.rs index fce791d998c..c3e8f92aaa4 100644 --- a/tokio/src/runtime/time/handle.rs +++ b/tokio/src/runtime/time/handle.rs @@ -1,13 +1,50 @@ -use crate::runtime::time::TimeSource; +use crate::loom::sync::atomic::{AtomicBool, Ordering}; +use crate::loom::sync::Arc; +use crate::runtime::time::{TimeSource, Wheel}; use std::fmt; /// Handle to time driver instance. pub(crate) struct Handle { pub(super) time_source: TimeSource, - pub(super) inner: super::Inner, + pub(super) is_shutdown: Arc, + + // When `true`, a call to `park_timeout` should immediately return and time + // should not advance. 
One reason for this to be `true` is if the task + // passed to `Runtime::block_on` called `task::yield_now()`. + // + // While it may look racy, it only has any effect when the clock is paused + // and pausing the clock is restricted to a single-threaded runtime. + #[cfg(feature = "test-util")] + pub(super) did_wake: Arc, } impl Handle { + pub(crate) fn process_at_time(&self, wheel: &mut Wheel, mut now: u64) { + if now < wheel.elapsed() { + // Time went backwards! This normally shouldn't happen as the Rust language + // guarantees that an Instant is monotonic, but can happen when running + // Linux in a VM on a Windows host due to std incorrectly trusting the + // hardware clock to be monotonic. + // + // See for more information. + now = wheel.elapsed(); + } + + while let Some(hdl) = wheel.poll(now) { + unsafe { + hdl.wake(); + } + } + } + + pub(crate) fn shutdown(&self, wheel: &mut Wheel) { + // self.is_shutdown.store(true, Ordering::SeqCst); + // Advance time forward to the end of time. + // This will ensure that all timers are fired. + let max_tick = u64::MAX; + self.process_at_time(wheel, max_tick); + } + /// Returns the time source associated with this handle. pub(crate) fn time_source(&self) -> &TimeSource { &self.time_source @@ -15,15 +52,19 @@ impl Handle { /// Checks whether the driver has been shutdown. pub(super) fn is_shutdown(&self) -> bool { - self.inner.is_shutdown() + self.is_shutdown.load(Ordering::SeqCst) } /// Track that the driver is being unparked pub(crate) fn unpark(&self) { #[cfg(feature = "test-util")] - self.inner - .did_wake - .store(true, std::sync::atomic::Ordering::SeqCst); + self.did_wake.store(true, Ordering::SeqCst); + } + + cfg_test_util! 
{ + pub(crate) fn did_wake(&self) -> bool { + self.did_wake.swap(false, Ordering::SeqCst) + } } } diff --git a/tokio/src/runtime/time/mod.rs b/tokio/src/runtime/time/mod.rs index f3d696fc469..c8459ea2b89 100644 --- a/tokio/src/runtime/time/mod.rs +++ b/tokio/src/runtime/time/mod.rs @@ -13,16 +13,17 @@ mod source; pub(crate) use source::TimeSource; mod wheel; +cfg_rt_and_time! { + pub(crate) use wheel::EntryHandle; +} +cfg_rt_or_time! { + pub(crate) use wheel::Wheel; +} use crate::loom::sync::atomic::{AtomicBool, Ordering}; -use crate::loom::sync::Mutex; -use crate::runtime::driver::{self, IoHandle, IoStack}; -use crate::time::error::Error; +use crate::loom::sync::Arc; +use crate::runtime::driver::{self, IoStack}; use crate::time::{Clock, Duration}; -use crate::util::WakeList; - -use std::fmt; -use std::{num::NonZeroU64, ptr::NonNull}; /// Time implementation that drives [`Sleep`][sleep], [`Interval`][interval], and [`Timeout`][timeout]. /// @@ -83,33 +84,8 @@ use std::{num::NonZeroU64, ptr::NonNull}; pub(crate) struct Driver { /// Parker to delegate to. park: IoStack, -} - -/// Timer state shared between `Driver`, `Handle`, and `Registration`. -struct Inner { - // The state is split like this so `Handle` can access `is_shutdown` without locking the mutex - state: Mutex, - - /// True if the driver is being shutdown. - is_shutdown: AtomicBool, - // When `true`, a call to `park_timeout` should immediately return and time - // should not advance. One reason for this to be `true` is if the task - // passed to `Runtime::block_on` called `task::yield_now()`. - // - // While it may look racy, it only has any effect when the clock is paused - // and pausing the clock is restricted to a single-threaded runtime. - #[cfg(feature = "test-util")] - did_wake: AtomicBool, -} - -/// Time state shared which must be protected by a `Mutex` -struct InnerState { - /// The earliest time at which we promise to wake up without unparking. - next_wake: Option, - - /// Timer wheel. 
- wheel: wheel::Wheel, + is_shutdown: Arc, } // ===== impl Driver ===== @@ -121,32 +97,26 @@ impl Driver { /// Specifying the source of time is useful when testing. pub(crate) fn new(park: IoStack, clock: &Clock) -> (Driver, Handle) { let time_source = TimeSource::new(clock); + let is_shutdown = Arc::new(AtomicBool::new(false)); let handle = Handle { time_source, - inner: Inner { - state: Mutex::new(InnerState { - next_wake: None, - wheel: wheel::Wheel::new(), - }), - is_shutdown: AtomicBool::new(false), - - #[cfg(feature = "test-util")] - did_wake: AtomicBool::new(false), - }, + is_shutdown: is_shutdown.clone(), + #[cfg(feature = "test-util")] + did_wake: Arc::new(AtomicBool::new(false)), }; - let driver = Driver { park }; + let driver = Driver { park, is_shutdown }; (driver, handle) } pub(crate) fn park(&mut self, handle: &driver::Handle) { - self.park_internal(handle, None); + self.park.park(handle); } pub(crate) fn park_timeout(&mut self, handle: &driver::Handle, duration: Duration) { - self.park_internal(handle, Some(duration)); + self.park.park_timeout(handle, duration); } pub(crate) fn shutdown(&mut self, rt_handle: &driver::Handle) { @@ -156,250 +126,10 @@ impl Driver { return; } - handle.inner.is_shutdown.store(true, Ordering::SeqCst); - - // Advance time forward to the end of time. 
- - handle.process_at_time(u64::MAX); - + self.is_shutdown.store(true, Ordering::SeqCst); self.park.shutdown(rt_handle); } - - fn park_internal(&mut self, rt_handle: &driver::Handle, limit: Option) { - let handle = rt_handle.time(); - let mut lock = handle.inner.lock(); - - assert!(!handle.is_shutdown()); - - let next_wake = lock.wheel.next_expiration_time(); - lock.next_wake = - next_wake.map(|t| NonZeroU64::new(t).unwrap_or_else(|| NonZeroU64::new(1).unwrap())); - - drop(lock); - - match next_wake { - Some(when) => { - let now = handle.time_source.now(rt_handle.clock()); - // Note that we effectively round up to 1ms here - this avoids - // very short-duration microsecond-resolution sleeps that the OS - // might treat as zero-length. - let mut duration = handle - .time_source - .tick_to_duration(when.saturating_sub(now)); - - if duration > Duration::from_millis(0) { - if let Some(limit) = limit { - duration = std::cmp::min(limit, duration); - } - - self.park_thread_timeout(rt_handle, duration); - } else { - self.park.park_timeout(rt_handle, Duration::from_secs(0)); - } - } - None => { - if let Some(duration) = limit { - self.park_thread_timeout(rt_handle, duration); - } else { - self.park.park(rt_handle); - } - } - } - - // Process pending timers after waking up - handle.process(rt_handle.clock()); - } - - cfg_test_util! { - fn park_thread_timeout(&mut self, rt_handle: &driver::Handle, duration: Duration) { - let handle = rt_handle.time(); - let clock = rt_handle.clock(); - - if clock.can_auto_advance() { - self.park.park_timeout(rt_handle, Duration::from_secs(0)); - - // If the time driver was woken, then the park completed - // before the "duration" elapsed (usually caused by a - // yield in `Runtime::block_on`). In this case, we don't - // advance the clock. 
- if !handle.did_wake() { - // Simulate advancing time - if let Err(msg) = clock.advance(duration) { - panic!("{}", msg); - } - } - } else { - self.park.park_timeout(rt_handle, duration); - } - } - } - - cfg_not_test_util! { - fn park_thread_timeout(&mut self, rt_handle: &driver::Handle, duration: Duration) { - self.park.park_timeout(rt_handle, duration); - } - } -} - -impl Handle { - pub(self) fn process(&self, clock: &Clock) { - let now = self.time_source().now(clock); - - self.process_at_time(now); - } - - pub(self) fn process_at_time(&self, mut now: u64) { - let mut waker_list = WakeList::new(); - - let mut lock = self.inner.lock(); - - if now < lock.wheel.elapsed() { - // Time went backwards! This normally shouldn't happen as the Rust language - // guarantees that an Instant is monotonic, but can happen when running - // Linux in a VM on a Windows host due to std incorrectly trusting the - // hardware clock to be monotonic. - // - // See for more information. - now = lock.wheel.elapsed(); - } - - while let Some(entry) = lock.wheel.poll(now) { - debug_assert!(unsafe { entry.is_pending() }); - - // SAFETY: We hold the driver lock, and just removed the entry from any linked lists. - if let Some(waker) = unsafe { entry.fire(Ok(())) } { - waker_list.push(waker); - - if !waker_list.can_push() { - // Wake a batch of wakers. To avoid deadlock, we must do this with the lock temporarily dropped. - drop(lock); - - waker_list.wake_all(); - - lock = self.inner.lock(); - } - } - } - - lock.next_wake = lock - .wheel - .poll_at() - .map(|t| NonZeroU64::new(t).unwrap_or_else(|| NonZeroU64::new(1).unwrap())); - - drop(lock); - - waker_list.wake_all(); - } - - /// Removes a registered timer from the driver. - /// - /// The timer will be moved to the cancelled state. Wakers will _not_ be - /// invoked. If the timer is already completed, this function is a no-op. - /// - /// This function always acquires the driver lock, even if the entry does - /// not appear to be registered. 
- /// - /// SAFETY: The timer must not be registered with some other driver, and - /// `add_entry` must not be called concurrently. - pub(self) unsafe fn clear_entry(&self, entry: NonNull) { - unsafe { - let mut lock = self.inner.lock(); - - if entry.as_ref().might_be_registered() { - lock.wheel.remove(entry); - } - - entry.as_ref().handle().fire(Ok(())); - } - } - - /// Removes and re-adds an entry to the driver. - /// - /// SAFETY: The timer must be either unregistered, or registered with this - /// driver. No other threads are allowed to concurrently manipulate the - /// timer at all (the current thread should hold an exclusive reference to - /// the `TimerEntry`) - pub(self) unsafe fn reregister( - &self, - unpark: &IoHandle, - new_tick: u64, - entry: NonNull, - ) { - let waker = unsafe { - let mut lock = self.inner.lock(); - - // We may have raced with a firing/deregistration, so check before - // deregistering. - if unsafe { entry.as_ref().might_be_registered() } { - lock.wheel.remove(entry); - } - - // Now that we have exclusive control of this entry, mint a handle to reinsert it. - let entry = entry.as_ref().handle(); - - if self.is_shutdown() { - unsafe { entry.fire(Err(crate::time::error::Error::shutdown())) } - } else { - entry.set_expiration(new_tick); - - // Note: We don't have to worry about racing with some other resetting - // thread, because add_entry and reregister require exclusive control of - // the timer entry. - match unsafe { lock.wheel.insert(entry) } { - Ok(when) => { - if lock - .next_wake - .map(|next_wake| when < next_wake.get()) - .unwrap_or(true) - { - unpark.unpark(); - } - - None - } - Err((entry, crate::time::error::InsertError::Elapsed)) => unsafe { - entry.fire(Ok(())) - }, - } - } - - // Must release lock before invoking waker to avoid the risk of deadlock. - }; - - // The timer was fired synchronously as a result of the reregistration. 
- // Wake the waker; this is needed because we might reset _after_ a poll, - // and otherwise the task won't be awoken to poll again. - if let Some(waker) = waker { - waker.wake(); - } - } - - cfg_test_util! { - fn did_wake(&self) -> bool { - self.inner.did_wake.swap(false, Ordering::SeqCst) - } - } -} - -// ===== impl Inner ===== - -impl Inner { - /// Locks the driver's inner structure - pub(super) fn lock(&self) -> crate::loom::sync::MutexGuard<'_, InnerState> { - self.state.lock() - } - - // Check whether the driver has been shutdown - pub(super) fn is_shutdown(&self) -> bool { - self.is_shutdown.load(Ordering::SeqCst) - } -} - -impl fmt::Debug for Inner { - fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt.debug_struct("Inner").finish() - } } -#[cfg(test)] -mod tests; +// #[cfg(test)] +// mod tests; diff --git a/tokio/src/runtime/time/source.rs b/tokio/src/runtime/time/source.rs index e3ba8d790c0..8f53c63d2d9 100644 --- a/tokio/src/runtime/time/source.rs +++ b/tokio/src/runtime/time/source.rs @@ -1,4 +1,3 @@ -use super::MAX_SAFE_MILLIS_DURATION; use crate::time::{Clock, Duration, Instant}; /// A structure which handles conversion from Instants to `u64` timestamps. 
@@ -22,11 +21,7 @@ impl TimeSource { pub(crate) fn instant_to_tick(&self, t: Instant) -> u64 { // round up let dur: Duration = t.saturating_duration_since(self.start_time); - let ms = dur - .as_millis() - .try_into() - .unwrap_or(MAX_SAFE_MILLIS_DURATION); - ms.min(MAX_SAFE_MILLIS_DURATION) + dur.as_millis().try_into().unwrap_or(u64::MAX) } pub(crate) fn tick_to_duration(&self, t: u64) -> Duration { From 4b0a86600374a3fdfd47256ba0baa0cf43152c53 Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 13 Jul 2025 15:36:59 +0800 Subject: [PATCH 004/100] add local timer wheel to the current_thread scheduler Signed-off-by: ADD-SP --- .../runtime/scheduler/current_thread/mod.rs | 53 ++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/tokio/src/runtime/scheduler/current_thread/mod.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs index 2097d34606a..58b559485cc 100644 --- a/tokio/src/runtime/scheduler/current_thread/mod.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -1,10 +1,11 @@ use crate::loom::sync::atomic::AtomicBool; -use crate::loom::sync::Arc; +use crate::loom::sync::{Arc, Mutex}; use crate::runtime::driver::{self, Driver}; use crate::runtime::scheduler::{self, Defer, Inject}; use crate::runtime::task::{ self, JoinHandle, OwnedTasks, Schedule, SpawnLocation, Task, TaskHarnessScheduleHooks, }; +use crate::runtime::time::{EntryHandle, Wheel}; use crate::runtime::{ blocking, context, Config, MetricsBatch, SchedulerMetrics, TaskHooks, TaskMeta, WorkerMetrics, }; @@ -16,6 +17,7 @@ use std::cell::RefCell; use std::collections::VecDeque; use std::future::{poll_fn, Future}; use std::sync::atomic::Ordering::{AcqRel, Release}; +use std::sync::mpsc; use std::task::Poll::{Pending, Ready}; use std::task::Waker; use std::thread::ThreadId; @@ -62,6 +64,15 @@ struct Core { /// Current tick tick: u32, + /// Worker local timer wheel + wheel: Wheel, + + /// Channel for sending timers that need to be cancelled + timer_cancel_tx: mpsc::Sender, + + 
/// Channel for receiving timers that need to be cancelled + timer_cancel_rx: mpsc::Receiver, + /// Runtime driver /// /// The driver is removed before starting to park the thread @@ -83,6 +94,11 @@ struct Shared { /// Remote run queue inject: Inject>, + /// Timers pending to be registered. + /// This is used to register a timer but the [`Core`] + /// is not available in the current thread. + inject_timers: Mutex>, + /// Collection of all active tasks spawned onto this executor. owned: OwnedTasks>, @@ -152,6 +168,7 @@ impl CurrentThread { }, shared: Shared { inject: Inject::new(), + inject_timers: Mutex::new(Vec::new()), owned: OwnedTasks::new(1), woken: AtomicBool::new(false), config, @@ -164,9 +181,13 @@ impl CurrentThread { local_tid, }); + let (timer_cancel_tx, timer_cancel_rx) = mpsc::channel(); let core = AtomicCell::new(Some(Box::new(Core { tasks: VecDeque::with_capacity(INITIAL_CAPACITY), tick: 0, + wheel: Wheel::new(), + timer_cancel_tx, + timer_cancel_rx, driver: Some(driver), metrics: MetricsBatch::new(&handle.shared.worker_metrics), global_queue_interval, @@ -439,6 +460,27 @@ impl Context { pub(crate) fn defer(&self, waker: &Waker) { self.defer.defer(waker); } + + fn with_core(&self, f: F) -> R + where + F: FnOnce(Option<&mut Core>) -> R, + { + let mut core = self.core.borrow_mut(); + f(core.as_mut().map(|c| c.as_mut())) + } + + pub(crate) fn with_wheel(&self, f: F) -> R + where + F: FnOnce(Option<(&mut Wheel, mpsc::Sender)>) -> R, + { + self.with_core(|maybe_core| { + if let Some(core) = maybe_core { + f(Some((&mut core.wheel, core.timer_cancel_tx.clone()))) + } else { + f(None) + } + }) + } } // ===== impl Handle ===== @@ -584,6 +626,15 @@ impl Handle { assert_eq!(0, worker); &self.shared.worker_metrics } + + /// Push a timer handle from the remote thread. 
+ pub(crate) fn push_remote_timer(&self, entry: EntryHandle) { + { + let mut inject_timers = self.shared.inject_timers.lock(); + inject_timers.push(entry); + } + self.driver.unpark(); + } } cfg_unstable_metrics! { From 78d433f05ccabdc3486d3545574abb33f828b13e Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 13 Jul 2025 15:46:06 +0800 Subject: [PATCH 005/100] add local timer wheel to the multi_thread scheduler Signed-off-by: ADD-SP --- .../runtime/scheduler/multi_thread/worker.rs | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index 7ec3f126467..5ae0bdce79b 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -63,6 +63,7 @@ use crate::runtime::scheduler::multi_thread::{ }; use crate::runtime::scheduler::{inject, Defer, Lock}; use crate::runtime::task::OwnedTasks; +use crate::runtime::time::{EntryHandle, Wheel}; use crate::runtime::{blocking, driver, scheduler, task, Config, SchedulerMetrics, WorkerMetrics}; use crate::runtime::{context, TaskHooks}; use crate::task::coop; @@ -73,6 +74,7 @@ use std::cell::RefCell; use std::task::Waker; use std::thread; use std::time::Duration; +use std::sync::mpsc; mod metrics; @@ -115,6 +117,15 @@ struct Core { /// The worker-local run queue. run_queue: queue::Local>, + /// Worker local timer wheel + wheel: Wheel, + + /// Channel for sending timers that need to be cancelled + timer_cancel_tx: mpsc::Sender, + + /// Channel for receiving timers that need to be cancelled + timer_cancel_rx: mpsc::Receiver, + /// True if the worker is currently searching for more work. Searching /// involves attempting to steal from other workers. is_searching: bool, @@ -193,6 +204,11 @@ pub(crate) struct Synced { /// Synchronized state for `Inject`. pub(crate) inject: inject::Synced, + + /// Timers pending to be registered. 
+ /// This is used to register a timer but the [`Core`] + /// is not available in the current thread. + inject_timers: Vec, } /// Used to communicate with a worker from other threads. @@ -254,12 +270,17 @@ pub(super) fn create( let unpark = park.unpark(); let metrics = WorkerMetrics::from_config(&config); let stats = Stats::new(&metrics); + let wheel = Wheel::new(); + let (timer_cancel_tx, timer_cancel_rx) = mpsc::channel(); cores.push(Box::new(Core { tick: 0, lifo_slot: None, lifo_enabled: !config.disable_lifo_slot, run_queue, + wheel, + timer_cancel_tx, + timer_cancel_rx, is_searching: false, is_shutdown: false, is_traced: false, @@ -287,6 +308,7 @@ pub(super) fn create( synced: Mutex::new(Synced { idle: idle_synced, inject: inject_synced, + inject_timers: vec![], }), shutdown_cores: Mutex::new(vec![]), trace_status: TraceStatus::new(remotes_len), @@ -793,6 +815,29 @@ impl Context { self.defer.defer(waker); } } + + fn with_core(&self, f: F) -> R + where + F: FnOnce(Option<&mut Core>) -> R, + { + match self.core.borrow_mut().as_mut() { + Some(core) => f(Some(core)), + None => f(None), + } + } + + pub(crate) fn with_wheel(&self, f: F) -> R + where + F: FnOnce(Option<(&mut Wheel, mpsc::Sender)>) -> R, + { + self.with_core(|core| { + if let Some(core) = core { + f(Some((&mut core.wheel, core.timer_cancel_tx.clone()))) + } else { + f(None) + } + }) + } } impl Core { @@ -1131,6 +1176,15 @@ impl Handle { } } + /// Push a timer handle from the remote thread. 
+ pub(crate) fn push_remote_timer(&self, hdl: EntryHandle) { + { + let mut synced = self.shared.synced.lock(); + synced.inject_timers.push(hdl); + } + self.notify_parked_remote(); + } + pub(super) fn close(&self) { if self .shared From 4727a56d5895ce3d77161b513e30eec964c9ba11 Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 13 Jul 2025 16:24:22 +0800 Subject: [PATCH 006/100] remove `tokio/src/runtime/time/entry.rs` Signed-off-by: ADD-SP --- tokio/src/runtime/time/entry.rs | 687 -------------------------- tokio/src/runtime/time/wheel/entry.rs | 44 +- tokio/src/runtime/time/wheel/level.rs | 6 +- tokio/src/runtime/time/wheel/mod.rs | 63 ++- 4 files changed, 67 insertions(+), 733 deletions(-) delete mode 100644 tokio/src/runtime/time/entry.rs diff --git a/tokio/src/runtime/time/entry.rs b/tokio/src/runtime/time/entry.rs deleted file mode 100644 index 627fcbc5ec3..00000000000 --- a/tokio/src/runtime/time/entry.rs +++ /dev/null @@ -1,687 +0,0 @@ -//! Timer state structures. -//! -//! This module contains the heart of the intrusive timer implementation, and as -//! such the structures inside are full of tricky concurrency and unsafe code. -//! -//! # Ground rules -//! -//! The heart of the timer implementation here is the [`TimerShared`] structure, -//! shared between the [`TimerEntry`] and the driver. Generally, we permit access -//! to [`TimerShared`] ONLY via either 1) a mutable reference to [`TimerEntry`] or -//! 2) a held driver lock. -//! -//! It follows from this that any changes made while holding BOTH 1 and 2 will -//! be reliably visible, regardless of ordering. This is because of the `acq/rel` -//! fences on the driver lock ensuring ordering with 2, and rust mutable -//! reference rules for 1 (a mutable reference to an object can't be passed -//! between threads without an `acq/rel` barrier, and same-thread we have local -//! happens-before ordering). -//! -//! # State field -//! -//! Each timer has a state field associated with it. This field contains either -//! 
the current scheduled time, or a special flag value indicating its state. -//! This state can either indicate that the timer is on the 'pending' queue (and -//! thus will be fired with an `Ok(())` result soon) or that it has already been -//! fired/deregistered. -//! -//! This single state field allows for code that is firing the timer to -//! synchronize with any racing `reset` calls reliably. -//! -//! # Registered vs true timeouts -//! -//! To allow for the use case of a timeout that is periodically reset before -//! expiration to be as lightweight as possible, we support optimistically -//! lock-free timer resets, in the case where a timer is rescheduled to a later -//! point than it was originally scheduled for. -//! -//! This is accomplished by lazily rescheduling timers. That is, we update the -//! state field with the true expiration of the timer from the holder of -//! the [`TimerEntry`]. When the driver services timers (ie, whenever it's -//! walking lists of timers), it checks this "true when" value, and reschedules -//! based on it. -//! -//! We do, however, also need to track what the expiration time was when we -//! originally registered the timer; this is used to locate the right linked -//! list when the timer is being cancelled. -//! This is referred to as the `registered_when` internally. -//! -//! There is of course a race condition between timer reset and timer -//! expiration. If the driver fails to observe the updated expiration time, it -//! could trigger expiration of the timer too early. However, because -//! [`mark_pending`][mark_pending] performs a compare-and-swap, it will identify this race and -//! refuse to mark the timer as pending. -//! -//! 
[mark_pending]: TimerHandle::mark_pending - -use crate::loom::cell::UnsafeCell; -use crate::loom::sync::atomic::AtomicU64; -use crate::loom::sync::atomic::Ordering; - -use crate::runtime::scheduler; -use crate::sync::AtomicWaker; -use crate::time::Instant; -use crate::util::linked_list; - -use pin_project_lite::pin_project; -use std::task::{Context, Poll, Waker}; -use std::{marker::PhantomPinned, pin::Pin, ptr::NonNull}; - -type TimerResult = Result<(), crate::time::error::Error>; - -pub(in crate::runtime::time) const STATE_DEREGISTERED: u64 = u64::MAX; -const STATE_PENDING_FIRE: u64 = STATE_DEREGISTERED - 1; -const STATE_MIN_VALUE: u64 = STATE_PENDING_FIRE; -/// The largest safe integer to use for ticks. -/// -/// This value should be updated if any other signal values are added above. -pub(super) const MAX_SAFE_MILLIS_DURATION: u64 = STATE_MIN_VALUE - 1; - -/// This structure holds the current shared state of the timer - its scheduled -/// time (if registered), or otherwise the result of the timer completing, as -/// well as the registered waker. -/// -/// Generally, the `StateCell` is only permitted to be accessed from two contexts: -/// Either a thread holding the corresponding `&mut TimerEntry`, or a thread -/// holding the timer driver lock. The write actions on the `StateCell` amount to -/// passing "ownership" of the `StateCell` between these contexts; moving a timer -/// from the `TimerEntry` to the driver requires _both_ holding the `&mut -/// TimerEntry` and the driver lock, while moving it back (firing the timer) -/// requires only the driver lock. -pub(super) struct StateCell { - /// Holds either the scheduled expiration time for this timer, or (if the - /// timer has been fired and is unregistered), `u64::MAX`. - state: AtomicU64, - /// If the timer is fired (an Acquire order read on state shows - /// `u64::MAX`), holds the result that should be returned from - /// polling the timer. 
Otherwise, the contents are unspecified and reading - /// without holding the driver lock is undefined behavior. - result: UnsafeCell, - /// The currently-registered waker - waker: AtomicWaker, -} - -impl Default for StateCell { - fn default() -> Self { - Self::new() - } -} - -impl std::fmt::Debug for StateCell { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "StateCell({:?})", self.read_state()) - } -} - -impl StateCell { - fn new() -> Self { - Self { - state: AtomicU64::new(STATE_DEREGISTERED), - result: UnsafeCell::new(Ok(())), - waker: AtomicWaker::new(), - } - } - - fn is_pending(&self) -> bool { - self.state.load(Ordering::Relaxed) == STATE_PENDING_FIRE - } - - /// Returns the current expiration time, or None if not currently scheduled. - fn when(&self) -> Option { - let cur_state = self.state.load(Ordering::Relaxed); - - if cur_state == STATE_DEREGISTERED { - None - } else { - Some(cur_state) - } - } - - /// If the timer is completed, returns the result of the timer. Otherwise, - /// returns None and registers the waker. - fn poll(&self, waker: &Waker) -> Poll { - // We must register first. This ensures that either `fire` will - // observe the new waker, or we will observe a racing fire to have set - // the state, or both. - self.waker.register_by_ref(waker); - - self.read_state() - } - - fn read_state(&self) -> Poll { - let cur_state = self.state.load(Ordering::Acquire); - - if cur_state == STATE_DEREGISTERED { - // SAFETY: The driver has fired this timer; this involves writing - // the result, and then writing (with release ordering) the state - // field. - Poll::Ready(unsafe { self.result.with(|p| *p) }) - } else { - Poll::Pending - } - } - - /// Marks this timer as being moved to the pending list, if its scheduled - /// time is not after `not_after`. - /// - /// If the timer is scheduled for a time after `not_after`, returns an Err - /// containing the current scheduled time. 
- /// - /// SAFETY: Must hold the driver lock. - unsafe fn mark_pending(&self, not_after: u64) -> Result<(), u64> { - // Quick initial debug check to see if the timer is already fired. Since - // firing the timer can only happen with the driver lock held, we know - // we shouldn't be able to "miss" a transition to a fired state, even - // with relaxed ordering. - let mut cur_state = self.state.load(Ordering::Relaxed); - - loop { - // improve the error message for things like - // https://github.com/tokio-rs/tokio/issues/3675 - assert!( - cur_state < STATE_MIN_VALUE, - "mark_pending called when the timer entry is in an invalid state" - ); - - if cur_state > not_after { - break Err(cur_state); - } - - match self.state.compare_exchange_weak( - cur_state, - STATE_PENDING_FIRE, - Ordering::AcqRel, - Ordering::Acquire, - ) { - Ok(_) => break Ok(()), - Err(actual_state) => cur_state = actual_state, - } - } - } - - /// Fires the timer, setting the result to the provided result. - /// - /// Returns: - /// * `Some(waker)` - if fired and a waker needs to be invoked once the - /// driver lock is released - /// * `None` - if fired and a waker does not need to be invoked, or if - /// already fired - /// - /// SAFETY: The driver lock must be held. - unsafe fn fire(&self, result: TimerResult) -> Option { - // Quick initial check to see if the timer is already fired. Since - // firing the timer can only happen with the driver lock held, we know - // we shouldn't be able to "miss" a transition to a fired state, even - // with relaxed ordering. - let cur_state = self.state.load(Ordering::Relaxed); - if cur_state == STATE_DEREGISTERED { - return None; - } - - // SAFETY: We assume the driver lock is held and the timer is not - // fired, so only the driver is accessing this field. - // - // We perform a release-ordered store to state below, to ensure this - // write is visible before the state update is visible. 
- unsafe { self.result.with_mut(|p| *p = result) }; - - self.state.store(STATE_DEREGISTERED, Ordering::Release); - - self.waker.take_waker() - } - - /// Marks the timer as registered (poll will return None) and sets the - /// expiration time. - /// - /// While this function is memory-safe, it should only be called from a - /// context holding both `&mut TimerEntry` and the driver lock. - fn set_expiration(&self, timestamp: u64) { - debug_assert!(timestamp < STATE_MIN_VALUE); - - // We can use relaxed ordering because we hold the driver lock and will - // fence when we release the lock. - self.state.store(timestamp, Ordering::Relaxed); - } - - /// Attempts to adjust the timer to a new timestamp. - /// - /// If the timer has already been fired, is pending firing, or the new - /// timestamp is earlier than the old timestamp, (or occasionally - /// spuriously) returns Err without changing the timer's state. In this - /// case, the timer must be deregistered and re-registered. - fn extend_expiration(&self, new_timestamp: u64) -> Result<(), ()> { - let mut prior = self.state.load(Ordering::Relaxed); - loop { - if new_timestamp < prior || prior >= STATE_MIN_VALUE { - return Err(()); - } - - match self.state.compare_exchange_weak( - prior, - new_timestamp, - Ordering::AcqRel, - Ordering::Acquire, - ) { - Ok(_) => return Ok(()), - Err(true_prior) => prior = true_prior, - } - } - } - - /// Returns true if the state of this timer indicates that the timer might - /// be registered with the driver. This check is performed with relaxed - /// ordering, but is conservative - if it returns false, the timer is - /// definitely _not_ registered. - pub(super) fn might_be_registered(&self) -> bool { - self.state.load(Ordering::Relaxed) != STATE_DEREGISTERED - } -} - -pin_project! { - // A timer entry. - // - // This is the handle to a timer that is controlled by the requester of the - // timer. As this participates in intrusive data structures, it must be pinned - // before polling. 
- #[derive(Debug)] - pub(crate) struct TimerEntry { - // Arc reference to the runtime handle. We can only free the driver after - // deregistering everything from their respective timer wheels. - driver: scheduler::Handle, - // Shared inner structure; this is part of an intrusive linked list, and - // therefore other references can exist to it while mutable references to - // Entry exist. - // - // This is manipulated only under the inner mutex. - #[pin] - inner: Option, - // Deadline for the timer. This is used to register on the first - // poll, as we can't register prior to being pinned. - deadline: Instant, - // Whether the deadline has been registered. - registered: bool, - } - - impl PinnedDrop for TimerEntry { - fn drop(this: Pin<&mut Self>) { - this.cancel(); - } - } -} - -unsafe impl Send for TimerEntry {} -unsafe impl Sync for TimerEntry {} - -/// An `TimerHandle` is the (non-enforced) "unique" pointer from the driver to the -/// timer entry. Generally, at most one `TimerHandle` exists for a timer at a time -/// (enforced by the timer state machine). -/// -/// SAFETY: An `TimerHandle` is essentially a raw pointer, and the usual caveats -/// of pointer safety apply. In particular, `TimerHandle` does not itself enforce -/// that the timer does still exist; however, normally an `TimerHandle` is created -/// immediately before registering the timer, and is consumed when firing the -/// timer, to help minimize mistakes. Still, because `TimerHandle` cannot enforce -/// memory safety, all operations are unsafe. -#[derive(Debug)] -pub(crate) struct TimerHandle { - inner: NonNull, -} - -pub(super) type EntryList = crate::util::linked_list::LinkedList; - -/// The shared state structure of a timer. This structure is shared between the -/// frontend (`Entry`) and driver backend. -/// -/// Note that this structure is located inside the `TimerEntry` structure. 
-pub(crate) struct TimerShared { - /// A link within the doubly-linked list of timers on a particular level and - /// slot. Valid only if state is equal to Registered. - /// - /// Only accessed under the entry lock. - pointers: linked_list::Pointers, - - /// The time when the [`TimerEntry`] was registered into the Wheel, - /// [`STATE_DEREGISTERED`] means it is not registered. - /// - /// Generally owned by the driver, but is accessed by the entry when not - /// registered. - /// - /// We use relaxed ordering for both loading and storing since this value - /// is only accessed either when holding the driver lock or through mutable - /// references to [`TimerEntry`]. - registered_when: AtomicU64, - - /// Current state. This records whether the timer entry is currently under - /// the ownership of the driver, and if not, its current state (not - /// complete, fired, error, etc). - state: StateCell, - - _p: PhantomPinned, -} - -unsafe impl Send for TimerShared {} -unsafe impl Sync for TimerShared {} - -impl std::fmt::Debug for TimerShared { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("TimerShared") - .field( - "registered_when", - &self.registered_when.load(Ordering::Relaxed), - ) - .field("state", &self.state) - .finish() - } -} - -generate_addr_of_methods! { - impl<> TimerShared { - unsafe fn addr_of_pointers(self: NonNull) -> NonNull> { - &self.pointers - } - } -} - -impl TimerShared { - pub(super) fn new() -> Self { - Self { - registered_when: AtomicU64::new(0), - pointers: linked_list::Pointers::new(), - state: StateCell::default(), - _p: PhantomPinned, - } - } - - /// Gets the cached time-of-expiration value. - pub(super) fn registered_when(&self) -> u64 { - // Cached-when is only accessed under the driver lock, so we can use relaxed - self.registered_when.load(Ordering::Relaxed) - } - - /// Gets the true time-of-expiration value, and copies it into the cached - /// time-of-expiration value. 
- /// - /// SAFETY: Must be called with the driver lock held, and when this entry is - /// not in any timer wheel lists. - pub(super) unsafe fn sync_when(&self) -> u64 { - let true_when = self.true_when(); - - self.registered_when.store(true_when, Ordering::Relaxed); - - true_when - } - - /// Sets the cached time-of-expiration value. - /// - /// SAFETY: Must be called with the driver lock held, and when this entry is - /// not in any timer wheel lists. - unsafe fn set_registered_when(&self, when: u64) { - self.registered_when.store(when, Ordering::Relaxed); - } - - /// Returns the true time-of-expiration value, with relaxed memory ordering. - pub(super) fn true_when(&self) -> u64 { - self.state.when().expect("Timer already fired") - } - - /// Sets the true time-of-expiration value, even if it is less than the - /// current expiration or the timer is deregistered. - /// - /// SAFETY: Must only be called with the driver lock held and the entry not - /// in the timer wheel. - pub(super) unsafe fn set_expiration(&self, t: u64) { - self.state.set_expiration(t); - self.registered_when.store(t, Ordering::Relaxed); - } - - /// Sets the true time-of-expiration only if it is after the current. - pub(super) fn extend_expiration(&self, t: u64) -> Result<(), ()> { - self.state.extend_expiration(t) - } - - /// Returns a `TimerHandle` for this timer. - pub(super) fn handle(&self) -> TimerHandle { - TimerHandle { - inner: NonNull::from(self), - } - } - - /// Returns true if the state of this timer indicates that the timer might - /// be registered with the driver. This check is performed with relaxed - /// ordering, but is conservative - if it returns false, the timer is - /// definitely _not_ registered. 
- pub(super) fn might_be_registered(&self) -> bool { - self.state.might_be_registered() - } -} - -unsafe impl linked_list::Link for TimerShared { - type Handle = TimerHandle; - - type Target = TimerShared; - - fn as_raw(handle: &Self::Handle) -> NonNull { - handle.inner - } - - unsafe fn from_raw(ptr: NonNull) -> Self::Handle { - TimerHandle { inner: ptr } - } - - unsafe fn pointers( - target: NonNull, - ) -> NonNull> { - TimerShared::addr_of_pointers(target) - } -} - -// ===== impl Entry ===== - -impl TimerEntry { - #[track_caller] - pub(crate) fn new(handle: scheduler::Handle, deadline: Instant) -> Self { - // Panic if the time driver is not enabled - let _ = handle.driver().time(); - - Self { - driver: handle, - inner: None, - deadline, - registered: false, - } - } - - fn inner(&self) -> Option<&TimerShared> { - self.inner.as_ref() - } - - fn init_inner(self: Pin<&mut Self>) { - match self.inner { - Some(_) => {} - None => self.project().inner.set(Some(TimerShared::new())), - } - } - - pub(crate) fn deadline(&self) -> Instant { - self.deadline - } - - pub(crate) fn is_elapsed(&self) -> bool { - let Some(inner) = self.inner() else { - return false; - }; - - // Is this timer still in the timer wheel? - let deregistered = !inner.might_be_registered(); - - // Once the timer has expired, - // it will be taken out of the wheel and be fired. - // - // So if we have already registered the timer into the wheel, - // but now it is not in the wheel, it means that it has been - // fired. - // - // +--------------+-----------------+----------+ - // | deregistered | self.registered | output | - // +--------------+-----------------+----------+ - // | true | false | false | <- never been registered - // +--------------+-----------------+----------+ - // | false | false | false | <- never been registered - // +--------------+-----------------+----------+ - // | true | true | true | <- registered into the wheel, - // | | | | and then taken out of the wheel. 
- // +--------------+-----------------+----------+ - // | false | true | false | <- still registered in the wheel - // +--------------+-----------------+----------+ - deregistered && self.registered - } - - /// Cancels and deregisters the timer. This operation is irreversible. - pub(crate) fn cancel(self: Pin<&mut Self>) { - // Avoid calling the `clear_entry` method, because it has not been initialized yet. - let Some(inner) = self.inner() else { - return; - }; - - // We need to perform an acq/rel fence with the driver thread, and the - // simplest way to do so is to grab the driver lock. - // - // Why is this necessary? We're about to release this timer's memory for - // some other non-timer use. However, we've been doing a bunch of - // relaxed (or even non-atomic) writes from the driver thread, and we'll - // be doing more from _this thread_ (as this memory is interpreted as - // something else). - // - // It is critical to ensure that, from the point of view of the driver, - // those future non-timer writes happen-after the timer is fully fired, - // and from the purpose of this thread, the driver's writes all - // happen-before we drop the timer. This in turn requires us to perform - // an acquire-release barrier in _both_ directions between the driver - // and dropping thread. - // - // The lock acquisition in clear_entry serves this purpose. All of the - // driver manipulations happen with the lock held, so we can just take - // the lock and be sure that this drop happens-after everything the - // driver did so far and happens-before everything the driver does in - // the future. While we have the lock held, we also go ahead and - // deregister the entry if necessary. 
- unsafe { self.driver().clear_entry(NonNull::from(inner)) }; - } - - pub(crate) fn reset(mut self: Pin<&mut Self>, new_time: Instant, reregister: bool) { - let this = self.as_mut().project(); - *this.deadline = new_time; - *this.registered = reregister; - - let tick = self.driver().time_source().deadline_to_tick(new_time); - let inner = match self.inner() { - Some(inner) => inner, - None => { - self.as_mut().init_inner(); - self.inner() - .expect("inner should already be initialized by `this.init_inner()`") - } - }; - - if inner.extend_expiration(tick).is_ok() { - return; - } - - if reregister { - unsafe { - self.driver() - .reregister(&self.driver.driver().io, tick, inner.into()); - } - } - } - - pub(crate) fn poll_elapsed( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - ) -> Poll> { - assert!( - !self.driver().is_shutdown(), - "{}", - crate::util::error::RUNTIME_SHUTTING_DOWN_ERROR - ); - - if !self.registered { - let deadline = self.deadline; - self.as_mut().reset(deadline, true); - } - - let inner = self - .inner() - .expect("inner should already be initialized by `self.reset()`"); - inner.state.poll(cx.waker()) - } - - pub(crate) fn driver(&self) -> &super::Handle { - self.driver.driver().time() - } - - #[cfg(all(tokio_unstable, feature = "tracing"))] - pub(crate) fn clock(&self) -> &super::Clock { - self.driver.driver().clock() - } -} - -impl TimerHandle { - pub(super) unsafe fn registered_when(&self) -> u64 { - unsafe { self.inner.as_ref().registered_when() } - } - - pub(super) unsafe fn sync_when(&self) -> u64 { - unsafe { self.inner.as_ref().sync_when() } - } - - pub(super) unsafe fn is_pending(&self) -> bool { - unsafe { self.inner.as_ref().state.is_pending() } - } - - /// Forcibly sets the true and cached expiration times to the given tick. - /// - /// SAFETY: The caller must ensure that the handle remains valid, the driver - /// lock is held, and that the timer is not in any wheel linked lists. 
- pub(super) unsafe fn set_expiration(&self, tick: u64) { - self.inner.as_ref().set_expiration(tick); - } - - /// Attempts to mark this entry as pending. If the expiration time is after - /// `not_after`, however, returns an Err with the current expiration time. - /// - /// If an `Err` is returned, the `registered_when` value will be updated to this - /// new expiration time. - /// - /// SAFETY: The caller must ensure that the handle remains valid, the driver - /// lock is held, and that the timer is not in any wheel linked lists. - /// After returning Ok, the entry must be added to the pending list. - pub(super) unsafe fn mark_pending(&self, not_after: u64) -> Result<(), u64> { - match self.inner.as_ref().state.mark_pending(not_after) { - Ok(()) => { - // mark this as being on the pending queue in registered_when - self.inner.as_ref().set_registered_when(STATE_DEREGISTERED); - Ok(()) - } - Err(tick) => { - self.inner.as_ref().set_registered_when(tick); - Err(tick) - } - } - } - - /// Attempts to transition to a terminal state. If the state is already a - /// terminal state, does nothing. - /// - /// Because the entry might be dropped after the state is moved to a - /// terminal state, this function consumes the handle to ensure we don't - /// access the entry afterwards. - /// - /// Returns the last-registered waker, if any. - /// - /// SAFETY: The driver lock must be held while invoking this function, and - /// the entry must not be in any wheel linked lists. 
- pub(super) unsafe fn fire(self, completed_state: TimerResult) -> Option { - self.inner.as_ref().state.fire(completed_state) - } -} diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index 7e280989c2b..abdf1df6838 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -1,7 +1,6 @@ use crate::loom::sync::atomic::{AtomicU8, Ordering::*}; use crate::loom::sync::{Arc, Mutex}; use crate::{sync::AtomicWaker, util::linked_list}; -use std::marker::PhantomPinned; use std::ptr::NonNull; use std::sync::mpsc::Sender; use std::task::Waker; @@ -16,12 +15,14 @@ const STATE_UNREGISTERED: u8 = 0; const STATE_REGISTERED: u8 = 2; /// The entry is in the pending queue of the timer wheel, -/// and not in any wheel level. +/// and not in any wheel level, which means that +/// the entry has reached its deadline and is waiting to be woken up. const STATE_PENDING: u8 = 3; /// The waker has been called, and the entry is no longer in the timer wheel -/// (both each wheel level and the pending queue). -const STATE_FIRED: u8 = 4;/// The entry in the timer wheel. +/// (neither in any wheel level nor in the pending queue), which means that +/// the entry has reached its deadline and has been woken up. +const STATE_WOKEN_UP: u8 = 4; #[derive(Debug)] struct Inner { @@ -32,13 +33,11 @@ struct Inner { waker: AtomicWaker, /// The mpsc channel used to cancel the entry. - // Since the race is very unlikely, we use `Mutex` here + // Since contention is very unlikely, we use `Mutex` here // for lower complexity. cancel_tx: Mutex>>, state: AtomicU8, - - _pin: PhantomPinned, } /// The entry in the timer wheel. @@ -47,8 +46,6 @@ pub(crate) struct Entry { pointers: linked_list::Pointers, inner: Arc, - - _pin: PhantomPinned, } generate_addr_of_methods! { @@ -78,8 +75,10 @@ unsafe impl linked_list::Link for Entry { } } -/// Another version of [`Handle`] which doesn't [`Arc::clone`] -/// the [`Inner`], this is used for intrusive linked list.
+/// Raw handle used by the intrusive linked list. +// It makes no sense to `Arc::clone()` the `Inner` +// while operating on the linked list, +// so we only use a raw pointer here. pub(crate) struct RawHandle { ptr: NonNull, } @@ -105,8 +104,18 @@ pub(crate) struct Handle { inner: Arc, } -/// Safety: [`Inner`] is protected by atomic variables and [`Mutex`], +/// Safety: +/// +/// 1. [`Self::inner`] is clearly [`Send`]. +/// 2. AND caller guarantees that the [`Self::drop_entry`] is only called +/// when the entry is no longer in the timer wheel and still valid. unsafe impl Send for Handle {} + +/// Safety: +/// +/// 1. [`Self::inner`] is clearly [`Sync`]. +/// 2. AND caller guarantees that the [`Self::drop_entry`] is only called +/// when the entry is no longer in the timer wheel and still valid. unsafe impl Sync for Handle {} impl Handle { @@ -116,15 +125,14 @@ impl Handle { waker: AtomicWaker::new(), cancel_tx: Mutex::new(None), state: AtomicU8::new(STATE_UNREGISTERED), - _pin: PhantomPinned, }); inner.waker.register_by_ref(waker); let ptr = Box::into_raw(Box::new(Entry { pointers: linked_list::Pointers::new(), inner: Arc::clone(&inner), - _pin: PhantomPinned, })); + // Safety: `Box::into_raw` always returns a valid pointer let ptr = unsafe { NonNull::new_unchecked(ptr) }; Handle { ptr, inner } @@ -136,7 +144,7 @@ impl Handle { /// /// Panics if the entry is not transitioned to the pending state. pub(crate) fn wake(&self) { - let old = self.inner.state.swap(STATE_FIRED, SeqCst); + let old = self.inner.state.swap(STATE_WOKEN_UP, SeqCst); assert!(old == STATE_PENDING); self.inner.waker.wake(); } @@ -147,7 +155,7 @@ impl Handle { /// /// Panics if the entry is not in the unregistered state. 
pub(crate) fn wake_unregistered(&self) { - let old = self.inner.state.swap(STATE_FIRED, SeqCst); + let old = self.inner.state.swap(STATE_WOKEN_UP, SeqCst); assert!(old == STATE_UNREGISTERED); self.inner.waker.wake(); } @@ -213,8 +221,8 @@ impl Handle { self.inner.state.fetch_or(0, SeqCst) == STATE_PENDING } - pub(crate) fn is_fired(&self) -> bool { - self.inner.state.fetch_or(0, SeqCst) == STATE_FIRED + pub(crate) fn is_woken_up(&self) -> bool { + self.inner.state.fetch_or(0, SeqCst) == STATE_WOKEN_UP } pub(crate) fn as_raw(&self) -> RawHandle { diff --git a/tokio/src/runtime/time/wheel/level.rs b/tokio/src/runtime/time/wheel/level.rs index 99b1d41c71d..e9a02801c49 100644 --- a/tokio/src/runtime/time/wheel/level.rs +++ b/tokio/src/runtime/time/wheel/level.rs @@ -120,7 +120,7 @@ impl Level { pub(crate) unsafe fn add_entry(&mut self, hdl: EntryHandle) { // Safety: the associated entry must be valid. - let deadline = unsafe { hdl.deadline() }; + let deadline = hdl.deadline(); let slot = slot_for(deadline, self.level); self.slot[slot].push_front(hdl.as_raw()); @@ -129,9 +129,7 @@ impl Level { } pub(crate) unsafe fn remove_entry(&mut self, hdl: EntryHandle) { - // Safety: the associated entry must be valid. - let deadline = unsafe { hdl.deadline() }; - let slot = slot_for(deadline, self.level); + let slot = slot_for(hdl.deadline(), self.level); unsafe { self.slot[slot].remove(hdl.as_entry_ptr()) }; if self.slot[slot].is_empty() { diff --git a/tokio/src/runtime/time/wheel/mod.rs b/tokio/src/runtime/time/wheel/mod.rs index 570b06f942b..72ecdfcaf4d 100644 --- a/tokio/src/runtime/time/wheel/mod.rs +++ b/tokio/src/runtime/time/wheel/mod.rs @@ -88,10 +88,10 @@ impl Wheel { hdl: EntryHandle, cancel_tx: mpsc::Sender, ) -> bool { - // Safety: the associated entry must be valid. let deadline = hdl.deadline(); if deadline <= self.elapsed { + // Safety: caller guarantees that the entry is valid. 
unsafe { hdl.drop_entry(); } @@ -125,26 +125,28 @@ impl Wheel { /// * The associated entry is valid. /// * AND the entry is already registered in the wheel. pub(crate) unsafe fn remove(&mut self, hdl: EntryHandle) { - unsafe { - if hdl.is_pending() { - self.pending.remove(hdl.as_entry_ptr()); - unsafe { - hdl.drop_entry(); - } - } else { - let deadline = hdl.deadline(); - debug_assert!( - self.elapsed <= deadline, - "elapsed={}; deadline={}", - self.elapsed, - deadline - ); - - let level = self.level_for(deadline); - self.levels[level].remove_entry(hdl.clone()); - unsafe { - hdl.drop_entry(); - } + if hdl.is_pending() { + self.pending.remove(hdl.as_entry_ptr()); + // Safety: the entry is still valid as it was just removed + // from the pending list. + unsafe { + hdl.drop_entry(); + } + } else { + let deadline = hdl.deadline(); + debug_assert!( + self.elapsed <= deadline, + "elapsed={}; deadline={}", + self.elapsed, + deadline + ); + + let level = self.level_for(deadline); + self.levels[level].remove_entry(hdl.clone()); + // Safety: the entry is still valid as it was just removed + // from its wheel level. + unsafe { + hdl.drop_entry(); } } } @@ -153,7 +155,11 @@ impl Wheel { pub(crate) fn poll(&mut self, now: u64) -> Option { loop { if let Some(raw_hdl) = self.pending.pop_back() { + // Safety: the entry is still valid as it was just popped + // from the pending list. let hdl = unsafe { raw_hdl.upgrade() }; + // Safety: the entry is still valid as it was just popped + // from the pending list. unsafe { hdl.drop_entry(); } @@ -178,8 +184,15 @@ impl Wheel { } self.pending.pop_back().map(|raw_hdl| { - // Safety: the handle is valid as it was just popped from the pending list. - unsafe { raw_hdl.upgrade() } + // Safety: the entry is still valid as it was just popped + // from the pending list. + let hdl = unsafe { raw_hdl.upgrade() }; + // Safety: the entry is still valid as it was just popped + // from the pending list. 
+ unsafe { + hdl.drop_entry(); + } + hdl }) } @@ -247,6 +260,8 @@ impl Wheel { let mut entries = self.take_entries(expiration); while let Some(raw_hdl) = entries.pop_back() { + // Safety: the entry is still valid as it was just popped + // from the list let hdl = unsafe { raw_hdl.upgrade() }; if expiration.level == 0 { @@ -255,7 +270,7 @@ impl Wheel { // Try to expire the entry; this is cheap (doesn't synchronize) if // the timer is not expired, and updates registered_when. - match unsafe { hdl.transition_to_pending(expiration.deadline) } { + match hdl.transition_to_pending(expiration.deadline) { Ok(()) => { // Item was expired self.pending.push_front(hdl.as_raw()); From d7f9c728fe63762965bd18ab3a47436325ac80a0 Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 13 Jul 2025 16:24:37 +0800 Subject: [PATCH 007/100] impl `tokio/src/runtime/time/timer.rs` Signed-off-by: ADD-SP --- tokio/src/runtime/time/mod.rs | 2 + tokio/src/runtime/time/timer.rs | 179 ++++++++++++++++++++++++++++ tokio/src/runtime/time/wheel/mod.rs | 1 - 3 files changed, 181 insertions(+), 1 deletion(-) create mode 100644 tokio/src/runtime/time/timer.rs diff --git a/tokio/src/runtime/time/mod.rs b/tokio/src/runtime/time/mod.rs index c8459ea2b89..02a99194015 100644 --- a/tokio/src/runtime/time/mod.rs +++ b/tokio/src/runtime/time/mod.rs @@ -6,6 +6,8 @@ //! Time driver. 
+mod timer; + mod handle; pub(crate) use self::handle::Handle; diff --git a/tokio/src/runtime/time/timer.rs b/tokio/src/runtime/time/timer.rs new file mode 100644 index 00000000000..13bb519965a --- /dev/null +++ b/tokio/src/runtime/time/timer.rs @@ -0,0 +1,179 @@ +use super::wheel::EntryHandle; +use crate::runtime::scheduler::Handle as SchedulerHandle; +use crate::runtime::time::wheel::Insert; +use crate::{runtime::time::Wheel, time::Instant, util::error::RUNTIME_SHUTTING_DOWN_ERROR}; +use std::{ + pin::Pin, + sync::mpsc, + task::{Context, Poll}, +}; + +pub(crate) struct Timer { + sched_handle: SchedulerHandle, + + /// The entry in the timing wheel. + /// + /// This is `None` if the timer has been deregistered. + entry: Option, + + /// The deadline for the timer. + deadline: Instant, +} + +impl std::fmt::Debug for Timer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Timer") + .field("deadline", &self.deadline) + .finish() + } +} + +impl Drop for Timer { + fn drop(&mut self) { + if let Some(entry) = self.entry.take() { + entry.transition_to_cancelling(); + } + } +} + +impl Timer { + #[track_caller] + pub(crate) fn new(sched_hdl: SchedulerHandle, deadline: Instant) -> Self { + // Panic if the time driver is not enabled + let _ = sched_hdl.driver().time(); + Timer { + sched_handle: sched_hdl, + entry: None, + deadline, + } + } + + pub(crate) fn deadline(&self) -> Instant { + self.deadline + } + + pub(crate) fn is_elapsed(&self) -> bool { + self.entry.as_ref().is_some_and(|entry| entry.is_woken_up()) + } + + fn register(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> { + let this = self.get_mut(); + + with_current_wheel(&this.sched_handle, |maybe_wheel| { + let deadline = deadline_to_tick(&this.sched_handle, this.deadline); + let hdl = EntryHandle::new(deadline, cx.waker()); + if let Some((wheel, tx)) = maybe_wheel { + // Safety: the entry is not registered yet + match unsafe { wheel.insert(hdl.clone(), tx) } { + 
Insert::Success => { + this.entry = Some(hdl); + Poll::Pending + } + Insert::Elapsed => Poll::Ready(()), + Insert::Cancelling => Poll::Pending, + } + } else { + this.entry = Some(hdl.clone()); + push_from_remote(&this.sched_handle, hdl); + Poll::Pending + } + }) + } + + pub(crate) fn poll_elapsed(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> { + match self.entry.as_ref() { + Some(entry) if entry.is_woken_up() => Poll::Ready(()), + Some(entry) => { + entry.register_waker(cx.waker()); + Poll::Pending + } + None => self.register(cx), + } + } +} + +pub(super) fn with_current_wheel(hdl: &SchedulerHandle, f: F) -> R +where + F: FnOnce(Option<(&mut Wheel, mpsc::Sender)>) -> R, +{ + #[cfg(not(feature = "rt"))] + { + let (_, _) = (hdl, f); + panic!("Tokio runtime is not enabled, cannot access the current wheel"); + } + + #[cfg(feature = "rt")] + { + use crate::loom::sync::Arc; + use crate::runtime::context; + use crate::runtime::scheduler::Context; + use crate::runtime::scheduler::Handle::CurrentThread; + #[cfg(feature = "rt-multi-thread")] + use crate::runtime::scheduler::Handle::MultiThread; + + let is_same_rt = context::with_current(|cur_hdl| match (cur_hdl, hdl) { + (CurrentThread(cur_hdl), CurrentThread(hdl)) => Arc::ptr_eq(cur_hdl, hdl), + #[cfg(feature = "rt-multi-thread")] + (MultiThread(cur_hdl), MultiThread(hdl)) => Arc::ptr_eq(cur_hdl, hdl), + #[cfg(feature = "rt-multi-thread")] + // this above cfg is needed to avoid the compiler warning reported by: + // cargo check -Zbuild-std --target target-specs/i686-unknown-linux-gnu.json \ + // --manifest-path tokio/Cargo.toml --no-default-features \ + // --features test-util` + // error: unreachable pattern + // --> tokio/src/runtime/time/timer.rs:118:13 + // | + // 115 | (CurrentThread(cur_hdl), CurrentThread(hdl)) => Arc::ptr_eq(cur_hdl, hdl), + // | -------------------------------------------- matches all the relevant values + // ... 
+ // 118 | _ => false, + // | ^ no value can reach this + _ => false, + }) + .unwrap_or_default(); + + if !is_same_rt { + // We don't want to create the timer in one runtime, + // but register it in a different runtime's timer wheel. + f(None) + } else { + context::with_scheduler(|maybe_cx| match maybe_cx { + Some(Context::CurrentThread(cx)) => cx.with_wheel(f), + #[cfg(feature = "rt-multi-thread")] + Some(Context::MultiThread(cx)) => cx.with_wheel(f), + None => f(None), + }) + } + } +} + +fn push_from_remote(sched_hdl: &SchedulerHandle, entry_hdl: EntryHandle) { + #[cfg(not(feature = "rt"))] + { + let (_, _) = (sched_hdl, entry_hdl); + panic!("Tokio runtime is not enabled, cannot access the current wheel"); + } + + #[cfg(feature = "rt")] + { + use crate::runtime::scheduler::Handle::CurrentThread; + #[cfg(feature = "rt-multi-thread")] + use crate::runtime::scheduler::Handle::MultiThread; + + match sched_hdl { + CurrentThread(hdl) => hdl.push_remote_timer(entry_hdl), + #[cfg(feature = "rt-multi-thread")] + MultiThread(hdl) => hdl.push_remote_timer(entry_hdl), + } + } +} + +fn deadline_to_tick(sched_hdl: &SchedulerHandle, deadline: Instant) -> u64 { + let time_hdl = sched_hdl.driver().time(); + + if time_hdl.is_shutdown() { + panic!("{RUNTIME_SHUTTING_DOWN_ERROR}"); + } + + time_hdl.time_source().deadline_to_tick(deadline) +} diff --git a/tokio/src/runtime/time/wheel/mod.rs b/tokio/src/runtime/time/wheel/mod.rs index 72ecdfcaf4d..3277c8e1981 100644 --- a/tokio/src/runtime/time/wheel/mod.rs +++ b/tokio/src/runtime/time/wheel/mod.rs @@ -4,7 +4,6 @@ use self::level::Level; mod entry; use entry::EntryList; -pub(crate) use entry::Entry; pub(crate) use entry::Handle as EntryHandle; use std::{array, sync::mpsc}; From 161ca911567934be5e008ee2ee7b4ebfc0867083 Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 13 Jul 2025 16:27:25 +0800 Subject: [PATCH 008/100] adapt the impl of `tokio::time::sleep` Signed-off-by: ADD-SP --- tokio/src/runtime/time/mod.rs | 1 + 
tokio/src/time/sleep.rs | 54 +++++++++++++---------------------- 2 files changed, 21 insertions(+), 34 deletions(-) diff --git a/tokio/src/runtime/time/mod.rs b/tokio/src/runtime/time/mod.rs index 02a99194015..a04f9656a46 100644 --- a/tokio/src/runtime/time/mod.rs +++ b/tokio/src/runtime/time/mod.rs @@ -7,6 +7,7 @@ //! Time driver. mod timer; +pub(crate) use timer::Timer; mod handle; pub(crate) use self::handle::Handle; diff --git a/tokio/src/time/sleep.rs b/tokio/src/time/sleep.rs index 1e3fe80d127..d8c39e1024a 100644 --- a/tokio/src/time/sleep.rs +++ b/tokio/src/time/sleep.rs @@ -1,5 +1,5 @@ -use crate::runtime::time::TimerEntry; -use crate::time::{error::Error, Duration, Instant}; +use crate::runtime::time::Timer; +use crate::time::{Duration, Instant}; use crate::util::trace; use pin_project_lite::pin_project; @@ -227,7 +227,7 @@ pin_project! { // The link between the `Sleep` instance and the timer that drives it. #[pin] - entry: TimerEntry, + entry: Timer, } } @@ -252,14 +252,14 @@ impl Sleep { location: Option<&'static Location<'static>>, ) -> Sleep { use crate::runtime::scheduler; - let handle = scheduler::Handle::current(); - let entry = TimerEntry::new(handle, deadline); + let sched_hdl = scheduler::Handle::current(); + let entry = Timer::new(sched_hdl, deadline); #[cfg(all(tokio_unstable, feature = "tracing"))] let inner = { - let handle = scheduler::Handle::current(); - let clock = handle.driver().clock(); - let handle = &handle.driver().time(); - let time_source = handle.time_source(); + let sched_hdl = scheduler::Handle::current(); + let clock = sched_hdl.driver().clock(); + let time_hdl = sched_hdl.driver().time(); + let time_source = time_hdl.time_source(); let deadline_tick = time_source.deadline_to_tick(deadline); let duration = deadline_tick.saturating_sub(time_source.now(clock)); @@ -349,25 +349,10 @@ impl Sleep { /// /// [`Pin::as_mut`]: fn@std::pin::Pin::as_mut pub fn reset(self: Pin<&mut Self>, deadline: Instant) { - self.reset_inner(deadline); 
- } - - /// Resets the `Sleep` instance to a new deadline without reregistering it - /// to be woken up. - /// - /// Calling this function allows changing the instant at which the `Sleep` - /// future completes without having to create new associated state and - /// without having it registered. This is required in e.g. the - /// [`crate::time::Interval`] where we want to reset the internal [Sleep] - /// without having it wake up the last task that polled it. - pub(crate) fn reset_without_reregister(self: Pin<&mut Self>, deadline: Instant) { - let mut me = self.project(); - me.entry.as_mut().reset(deadline, false); - } - - fn reset_inner(self: Pin<&mut Self>, deadline: Instant) { + use crate::runtime::scheduler; let mut me = self.project(); - me.entry.as_mut().reset(deadline, true); + me.entry + .set(Timer::new(scheduler::Handle::current(), deadline)); #[cfg(all(tokio_unstable, feature = "tracing"))] { @@ -380,8 +365,12 @@ impl Sleep { tracing::trace_span!("runtime.resource.async_op.poll"); let duration = { - let clock = me.entry.clock(); - let time_source = me.entry.driver().time_source(); + use crate::runtime::scheduler; + + let handle = scheduler::Handle::current(); + let clock = handle.driver().clock(); + let handle = &handle.driver().time(); + let time_source = handle.time_source(); let now = time_source.now(clock); let deadline_tick = time_source.deadline_to_tick(deadline); deadline_tick.saturating_sub(now) @@ -396,7 +385,7 @@ impl Sleep { } } - fn poll_elapsed(self: Pin<&mut Self>, cx: &mut task::Context<'_>) -> Poll> { + fn poll_elapsed(self: Pin<&mut Self>, cx: &mut task::Context<'_>) -> Poll<()> { let me = self.project(); ready!(crate::trace::trace_leaf(cx)); @@ -443,9 +432,6 @@ impl Future for Sleep { let _ao_span = self.inner.ctx.async_op_span.clone().entered(); #[cfg(all(tokio_unstable, feature = "tracing"))] let _ao_poll_span = self.inner.ctx.async_op_poll_span.clone().entered(); - match ready!(self.as_mut().poll_elapsed(cx)) { - Ok(()) => 
Poll::Ready(()), - Err(e) => panic!("timer error: {e}"), - } + self.as_mut().poll_elapsed(cx) } } From 09b27cf32c2e52657239ea813c0af59e0c63fac5 Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 13 Jul 2025 16:30:37 +0800 Subject: [PATCH 009/100] adapt the impl of `tokio::time::interval` Signed-off-by: ADD-SP --- tokio/src/time/interval.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tokio/src/time/interval.rs b/tokio/src/time/interval.rs index 0153a567f10..cf83e6703bd 100644 --- a/tokio/src/time/interval.rs +++ b/tokio/src/time/interval.rs @@ -484,10 +484,7 @@ impl Interval { .unwrap_or_else(Instant::far_future) }; - // When we arrive here, the internal delay returned `Poll::Ready`. - // Reset the delay but do not register it. It should be registered with - // the next call to [`poll_tick`]. - self.delay.as_mut().reset_without_reregister(next); + self.delay.as_mut().reset(next); // Return the time when we were scheduled to tick Poll::Ready(timeout) From 58a13e306af699f999cc85ad74d34b67443e8419 Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 13 Jul 2025 18:37:03 +0800 Subject: [PATCH 010/100] let current_thread scheduler process timers Signed-off-by: ADD-SP --- tokio/src/runtime/driver.rs | 7 + .../runtime/scheduler/current_thread/mod.rs | 176 ++++++++++++++---- tokio/src/runtime/scheduler/mod.rs | 2 + tokio/src/runtime/scheduler/util.rs | 163 ++++++++++++++++ tokio/src/runtime/time/wheel/mod.rs | 2 +- 5 files changed, 310 insertions(+), 40 deletions(-) create mode 100644 tokio/src/runtime/scheduler/util.rs diff --git a/tokio/src/runtime/driver.rs b/tokio/src/runtime/driver.rs index f06b70427ce..2ad00d3ae52 100644 --- a/tokio/src/runtime/driver.rs +++ b/tokio/src/runtime/driver.rs @@ -113,6 +113,13 @@ impl Handle { .expect("A Tokio 1.x context was found, but timers are disabled. 
Call `enable_time` on the runtime builder to enable timers.") } + pub(crate) fn with_time(&self, f: F) -> R + where + F: FnOnce(Option<&crate::runtime::time::Handle>) -> R, + { + f(self.time.as_ref()) + } + pub(crate) fn clock(&self) -> &Clock { &self.clock } diff --git a/tokio/src/runtime/scheduler/current_thread/mod.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs index 58b559485cc..c6b33fd92ab 100644 --- a/tokio/src/runtime/scheduler/current_thread/mod.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -1,11 +1,10 @@ use crate::loom::sync::atomic::AtomicBool; -use crate::loom::sync::{Arc, Mutex}; +use crate::loom::sync::Arc; use crate::runtime::driver::{self, Driver}; use crate::runtime::scheduler::{self, Defer, Inject}; use crate::runtime::task::{ self, JoinHandle, OwnedTasks, Schedule, SpawnLocation, Task, TaskHarnessScheduleHooks, }; -use crate::runtime::time::{EntryHandle, Wheel}; use crate::runtime::{ blocking, context, Config, MetricsBatch, SchedulerMetrics, TaskHooks, TaskMeta, WorkerMetrics, }; @@ -17,13 +16,20 @@ use std::cell::RefCell; use std::collections::VecDeque; use std::future::{poll_fn, Future}; use std::sync::atomic::Ordering::{AcqRel, Release}; -use std::sync::mpsc; use std::task::Poll::{Pending, Ready}; use std::task::Waker; use std::thread::ThreadId; use std::time::Duration; use std::{fmt, thread}; +cfg_time! { + use crate::runtime::scheduler::util; + use crate::runtime::time::{EntryHandle, Wheel}; + use crate::loom::sync::Mutex; + + use std::sync::mpsc; +} + /// Executes tasks on the current thread pub(crate) struct CurrentThread { /// Core scheduler data is acquired by a thread entering `block_on`. 
@@ -64,12 +70,15 @@ struct Core { /// Current tick tick: u32, + #[cfg(feature = "time")] /// Worker local timer wheel wheel: Wheel, + #[cfg(feature = "time")] /// Channel for sending timers that need to be cancelled timer_cancel_tx: mpsc::Sender, + #[cfg(feature = "time")] /// Channel for receiving timers that need to be cancelled timer_cancel_rx: mpsc::Receiver, @@ -94,6 +103,7 @@ struct Shared { /// Remote run queue inject: Inject>, + #[cfg(feature = "time")] /// Timers pending to be registered. /// This is used to register a timer but the [`Core`] /// is not available in the current thread. @@ -168,6 +178,7 @@ impl CurrentThread { }, shared: Shared { inject: Inject::new(), + #[cfg(feature = "time")] inject_timers: Mutex::new(Vec::new()), owned: OwnedTasks::new(1), woken: AtomicBool::new(false), @@ -181,12 +192,16 @@ impl CurrentThread { local_tid, }); + #[cfg(feature = "time")] let (timer_cancel_tx, timer_cancel_rx) = mpsc::channel(); let core = AtomicCell::new(Some(Box::new(Core { tasks: VecDeque::with_capacity(INITIAL_CAPACITY), tick: 0, + #[cfg(feature = "time")] wheel: Wheel::new(), + #[cfg(feature = "time")] timer_cancel_tx, + #[cfg(feature = "time")] timer_cancel_rx, driver: Some(driver), metrics: MetricsBatch::new(&handle.shared.worker_metrics), @@ -296,7 +311,16 @@ fn shutdown2(mut core: Box, handle: &Handle) -> Box { // call returns. handle.shared.owned.close_and_shutdown_all(0); - // Drain local queue + #[cfg(feature = "time")] + util::time::shutdown_local_timers( + &mut core.wheel, + core.timer_cancel_tx.clone(), + &core.timer_cancel_rx, + handle.take_remote_timers(), + &handle.driver, + ); + + // Drain the local queue // We already shut down every task, so we just need to drop the task. 
while let Some(task) = core.next_local_task(handle) { drop(task); @@ -408,12 +432,7 @@ impl Context { core.metrics.about_to_park(); core.submit_metrics(handle); - let (c, ()) = self.enter(core, || { - driver.park(&handle.driver); - self.defer.wake(); - }); - - core = c; + core = self.park_internal(core, handle, &mut driver, None); core.metrics.unparked(); core.submit_metrics(handle); @@ -434,15 +453,77 @@ impl Context { core.submit_metrics(handle); - let (mut core, ()) = self.enter(core, || { - driver.park_timeout(&handle.driver, Duration::from_millis(0)); - self.defer.wake(); - }); + core = self.park_internal(core, handle, &mut driver, Some(Duration::from_millis(0))); core.driver = Some(driver); core } + fn park_internal( + &self, + #[cfg_attr(not(feature = "time"), allow(unused_mut))] mut core: Box, + handle: &Handle, + driver: &mut Driver, + duration: Option, + ) -> Box { + debug_assert!(core.driver.is_none()); + + #[cfg(feature = "time")] + let (duration, maybe_advance_duration) = { + util::time::remove_cancelled_timers(&mut core.wheel, &core.timer_cancel_rx); + let should_yield = util::time::insert_inject_timers( + &mut core.wheel, + core.timer_cancel_tx.clone(), + handle.take_remote_timers(), + ); + let next_timer = util::time::next_expiration_time(&core.wheel, &handle.driver); + if should_yield { + (Some(Duration::from_millis(0)), None) + } else { + let dur = match (next_timer, duration) { + (Some(next_timer), Some(park_duration)) => Some(next_timer.min(park_duration)), + (Some(next_timer), None) => Some(next_timer), + (None, Some(park_duration)) => Some(park_duration), + (None, None) => None, + }; + if util::time::pre_auto_advance(&handle.driver, dur) { + (Some(Duration::ZERO), dur) + } else { + (dur, None) + } + } + }; + + let (core, ()) = self.enter(core, || { + if let Some(duration) = duration { + driver.park_timeout(&handle.driver, duration); + } else { + driver.park(&handle.driver); + } + }); + + self.defer.wake(); + + #[cfg(feature = "time")] + let 
core = { + // declare as mutable to avoid compiler warning + // error: variable does not need to be mutable + // --> tokio/src/runtime/scheduler/current_thread/mod.rs:497:14 + // | + // 497 | let (mut core, ()) = self.enter(core, || { + // | ----^^^^ + // | | + // | help: remove this `mut` + // | + let mut core = core; + util::time::post_auto_advance(&handle.driver, maybe_advance_duration); + util::time::process_expired_timers(&mut core.wheel, &handle.driver); + core + }; + + core + } + fn enter(&self, core: Box, f: impl FnOnce() -> R) -> (Box, R) { // Store the scheduler core in the thread-local context // @@ -461,25 +542,27 @@ impl Context { self.defer.defer(waker); } - fn with_core(&self, f: F) -> R - where - F: FnOnce(Option<&mut Core>) -> R, - { - let mut core = self.core.borrow_mut(); - f(core.as_mut().map(|c| c.as_mut())) - } + cfg_time! { + fn with_core(&self, f: F) -> R + where + F: FnOnce(Option<&mut Core>) -> R, + { + let mut core = self.core.borrow_mut(); + f(core.as_mut().map(|c| c.as_mut())) + } - pub(crate) fn with_wheel(&self, f: F) -> R - where - F: FnOnce(Option<(&mut Wheel, mpsc::Sender)>) -> R, - { - self.with_core(|maybe_core| { - if let Some(core) = maybe_core { - f(Some((&mut core.wheel, core.timer_cancel_tx.clone()))) - } else { - f(None) - } - }) + pub(crate) fn with_wheel(&self, f: F) -> R + where + F: FnOnce(Option<(&mut Wheel, mpsc::Sender)>) -> R, + { + self.with_core(|maybe_core| { + if let Some(core) = maybe_core { + f(Some((&mut core.wheel, core.timer_cancel_tx.clone()))) + } else { + f(None) + } + }) + } } } @@ -627,13 +710,20 @@ impl Handle { &self.shared.worker_metrics } - /// Push a timer handle from the remote thread. - pub(crate) fn push_remote_timer(&self, entry: EntryHandle) { - { + cfg_time! { + /// Push a timer handle from the remote thread. 
+ pub(crate) fn push_remote_timer(&self, entry: EntryHandle) { + { + let mut inject_timers = self.shared.inject_timers.lock(); + inject_timers.push(entry); + } + self.driver.unpark(); + } + + pub(crate) fn take_remote_timers(&self) -> Vec { let mut inject_timers = self.shared.inject_timers.lock(); - inject_timers.push(entry); + std::mem::take(&mut inject_timers) } - self.driver.unpark(); } } @@ -697,10 +787,18 @@ impl Schedule for Arc { Some(CurrentThread(cx)) if Arc::ptr_eq(self, &cx.handle) => { let mut core = cx.core.borrow_mut(); - // If `None`, the runtime is shutting down, so there is no need - // to schedule the task. if let Some(core) = core.as_mut() { core.push_task(self, task); + } else { + // runtime is shutting down + // OR waking up expired timers + + // Track that a task was scheduled from **outside** of the runtime. + self.shared.scheduler_metrics.inc_remote_schedule_count(); + + // Schedule the task + self.shared.inject.push(task); + self.driver.unpark(); } } _ => { diff --git a/tokio/src/runtime/scheduler/mod.rs b/tokio/src/runtime/scheduler/mod.rs index ecd56aeee10..d0b36f893d2 100644 --- a/tokio/src/runtime/scheduler/mod.rs +++ b/tokio/src/runtime/scheduler/mod.rs @@ -24,6 +24,8 @@ cfg_rt_multi_thread! { pub(crate) use multi_thread::MultiThread; } +mod util; + use crate::runtime::driver; #[derive(Debug, Clone)] diff --git a/tokio/src/runtime/scheduler/util.rs b/tokio/src/runtime/scheduler/util.rs new file mode 100644 index 00000000000..66ad6e6651a --- /dev/null +++ b/tokio/src/runtime/scheduler/util.rs @@ -0,0 +1,163 @@ +cfg_rt_and_time! 
{ + pub(crate) mod time { + use crate::runtime::{scheduler::driver}; + use crate::runtime::time::{EntryHandle, Wheel}; + use std::time::Duration; + use std::sync::mpsc; + + pub(crate) fn insert_inject_timers( + wheel: &mut Wheel, + tx: mpsc::Sender, + inject: Vec, + ) -> bool { + let mut fired = false; + // process injected timers + for hdl in inject { + unsafe { + if !wheel.insert(hdl.clone(), tx.clone()) { + // timer is already elapsed, wake it up + hdl.wake_unregistered(); + fired = true; + } + } + } + + fired + } + + pub(crate) fn remove_cancelled_timers( + wheel: &mut Wheel, + rx: &mpsc::Receiver, + ) { + while let Ok(hdl) = rx.try_recv() { + unsafe { + let is_registered = hdl.is_registered(); + let is_pending = hdl.is_pending(); + if is_registered && !is_pending { + wheel.remove(hdl); + } + } + } + } + + pub(crate) fn next_expiration_time( + wheel: &Wheel, + drv_hdl: &driver::Handle, + ) -> Option { + drv_hdl.with_time(|maybe_time_hdl| { + let Some(time_hdl) = maybe_time_hdl else { + // time driver is not enabled, nothing to do. + return None; + }; + + let clock = drv_hdl.clock(); + let time_source = time_hdl.time_source(); + + wheel.next_expiration_time().map(|tick| { + let now = time_source.now(clock); + time_source.tick_to_duration(tick.saturating_sub(now)) + }) + }) + } + + cfg_test_util! { + pub(crate) fn pre_auto_advance( + drv_hdl: &driver::Handle, + duration: Option, + ) -> bool { + drv_hdl.with_time(|maybe_time_hdl| { + if maybe_time_hdl.is_none() { + // time driver is not enabled, nothing to do. + return false; + } + + if duration.is_some() { + let clock = drv_hdl.clock(); + if clock.can_auto_advance() { + return true; + } + + false + } else { + false + } + }) + } + + pub(crate) fn post_auto_advance( + drv_hdl: &driver::Handle, + duration: Option, + ) { + drv_hdl.with_time(|maybe_time_hdl| { + let Some(time_hdl) = maybe_time_hdl else { + // time driver is not enabled, nothing to do. 
+ return; + }; + + if let Some(park_duration) = duration { + let clock = drv_hdl.clock(); + if clock.can_auto_advance() + && !time_hdl.did_wake() { + if let Err(msg) = clock.advance(park_duration) { + panic!("{msg}"); + } + } + } + }) + } + } + + cfg_not_test_util! { + pub(crate) fn pre_auto_advance( + _drv_hdl: &driver::Handle, + _duration: Option, + ) -> bool { + false + } + + pub(crate) fn post_auto_advance( + _drv_hdl: &driver::Handle, + _duration: Option, + ) { + // No-op in non-test util builds + } + } + + pub(crate) fn process_expired_timers( + wheel: &mut Wheel, + drv_hdl: &driver::Handle, + ) { + drv_hdl.with_time(|maybe_time_hdl| { + let Some(time_hdl) = maybe_time_hdl else { + // time driver is not enabled, nothing to do. + return; + }; + + let clock = drv_hdl.clock(); + let time_source = time_hdl.time_source(); + + let now = time_source.now(clock); + time_hdl.process_at_time(wheel, now); + }); + } + + pub(crate) fn shutdown_local_timers( + wheel: &mut Wheel, + tx: mpsc::Sender, + rx: &mpsc::Receiver, + inject: Vec, + drv_hdl: &driver::Handle, + ) { + drv_hdl.with_time(|maybe_time_hdl| { + let Some(time_hdl) = maybe_time_hdl else { + // time driver is not enabled, nothing to do. + return; + }; + + remove_cancelled_timers(wheel, rx); + insert_inject_timers(wheel, tx, inject); + time_hdl.shutdown(wheel); + }); + } + } +} diff --git a/tokio/src/runtime/time/wheel/mod.rs b/tokio/src/runtime/time/wheel/mod.rs index 3277c8e1981..8beb7dff6e8 100644 --- a/tokio/src/runtime/time/wheel/mod.rs +++ b/tokio/src/runtime/time/wheel/mod.rs @@ -222,7 +222,7 @@ impl Wheel { /// Returns the tick at which this timer wheel next needs to perform some /// processing, or None if there are no timers registered. 
- pub(super) fn next_expiration_time(&self) -> Option { + pub(crate) fn next_expiration_time(&self) -> Option { self.next_expiration().map(|ex| ex.deadline) } From 8407dd6cd239027ca88a3dfca533cc3460ab36ed Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 13 Jul 2025 18:50:32 +0800 Subject: [PATCH 011/100] let multi_thread scheduler process timers Signed-off-by: ADD-SP --- .../runtime/scheduler/current_thread/mod.rs | 14 +- .../runtime/scheduler/multi_thread/park.rs | 28 +++- .../runtime/scheduler/multi_thread/worker.rs | 134 +++++++++++++----- 3 files changed, 132 insertions(+), 44 deletions(-) diff --git a/tokio/src/runtime/scheduler/current_thread/mod.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs index c6b33fd92ab..76060176355 100644 --- a/tokio/src/runtime/scheduler/current_thread/mod.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -461,7 +461,7 @@ impl Context { fn park_internal( &self, - #[cfg_attr(not(feature = "time"), allow(unused_mut))] mut core: Box, + core: Box, handle: &Handle, driver: &mut Driver, duration: Option, @@ -469,7 +469,11 @@ impl Context { debug_assert!(core.driver.is_none()); #[cfg(feature = "time")] - let (duration, maybe_advance_duration) = { + let (core, duration, maybe_advance_duration) = { + // declare as mutable to avoid compiler warning, + // otherwise the compiler will complain that the `core` parameter does not need to be mutable + // if the 'time' feature is not enabled. 
+ let mut core = core; util::time::remove_cancelled_timers(&mut core.wheel, &core.timer_cancel_rx); let should_yield = util::time::insert_inject_timers( &mut core.wheel, @@ -478,7 +482,7 @@ impl Context { ); let next_timer = util::time::next_expiration_time(&core.wheel, &handle.driver); if should_yield { - (Some(Duration::from_millis(0)), None) + (core, Some(Duration::from_millis(0)), None) } else { let dur = match (next_timer, duration) { (Some(next_timer), Some(park_duration)) => Some(next_timer.min(park_duration)), @@ -487,9 +491,9 @@ impl Context { (None, None) => None, }; if util::time::pre_auto_advance(&handle.driver, dur) { - (Some(Duration::ZERO), dur) + (core, Some(Duration::ZERO), dur) } else { - (dur, None) + (core, dur, None) } } }; diff --git a/tokio/src/runtime/scheduler/multi_thread/park.rs b/tokio/src/runtime/scheduler/multi_thread/park.rs index b00c648e6d3..fab35ce8283 100644 --- a/tokio/src/runtime/scheduler/multi_thread/park.rs +++ b/tokio/src/runtime/scheduler/multi_thread/park.rs @@ -71,11 +71,8 @@ impl Parker { } pub(crate) fn park_timeout(&mut self, handle: &driver::Handle, duration: Duration) { - // Only parking with zero is supported... - assert_eq!(duration, Duration::from_millis(0)); - if let Some(mut driver) = self.inner.shared.driver.try_lock() { - driver.park_timeout(handle, duration); + self.inner.park_driver(&mut driver, handle, Some(duration)); } else { // https://github.com/tokio-rs/tokio/issues/6536 // Hacky, but it's just for loom tests. 
The counter gets incremented during @@ -124,7 +121,7 @@ impl Inner { } if let Some(mut driver) = self.shared.driver.try_lock() { - self.park_driver(&mut driver, handle); + self.park_driver(&mut driver, handle, None); } else { self.park_condvar(); } @@ -170,7 +167,19 @@ impl Inner { } } - fn park_driver(&self, driver: &mut Driver, handle: &driver::Handle) { + fn park_driver( + &self, + driver: &mut Driver, + handle: &driver::Handle, + duration: Option, + ) { + if duration.as_ref().is_some_and(Duration::is_zero) { + // zero duration doesn't actually park the thread, it just + // polls the I/O events, timers, etc. + driver.park_timeout(handle, Duration::ZERO); + return; + } + match self .state .compare_exchange(EMPTY, PARKED_DRIVER, SeqCst, SeqCst) @@ -191,7 +200,12 @@ impl Inner { Err(actual) => panic!("inconsistent park state; actual = {actual}"), } - driver.park(handle); + if let Some(duration) = duration { + debug_assert_ne!(duration, Duration::ZERO); + driver.park_timeout(handle, duration); + } else { + driver.park(handle); + } match self.state.swap(EMPTY, SeqCst) { NOTIFIED => {} // got a notification, hurray! diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index 5ae0bdce79b..4d50fc54715 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -63,7 +63,6 @@ use crate::runtime::scheduler::multi_thread::{ }; use crate::runtime::scheduler::{inject, Defer, Lock}; use crate::runtime::task::OwnedTasks; -use crate::runtime::time::{EntryHandle, Wheel}; use crate::runtime::{blocking, driver, scheduler, task, Config, SchedulerMetrics, WorkerMetrics}; use crate::runtime::{context, TaskHooks}; use crate::task::coop; @@ -74,7 +73,13 @@ use std::cell::RefCell; use std::task::Waker; use std::thread; use std::time::Duration; -use std::sync::mpsc; + +cfg_time! 
{ + use crate::runtime::scheduler::util; + use crate::runtime::time::{EntryHandle, Wheel}; + + use std::sync::mpsc; +} mod metrics; @@ -117,12 +122,15 @@ struct Core { /// The worker-local run queue. run_queue: queue::Local>, + #[cfg(feature = "time")] /// Worker local timer wheel wheel: Wheel, + #[cfg(feature = "time")] /// Channel for sending timers that need to be cancelled timer_cancel_tx: mpsc::Sender, + #[cfg(feature = "time")] /// Channel for receiving timers that need to be cancelled timer_cancel_rx: mpsc::Receiver, @@ -205,6 +213,7 @@ pub(crate) struct Synced { /// Synchronized state for `Inject`. pub(crate) inject: inject::Synced, + #[cfg(feature = "time")] /// Timers pending to be registered. /// This is used to register a timer but the [`Core`] /// is not available in the current thread. @@ -270,7 +279,7 @@ pub(super) fn create( let unpark = park.unpark(); let metrics = WorkerMetrics::from_config(&config); let stats = Stats::new(&metrics); - let wheel = Wheel::new(); + #[cfg(feature = "time")] let (timer_cancel_tx, timer_cancel_rx) = mpsc::channel(); cores.push(Box::new(Core { @@ -278,8 +287,11 @@ pub(super) fn create( lifo_slot: None, lifo_enabled: !config.disable_lifo_slot, run_queue, - wheel, + #[cfg(feature = "time")] + wheel: Wheel::new(), + #[cfg(feature = "time")] timer_cancel_tx, + #[cfg(feature = "time")] timer_cancel_rx, is_searching: false, is_shutdown: false, @@ -308,6 +320,7 @@ pub(super) fn create( synced: Mutex::new(Synced { idle: idle_synced, inject: inject_synced, + #[cfg(feature = "time")] inject_timers: vec![], }), shutdown_cores: Mutex::new(vec![]), @@ -574,7 +587,7 @@ impl Context { } else { // Wait for work core = if !self.defer.is_empty() { - self.park_timeout(core, Some(Duration::from_millis(0))) + self.park_yield(core) } else { self.park(core) }; @@ -582,6 +595,15 @@ impl Context { } } + #[cfg(feature = "time")] + util::time::shutdown_local_timers( + &mut core.wheel, + core.timer_cancel_tx.clone(), + &core.timer_cancel_rx, + 
self.worker.handle.take_remote_timers(), + &self.worker.handle.driver, + ); + core.pre_shutdown(&self.worker); // Signal shutdown self.worker.handle.shutdown_core(core); @@ -723,7 +745,7 @@ impl Context { // Call `park` with a 0 timeout. This enables the I/O driver, timer, ... // to run without actually putting the thread to sleep. - core = self.park_timeout(core, Some(Duration::from_millis(0))); + core = self.park_yield(core); // Run regularly scheduled maintenance core.maintenance(&self.worker); @@ -756,7 +778,7 @@ impl Context { core.stats .submit(&self.worker.handle.shared.worker_metrics[self.worker.index]); - core = self.park_timeout(core, None); + core = self.park_internal(core, None); core.stats.unparked(); @@ -775,9 +797,41 @@ impl Context { core } - fn park_timeout(&self, mut core: Box, duration: Option) -> Box { + fn park_yield(&self, core: Box) -> Box { + self.park_internal(core, Some(Duration::from_millis(0))) + } + + fn park_internal(&self, mut core: Box, duration: Option) -> Box { self.assert_lifo_enabled_is_correct(&core); + #[cfg(feature = "time")] + let (duration, maybe_advance_duration) = { + let handle = &self.worker.handle; + + util::time::remove_cancelled_timers(&mut core.wheel, &core.timer_cancel_rx); + let should_yield = util::time::insert_inject_timers( + &mut core.wheel, + core.timer_cancel_tx.clone(), + handle.take_remote_timers(), + ); + let next_timer = util::time::next_expiration_time(&core.wheel, &handle.driver); + if should_yield { + (Some(Duration::from_millis(0)), None) + } else { + let dur = match (next_timer, duration) { + (Some(next_timer), Some(park_duration)) => Some(next_timer.min(park_duration)), + (Some(next_timer), None) => Some(next_timer), + (None, Some(park_duration)) => Some(park_duration), + (None, None) => None, + }; + if util::time::pre_auto_advance(&handle.driver, dur) { + (Some(Duration::ZERO), dur) + } else { + (dur, None) + } + } + }; + // Take the parker out of core let mut park = core.park.take().expect("park 
missing"); @@ -796,6 +850,13 @@ impl Context { // Remove `core` from context core = self.core.borrow_mut().take().expect("core missing"); + #[cfg(feature = "time")] + { + let handle = &self.worker.handle; + util::time::post_auto_advance(&handle.driver, maybe_advance_duration); + util::time::process_expired_timers(&mut core.wheel, &handle.driver); + } + // Place `park` back in `core` core.park = Some(park); @@ -816,27 +877,29 @@ impl Context { } } - fn with_core(&self, f: F) -> R - where - F: FnOnce(Option<&mut Core>) -> R, - { - match self.core.borrow_mut().as_mut() { - Some(core) => f(Some(core)), - None => f(None), + cfg_time! { + fn with_core(&self, f: F) -> R + where + F: FnOnce(Option<&mut Core>) -> R, + { + match self.core.borrow_mut().as_mut() { + Some(core) => f(Some(core)), + None => f(None), + } } - } - pub(crate) fn with_wheel(&self, f: F) -> R - where - F: FnOnce(Option<(&mut Wheel, mpsc::Sender)>) -> R, - { - self.with_core(|core| { - if let Some(core) = core { - f(Some((&mut core.wheel, core.timer_cancel_tx.clone()))) - } else { - f(None) - } - }) + pub(in crate::runtime) fn with_wheel(&self, f: F) -> R + where + F: FnOnce(Option<(&mut Wheel, mpsc::Sender)>) -> R, + { + self.with_core(|core| { + if let Some(core) = core { + f(Some((&mut core.wheel, core.timer_cancel_tx.clone()))) + } else { + f(None) + } + }) + } } } @@ -1176,13 +1239,20 @@ impl Handle { } } - /// Push a timer handle from the remote thread. - pub(crate) fn push_remote_timer(&self, hdl: EntryHandle) { - { + cfg_time! { + /// Push a timer handle from the remote thread. 
+ pub(in crate::runtime) fn push_remote_timer(&self, hdl: EntryHandle) { + { + let mut synced = self.shared.synced.lock(); + synced.inject_timers.push(hdl); + } + self.notify_parked_remote(); + } + + pub(in crate::runtime) fn take_remote_timers(&self) -> Vec { let mut synced = self.shared.synced.lock(); - synced.inject_timers.push(hdl); + std::mem::take(&mut synced.inject_timers) } - self.notify_parked_remote(); } pub(super) fn close(&self) { From 89e132f15e664325e17cfa08bb09f5c635a86304 Mon Sep 17 00:00:00 2001 From: Qi Date: Thu, 17 Jul 2025 20:29:47 +0800 Subject: [PATCH 012/100] remove the `InsertError` Signed-off-by: ADD-SP --- tokio/src/time/error.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tokio/src/time/error.rs b/tokio/src/time/error.rs index 21920059090..aaf8847b81a 100644 --- a/tokio/src/time/error.rs +++ b/tokio/src/time/error.rs @@ -46,11 +46,6 @@ impl From for Error { #[derive(Debug, PartialEq, Eq)] pub struct Elapsed(()); -#[derive(Debug)] -pub(crate) enum InsertError { - Elapsed, -} - // ===== impl Error ===== impl Error { From 9b336b19c276ddf3adf3392267df063ce9ff26a3 Mon Sep 17 00:00:00 2001 From: Qi Date: Sat, 19 Jul 2025 19:35:45 +0800 Subject: [PATCH 013/100] fix `tokio-util/tests/time_delay_queue.rs` Signed-off-by: ADD-SP --- tokio-util/tests/time_delay_queue.rs | 88 +--------------------------- 1 file changed, 3 insertions(+), 85 deletions(-) diff --git a/tokio-util/tests/time_delay_queue.rs b/tokio-util/tests/time_delay_queue.rs index fdd0844c8c3..dcbf2d0e7c9 100644 --- a/tokio-util/tests/time_delay_queue.rs +++ b/tokio-util/tests/time_delay_queue.rs @@ -3,7 +3,7 @@ #![cfg(feature = "full")] use futures::StreamExt; -use tokio::time::{self, sleep, sleep_until, Duration, Instant}; +use tokio::time::{self, sleep, Duration, Instant}; use tokio_test::{assert_pending, assert_ready, task}; use tokio_util::time::DelayQueue; @@ -82,8 +82,6 @@ async fn single_short_delay() { sleep(ms(5)).await; - assert!(queue.is_woken()); - let entry = 
assert_ready_some!(poll!(queue)); assert_eq!(*entry.get_ref(), "foo"); @@ -221,7 +219,7 @@ async fn reset_much_later() { sleep(ms(20)).await; - assert!(queue.is_woken()); + assert_ready_some!(poll!(queue)); } // Reproduces tokio-rs/tokio#849. @@ -248,7 +246,7 @@ async fn reset_twice() { sleep(ms(20)).await; - assert!(queue.is_woken()); + assert_ready_some!(poll!(queue)); } /// Regression test: Given an entry inserted with a deadline in the past, so @@ -412,8 +410,6 @@ async fn expire_first_key_when_reset_to_expire_earlier() { sleep(ms(100)).await; - assert!(queue.is_woken()); - let entry = assert_ready_some!(poll!(queue)).into_inner(); assert_eq!(entry, "one"); } @@ -435,8 +431,6 @@ async fn expire_second_key_when_reset_to_expire_earlier() { sleep(ms(100)).await; - assert!(queue.is_woken()); - let entry = assert_ready_some!(poll!(queue)).into_inner(); assert_eq!(entry, "two"); } @@ -457,8 +451,6 @@ async fn reset_first_expiring_item_to_expire_later() { queue.reset_at(&one, now + ms(300)); sleep(ms(250)).await; - assert!(queue.is_woken()); - let entry = assert_ready_some!(poll!(queue)).into_inner(); assert_eq!(entry, "two"); } @@ -522,43 +514,6 @@ async fn insert_after_ready_poll() { assert_eq!("3", res[2]); } -#[tokio::test] -async fn reset_later_after_slot_starts() { - time::pause(); - - let mut queue = task::spawn(DelayQueue::new()); - - let now = Instant::now(); - - let foo = queue.insert_at("foo", now + ms(100)); - - assert_pending!(poll!(queue)); - - sleep_until(now + Duration::from_millis(80)).await; - - assert!(!queue.is_woken()); - - // At this point the queue hasn't been polled, so `elapsed` on the wheel - // for the queue is still at 0 and hence the 1ms resolution slots cover - // [0-64). Resetting the time on the entry to 120 causes it to get put in - // the [64-128) slot. As the queue knows that the first entry is within - // that slot, but doesn't know when, it must wake immediately to advance - // the wheel. 
- queue.reset_at(&foo, now + ms(120)); - assert!(queue.is_woken()); - - assert_pending!(poll!(queue)); - - sleep_until(now + Duration::from_millis(119)).await; - assert!(!queue.is_woken()); - - sleep(ms(1)).await; - assert!(queue.is_woken()); - - let entry = assert_ready_some!(poll!(queue)).into_inner(); - assert_eq!(entry, "foo"); -} - #[tokio::test] async fn reset_inserted_expired() { time::pause(); @@ -584,43 +539,6 @@ async fn reset_inserted_expired() { assert_eq!(queue.len(), 0); } -#[tokio::test] -async fn reset_earlier_after_slot_starts() { - time::pause(); - - let mut queue = task::spawn(DelayQueue::new()); - - let now = Instant::now(); - - let foo = queue.insert_at("foo", now + ms(200)); - - assert_pending!(poll!(queue)); - - sleep_until(now + Duration::from_millis(80)).await; - - assert!(!queue.is_woken()); - - // At this point the queue hasn't been polled, so `elapsed` on the wheel - // for the queue is still at 0 and hence the 1ms resolution slots cover - // [0-64). Resetting the time on the entry to 120 causes it to get put in - // the [64-128) slot. As the queue knows that the first entry is within - // that slot, but doesn't know when, it must wake immediately to advance - // the wheel. 
- queue.reset_at(&foo, now + ms(120)); - assert!(queue.is_woken()); - - assert_pending!(poll!(queue)); - - sleep_until(now + Duration::from_millis(119)).await; - assert!(!queue.is_woken()); - - sleep(ms(1)).await; - assert!(queue.is_woken()); - - let entry = assert_ready_some!(poll!(queue)).into_inner(); - assert_eq!(entry, "foo"); -} - #[tokio::test] async fn insert_in_past_after_poll_fires_immediately() { time::pause(); From cf7211a1dfc77d218d15d7d54b5016fd68013e52 Mon Sep 17 00:00:00 2001 From: Qi Date: Sat, 19 Jul 2025 21:16:32 +0800 Subject: [PATCH 014/100] fix unused import `wake_list::WakeList` Signed-off-by: ADD-SP --- tokio/src/util/mod.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/tokio/src/util/mod.rs b/tokio/src/util/mod.rs index eeddd0af2e8..3988d03047e 100644 --- a/tokio/src/util/mod.rs +++ b/tokio/src/util/mod.rs @@ -30,8 +30,6 @@ pub(crate) use wake::{waker, Wake}; // rt and signal use `Notify`, which requires `WakeList`. feature = "rt", feature = "signal", - // time driver uses `WakeList` in `Handle::process_at_time`. 
- feature = "time", ))] mod wake_list; #[cfg(any( @@ -41,7 +39,6 @@ mod wake_list; feature = "fs", feature = "rt", feature = "signal", - feature = "time", ))] pub(crate) use wake_list::WakeList; From 8ac5933c2d80e65d7853ca21ac89585923a26376 Mon Sep 17 00:00:00 2001 From: Qi Date: Sat, 2 Aug 2025 14:03:53 +0800 Subject: [PATCH 015/100] re-enable loom tests for timers Signed-off-by: ADD-SP --- tokio/src/runtime/time/mod.rs | 4 +- tokio/src/runtime/time/tests/mod.rs | 232 +++++++++++++--------------- 2 files changed, 111 insertions(+), 125 deletions(-) diff --git a/tokio/src/runtime/time/mod.rs b/tokio/src/runtime/time/mod.rs index a04f9656a46..1a98b150a86 100644 --- a/tokio/src/runtime/time/mod.rs +++ b/tokio/src/runtime/time/mod.rs @@ -134,5 +134,5 @@ impl Driver { } } -// #[cfg(test)] -// mod tests; +#[cfg(test)] +mod tests; diff --git a/tokio/src/runtime/time/tests/mod.rs b/tokio/src/runtime/time/tests/mod.rs index 33c4a5366d1..1038a182b8e 100644 --- a/tokio/src/runtime/time/tests/mod.rs +++ b/tokio/src/runtime/time/tests/mod.rs @@ -1,15 +1,17 @@ #![cfg(not(target_os = "wasi"))] +use std::future::poll_fn; use std::{task::Context, time::Duration}; #[cfg(not(loom))] use futures::task::noop_waker_ref; -use crate::loom::sync::atomic::{AtomicBool, Ordering}; -use crate::loom::sync::Arc; use crate::loom::thread; +use crate::runtime::time::timer::with_current_wheel; +use crate::runtime::Handle; +use crate::sync::oneshot; -use super::TimerEntry; +use super::Timer; fn block_on(f: impl std::future::Future) -> T { #[cfg(loom)] @@ -21,6 +23,7 @@ fn block_on(f: impl std::future::Future) -> T { .build() .unwrap(); rt.block_on(f) + // futures::executor::block_on(f) } } @@ -32,9 +35,43 @@ fn model(f: impl Fn() + Send + Sync + 'static) { f(); } +async fn fire_all_timers(handle: &Handle, exit_rx: oneshot::Receiver<()>) { + loop { + // Keep the worker thread busy, so that it can process injected + // timers. 
+ crate::task::yield_now().await; + if !exit_rx.is_empty() { + // break the loop if the thread is exiting + break; + } + + // In the `block_on` context, we can get the current wheel + // fire all timers. + with_current_wheel(&handle.inner, |maybe_wheel| { + let (wheel, _tx) = maybe_wheel.unwrap(); + let time = handle.inner.driver().time(); + time.process_at_time(wheel, u64::MAX); // 2 seconds + }); + + thread::yield_now(); + eprintln!("yielding to allow timers to fire"); + } +} + +// This function must be called inside the `rt.block_on`. +fn process_at_time(handle: &Handle, at: u64) { + let handle = &handle.inner; + with_current_wheel(handle, |maybe_wheel| { + let (wheel, _tx) = maybe_wheel.unwrap(); + let time = handle.driver().time(); + time.process_at_time(wheel, at); + }); +} + fn rt(start_paused: bool) -> crate::runtime::Runtime { crate::runtime::Builder::new_current_thread() .enable_time() + .event_interval(1) .start_paused(start_paused) .build() .unwrap() @@ -45,25 +82,23 @@ fn single_timer() { model(|| { let rt = rt(false); let handle = rt.handle(); + let (exit_tx, exit_rx) = oneshot::channel(); let handle_ = handle.clone(); let jh = thread::spawn(move || { - let entry = TimerEntry::new( + let entry = Timer::new( handle_.inner.clone(), handle_.inner.driver().clock().now() + Duration::from_secs(1), ); pin!(entry); - block_on(std::future::poll_fn(|cx| entry.as_mut().poll_elapsed(cx))).unwrap(); + block_on(poll_fn(|cx| entry.as_mut().poll_elapsed(cx))); + exit_tx.send(()).unwrap(); }); - thread::yield_now(); - - let time = handle.inner.driver().time(); - let clock = handle.inner.driver().clock(); - - // advance 2s - time.process_at_time(time.time_source().now(clock) + 2_000_000_000); + rt.block_on(async move { + fire_all_timers(handle, exit_rx).await; + }); jh.join().unwrap(); }) @@ -74,10 +109,11 @@ fn drop_timer() { model(|| { let rt = rt(false); let handle = rt.handle(); + let (exit_tx, exit_rx) = oneshot::channel(); let handle_ = handle.clone(); let jh = 
thread::spawn(move || { - let entry = TimerEntry::new( + let entry = Timer::new( handle_.inner.clone(), handle_.inner.driver().clock().now() + Duration::from_secs(1), ); @@ -89,15 +125,12 @@ fn drop_timer() { let _ = entry .as_mut() .poll_elapsed(&mut Context::from_waker(futures::task::noop_waker_ref())); + exit_tx.send(()).unwrap(); }); - thread::yield_now(); - - let time = handle.inner.driver().time(); - let clock = handle.inner.driver().clock(); - - // advance 2s in the future. - time.process_at_time(time.time_source().now(clock) + 2_000_000_000); + rt.block_on(async move { + fire_all_timers(handle, exit_rx).await; + }); jh.join().unwrap(); }) @@ -108,10 +141,12 @@ fn change_waker() { model(|| { let rt = rt(false); let handle = rt.handle(); + let (exit_tx, exit_rx) = oneshot::channel(); + let (change_waker_tx, change_waker_rx) = oneshot::channel(); let handle_ = handle.clone(); let jh = thread::spawn(move || { - let entry = TimerEntry::new( + let entry = Timer::new( handle_.inner.clone(), handle_.inner.driver().clock().now() + Duration::from_secs(1), ); @@ -121,70 +156,33 @@ fn change_waker() { .as_mut() .poll_elapsed(&mut Context::from_waker(futures::task::noop_waker_ref())); - block_on(std::future::poll_fn(|cx| entry.as_mut().poll_elapsed(cx))).unwrap(); + // At this point, we cannot let worker thread to wake up + // the timer because the waker is a noop. + // Let's say the timer has been woken up at this point, + // the following poll is basically polling a future that has completed + // (already returned `Ready`),which is not encouraged. 
+ + let mut maybe_change_waker_tx = Some(change_waker_tx); + block_on(poll_fn(|cx| { + let p = entry.as_mut().poll_elapsed(cx); + if let Some(tx) = maybe_change_waker_tx.take() { + // notify the worker thread that the waker is useable now + tx.send(()).unwrap(); + } + p + })); + + // notify the worker thread to exit + exit_tx.send(()).unwrap(); }); - thread::yield_now(); - - let time = handle.inner.driver().time(); - let clock = handle.inner.driver().clock(); - - // advance 2s - time.process_at_time(time.time_source().now(clock) + 2_000_000_000); + change_waker_rx.blocking_recv().unwrap(); - jh.join().unwrap(); - }) -} - -#[test] -fn reset_future() { - model(|| { - let finished_early = Arc::new(AtomicBool::new(false)); - - let rt = rt(false); - let handle = rt.handle(); - - let handle_ = handle.clone(); - let finished_early_ = finished_early.clone(); - let start = handle.inner.driver().clock().now(); - - let jh = thread::spawn(move || { - let entry = TimerEntry::new(handle_.inner.clone(), start + Duration::from_secs(1)); - pin!(entry); - - let _ = entry - .as_mut() - .poll_elapsed(&mut Context::from_waker(futures::task::noop_waker_ref())); - - entry.as_mut().reset(start + Duration::from_secs(2), true); - - // shouldn't complete before 2s - block_on(std::future::poll_fn(|cx| entry.as_mut().poll_elapsed(cx))).unwrap(); - - finished_early_.store(true, Ordering::Relaxed); + rt.block_on(async move { + fire_all_timers(handle, exit_rx).await; }); - thread::yield_now(); - - let handle = handle.inner.driver().time(); - - handle.process_at_time( - handle - .time_source() - .instant_to_tick(start + Duration::from_millis(1500)), - ); - - assert!(!finished_early.load(Ordering::Relaxed)); - - handle.process_at_time( - handle - .time_source() - .instant_to_tick(start + Duration::from_millis(2500)), - ); - jh.join().unwrap(); - - assert!(finished_early.load(Ordering::Relaxed)); }) } @@ -205,31 +203,33 @@ fn poll_process_levels() { let mut entries = vec![]; - for i in 
0..normal_or_miri(1024, 64) { - let mut entry = Box::pin(TimerEntry::new( - handle.inner.clone(), - handle.inner.driver().clock().now() + Duration::from_millis(i), - )); + rt.block_on(async { + for i in 0..normal_or_miri(1024, 64) { + let mut entry = Box::pin(Timer::new( + handle.inner.clone(), + handle.inner.driver().clock().now() + Duration::from_millis(i), + )); - let _ = entry - .as_mut() - .poll_elapsed(&mut Context::from_waker(noop_waker_ref())); + let _ = entry + .as_mut() + .poll_elapsed(&mut Context::from_waker(noop_waker_ref())); - entries.push(entry); - } + entries.push(entry); + } - for t in 1..normal_or_miri(1024, 64) { - handle.inner.driver().time().process_at_time(t as u64); + for t in 1..normal_or_miri(1024, 64) { + process_at_time(handle, t); - for (deadline, future) in entries.iter_mut().enumerate() { - let mut context = Context::from_waker(noop_waker_ref()); - if deadline <= t { - assert!(future.as_mut().poll_elapsed(&mut context).is_ready()); - } else { - assert!(future.as_mut().poll_elapsed(&mut context).is_pending()); + for (deadline, future) in entries.iter_mut().enumerate() { + let mut context = Context::from_waker(noop_waker_ref()); + if deadline <= t as usize { + assert!(future.as_mut().poll_elapsed(&mut context).is_ready()); + } else { + assert!(future.as_mut().poll_elapsed(&mut context).is_pending()); + } } } - } + }); } #[test] @@ -240,30 +240,16 @@ fn poll_process_levels_targeted() { let rt = rt(true); let handle = rt.handle(); - let e1 = TimerEntry::new( - handle.inner.clone(), - handle.inner.driver().clock().now() + Duration::from_millis(193), - ); - pin!(e1); - - let handle = handle.inner.driver().time(); - - handle.process_at_time(62); - assert!(e1.as_mut().poll_elapsed(&mut context).is_pending()); - handle.process_at_time(192); - handle.process_at_time(192); -} - -#[test] -#[cfg(not(loom))] -fn instant_to_tick_max() { - use crate::runtime::time::entry::MAX_SAFE_MILLIS_DURATION; - - let rt = rt(true); - let handle = 
rt.handle().inner.driver().time(); - - let start_time = handle.time_source.start_time(); - let long_future = start_time + std::time::Duration::from_millis(MAX_SAFE_MILLIS_DURATION + 1); + rt.block_on(async { + let e1 = Timer::new( + handle.inner.clone(), + handle.inner.driver().clock().now() + Duration::from_millis(193), + ); + pin!(e1); - assert!(handle.time_source.instant_to_tick(long_future) <= MAX_SAFE_MILLIS_DURATION); + process_at_time(handle, 62); + assert!(e1.as_mut().poll_elapsed(&mut context).is_pending()); + process_at_time(handle, 192); + process_at_time(handle, 192); + }) } From ca49b995aed98a6e42139109fca55f830f06c6e0 Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 5 Aug 2025 00:09:41 +0800 Subject: [PATCH 016/100] fix race conditions while operating on `EntryHandle` Signed-off-by: ADD-SP --- .../runtime/scheduler/multi_thread/worker.rs | 6 +- tokio/src/runtime/scheduler/util.rs | 8 +- tokio/src/runtime/time/mod.rs | 2 +- tokio/src/runtime/time/wheel/entry.rs | 319 ++++++++++-------- tokio/src/runtime/time/wheel/level.rs | 4 +- tokio/src/runtime/time/wheel/mod.rs | 106 +++--- 6 files changed, 239 insertions(+), 206 deletions(-) diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index 4d50fc54715..b96c87ea5fe 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -888,7 +888,7 @@ impl Context { } } - pub(in crate::runtime) fn with_wheel(&self, f: F) -> R + pub(crate) fn with_wheel(&self, f: F) -> R where F: FnOnce(Option<(&mut Wheel, mpsc::Sender)>) -> R, { @@ -1241,7 +1241,7 @@ impl Handle { cfg_time! { /// Push a timer handle from the remote thread. 
- pub(in crate::runtime) fn push_remote_timer(&self, hdl: EntryHandle) { + pub(crate) fn push_remote_timer(&self, hdl: EntryHandle) { { let mut synced = self.shared.synced.lock(); synced.inject_timers.push(hdl); @@ -1249,7 +1249,7 @@ impl Handle { self.notify_parked_remote(); } - pub(in crate::runtime) fn take_remote_timers(&self) -> Vec { + pub(crate) fn take_remote_timers(&self) -> Vec { let mut synced = self.shared.synced.lock(); std::mem::take(&mut synced.inject_timers) } diff --git a/tokio/src/runtime/scheduler/util.rs b/tokio/src/runtime/scheduler/util.rs index 66ad6e6651a..339f48ecff8 100644 --- a/tokio/src/runtime/scheduler/util.rs +++ b/tokio/src/runtime/scheduler/util.rs @@ -10,15 +10,17 @@ cfg_rt_and_time! { tx: mpsc::Sender, inject: Vec, ) -> bool { + use crate::runtime::time::Insert; let mut fired = false; // process injected timers for hdl in inject { - unsafe { - if !wheel.insert(hdl.clone(), tx.clone()) { - // timer is already elapsed, wake it up + match unsafe { wheel.insert(hdl.clone(), tx.clone()) } { + Insert::Success => {} + Insert::Elapsed => { hdl.wake_unregistered(); fired = true; } + Insert::Cancelling => {} } } diff --git a/tokio/src/runtime/time/mod.rs b/tokio/src/runtime/time/mod.rs index 1a98b150a86..74eeda11530 100644 --- a/tokio/src/runtime/time/mod.rs +++ b/tokio/src/runtime/time/mod.rs @@ -17,7 +17,7 @@ pub(crate) use source::TimeSource; mod wheel; cfg_rt_and_time! { - pub(crate) use wheel::EntryHandle; + pub(crate) use wheel::{Insert, EntryHandle}; } cfg_rt_or_time! 
{ pub(crate) use wheel::Wheel; diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index abdf1df6838..ec18df012c2 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -1,5 +1,6 @@ +use crate::loom::cell::UnsafeCell; use crate::loom::sync::atomic::{AtomicU8, Ordering::*}; -use crate::loom::sync::{Arc, Mutex}; +use crate::loom::sync::Arc; use crate::{sync::AtomicWaker, util::linked_list}; use std::ptr::NonNull; use std::sync::mpsc::Sender; @@ -10,6 +11,10 @@ pub(crate) type EntryList = linked_list::LinkedList; /// A pure new entry, no any changes to the state. const STATE_UNREGISTERED: u8 = 0; +/// The entry is being registered to the timer wheel, +/// and also saving the `cancel_tx` to the entry. +const STATE_BUSY_REGISTERING: u8 = 1; + /// The entry is registered to the timer wheel, /// but not in the pending queue of the timer wheel. const STATE_REGISTERED: u8 = 2; @@ -24,8 +29,16 @@ const STATE_PENDING: u8 = 3; /// the entry is reached its deadline and woken up. const STATE_WOKEN_UP: u8 = 4; +/// The [`Handle`] has been sent to the [`mpsc`] channel. +/// +/// [`mpsc`]: std::sync::mpsc +const STATE_CANCELLING: u8 = 5; + #[derive(Debug)] -struct Inner { +pub(crate) struct Entry { + /// The pointers used by the intrusive linked list. + pointers: linked_list::Pointers, + /// The tick when this entry is scheduled to expire. deadline: u64, @@ -33,20 +46,18 @@ struct Inner { waker: AtomicWaker, /// The mpsc channel used to cancel the entry. - // Since the contention is very unlikely, we use `Mutex` here - // for lower complexity. - cancel_tx: Mutex>>, + // Since `mpsc::Sender` doesn't have `Drop` implementation, + // we don't need to `drop_in_place` it when the entry is dropped. + cancel_tx: UnsafeCell>>, state: AtomicU8, } -/// The entry in the timer wheel. -pub(crate) struct Entry { - /// The pointers used by the intrusive linked list. 
- pointers: linked_list::Pointers, - - inner: Arc, -} +// Safety: +// +// * Caller guarantees the `Self::pointers` is used correctly. +// * AND `Self::cancel_tx` is protected by `Self::state`. +unsafe impl Sync for Entry {} generate_addr_of_methods! { impl<> Entry { @@ -56,16 +67,19 @@ generate_addr_of_methods! { } } +// Safety: `Entry` is always in an `Arc`. unsafe impl linked_list::Link for Entry { - type Handle = RawHandle; + type Handle = Handle; type Target = Entry; fn as_raw(hdl: &Self::Handle) -> NonNull { - hdl.ptr + unsafe { NonNull::new_unchecked(Arc::as_ptr(&hdl.entry).cast_mut()) } } unsafe fn from_raw(ptr: NonNull) -> Self::Handle { - RawHandle { ptr } + Handle { + entry: Arc::from_raw(ptr.as_ptr()), + } } unsafe fn pointers( @@ -75,168 +89,209 @@ unsafe impl linked_list::Link for Entry { } } -/// Raw handle used by the intrusive linked list. -// It makes no sense to `Arc::clone()` the `Inner` -// while operating on the linked list, -// so we only use a raw pointer here. -pub(crate) struct RawHandle { - ptr: NonNull, -} - -impl RawHandle { - /// # Safety - /// - /// [`Self::ptr`] must be a valid pointer to an [`Entry`]. - pub(crate) unsafe fn upgrade(self) -> Handle { - let inner = Arc::clone(&self.ptr.as_ref().inner); - Handle { - ptr: self.ptr, - inner, - } - } -} - #[derive(Debug, Clone)] pub(crate) struct Handle { - /// A pointer to the entry in the timer wheel. - ptr: NonNull, - - inner: Arc, + entry: Arc, } -/// Safety: -/// -/// 1. [`Self::inner`] is clearly [`Send`]. -/// 2. AND caller guarantees that the [`Self::drop_entry`] is only called -/// when the entry is no longer in the timer wheel and still valid. -unsafe impl Send for Handle {} - -/// Safety: -/// -/// 1. [`Self::inner`] is clearly [`Sync`]. -/// 2. AND caller guarantees that the [`Self::drop_entry`] is only called -/// when the entry is no longer in the timer wheel and still valid. 
-unsafe impl Sync for Handle {} +impl From for NonNull { + fn from(handle: Handle) -> NonNull { + let ptr = Arc::as_ptr(&handle.entry); + unsafe { NonNull::new_unchecked(ptr.cast_mut()) } + } +} impl Handle { pub(crate) fn new(deadline: u64, waker: &Waker) -> Self { - let inner = Arc::new(Inner { + let entry = Arc::new(Entry { + pointers: linked_list::Pointers::new(), deadline, waker: AtomicWaker::new(), - cancel_tx: Mutex::new(None), + cancel_tx: UnsafeCell::new(None), state: AtomicU8::new(STATE_UNREGISTERED), }); - inner.waker.register_by_ref(waker); + entry.waker.register_by_ref(waker); - let ptr = Box::into_raw(Box::new(Entry { - pointers: linked_list::Pointers::new(), - inner: Arc::clone(&inner), - })); - // Safety: `Box::into_raw` always returns a valid pointer - let ptr = unsafe { NonNull::new_unchecked(ptr) }; - - Handle { ptr, inner } + Handle { entry } } /// Wake the entry if it is already in the pending queue of the timer wheel. - /// - /// # Panic - /// - /// Panics if the entry is not transitioned to the pending state. pub(crate) fn wake(&self) { - let old = self.inner.state.swap(STATE_WOKEN_UP, SeqCst); - assert!(old == STATE_PENDING); - self.inner.waker.wake(); + match self + .entry + .state + .compare_exchange(STATE_PENDING, STATE_WOKEN_UP, SeqCst, SeqCst) + { + Ok(_) => self.entry.waker.wake(), + Err(STATE_UNREGISTERED) => { + panic!("entry is not registered, please call `wake_unregistered` instead") + } + Err(STATE_BUSY_REGISTERING) => { + panic!("should be be called concurrently with `transition_to_registered`") + } + Err(STATE_REGISTERED) => panic!("should not be called on non-pending entry"), + Err(STATE_WOKEN_UP) => panic!("should not be called on woken up entry"), + Err(STATE_CANCELLING) => (), // no need to wake up cancelling entries + Err(actual) => panic!("state is corrupted ({actual})"), + } } /// Wake the entry if it has already elapsed before registering to the timer wheel. 
- /// - /// # Panic - /// - /// Panics if the entry is not in the unregistered state. pub(crate) fn wake_unregistered(&self) { - let old = self.inner.state.swap(STATE_WOKEN_UP, SeqCst); - assert!(old == STATE_UNREGISTERED); - self.inner.waker.wake(); + match self + .entry + .state + .compare_exchange(STATE_UNREGISTERED, STATE_WOKEN_UP, SeqCst, SeqCst) + { + Ok(_) => self.entry.waker.wake(), + Err(STATE_REGISTERED) => { + panic!("entry is already registered, please call `wake` instead") + } + Err(STATE_BUSY_REGISTERING) => { + panic!("should be be called concurrently with `transition_to_registered`") + } + Err(STATE_PENDING) => { + panic!("entry is already pending, please call `wake` instead") + } + Err(STATE_WOKEN_UP) => panic!("entry is already woken up"), + Err(STATE_CANCELLING) => (), // no need to wake up cancelling entries + Err(actual) => panic!("state is corrupted ({actual})"), + } } pub(crate) fn register_waker(&self, waker: &Waker) { - self.inner.waker.register_by_ref(waker); + self.entry.waker.register_by_ref(waker); } - /// # Panic - /// - /// Panics if the entry is not in the unregistered state. 
- pub(crate) fn transition_to_registered(&self, cancel_tx: Sender) { - { - let mut maybe_tx = self.inner.cancel_tx.lock(); - assert!(maybe_tx.is_none(), "cancel sender already set"); - *maybe_tx = Some(cancel_tx); - // lock is dropped here + pub(crate) fn transition_to_registered( + &self, + cancel_tx: Sender, + ) -> TransitionToRegistered { + match self.entry.state.compare_exchange( + STATE_UNREGISTERED, + STATE_BUSY_REGISTERING, + SeqCst, + SeqCst, + ) { + Ok(_) => (), // successfully locked the `self.cancel_tx` + Err(STATE_BUSY_REGISTERING) => panic!("should not be called concurrently"), + Err(STATE_REGISTERED) => panic!("should not be called twice"), + Err(STATE_PENDING) => panic!("entry is already pending, cannot register again"), + Err(STATE_WOKEN_UP) => panic!("already woken up, cannot register again"), + Err(STATE_CANCELLING) => return TransitionToRegistered::Cancelling, + Err(actual) => panic!("state is corrupted ({actual})"), + } + + self.entry.cancel_tx.with_mut(|tx| { + // Safety: we have claimed the `STATE_BUSY_REGISTERING` state + let tx = unsafe { tx.as_mut().unwrap_unchecked() }; + assert!(tx.replace(cancel_tx).is_none(), "duplicate registration"); + }); + + match self.entry.state.compare_exchange( + STATE_BUSY_REGISTERING, + STATE_REGISTERED, + SeqCst, + SeqCst, + ) { + Ok(_) => TransitionToRegistered::Success, + Err(actual) => panic!("state is corrupted ({actual})"), } - let old = self.inner.state.swap(STATE_REGISTERED, SeqCst); - assert_eq!(old, STATE_UNREGISTERED, "Entry not unregistered"); } - /// # Panic - /// - /// Panics if the entry is not in the registered state. 
- pub(crate) fn transition_to_pending(&self, not_after: u64) -> Result<(), u64> { - if self.inner.deadline > not_after { - return Err(self.inner.deadline); + pub(crate) fn transition_to_pending(&self, not_after: u64) -> TransitionToPending { + if self.entry.deadline > not_after { + return TransitionToPending::NotElapsed(self.entry.deadline); } - let old = self.inner.state.swap(STATE_PENDING, SeqCst); - assert_eq!(old, STATE_REGISTERED, "Entry not registered"); - Ok(()) - } - - /// # Panic - /// - /// Panics if receiver side is closed, this is usually caused by - /// the shutdown logic dropping the receiver side too early. - pub(crate) fn cancel(&self) { - let state = self.inner.state.fetch_or(0, SeqCst); - if state & STATE_REGISTERED != 0 { - let maybe_tx = { - let mut lock = self.inner.cancel_tx.lock(); - lock.take() - // lock is dropped here to avoid poisoning the Mutex - }; - if let Some(tx) = maybe_tx { - tx.send(self.clone()) - .expect("cancel sender should not be closed"); + match self + .entry + .state + .compare_exchange(STATE_REGISTERED, STATE_PENDING, SeqCst, SeqCst) + { + Ok(_) => TransitionToPending::Success, + Err(STATE_UNREGISTERED) => panic!("should not be called on unregistered entry"), + Err(STATE_BUSY_REGISTERING) => { + panic!("should not be called concurrently with `transition_to_registered`") } + Err(STATE_PENDING) => panic!("should not be called twice"), + Err(STATE_WOKEN_UP) => panic!("should not be called on woken up entry"), + Err(STATE_CANCELLING) => TransitionToPending::Cancelling, + Err(actual) => panic!("state is corrupted ({actual})"), } } + pub(crate) fn transition_to_cancelling(&self) { + loop { + match self.entry.state.compare_exchange( + STATE_REGISTERED, + STATE_CANCELLING, + SeqCst, + SeqCst, + ) { + Ok(_) => break, + Err(STATE_UNREGISTERED) => return, // no need to cancel unregistered entries. + Err(STATE_BUSY_REGISTERING) => { + // Entry is being registered, wait for it to finish. 
+ std::hint::spin_loop(); + continue; + } + Err(STATE_PENDING) => return, // no need to cancel pending entries + Err(STATE_WOKEN_UP) => return, // no need to cancel woken up entries + Err(STATE_CANCELLING) => panic!("should not be called twice"), + Err(actual) => panic!("state is corrupted ({actual})"), + } + } + self.entry.cancel_tx.with_mut(|tx| { + // Safety: Since previous state is `STATE_REGISTERED`, + // this is synchronized with the `transition_to_registered` call, + // and the `cancel_tx` should be already stored. + let tx = unsafe { tx.as_mut().unwrap_unchecked() }; + tx.take() + .unwrap() + .send(self.clone()) + .expect("receiver side is closed"); + }); + } + pub(crate) fn deadline(&self) -> u64 { - self.inner.deadline + self.entry.deadline } pub(crate) fn is_registered(&self) -> bool { - self.inner.state.fetch_or(0, SeqCst) == STATE_REGISTERED + self.entry.state.fetch_or(0, SeqCst) == STATE_REGISTERED } pub(crate) fn is_pending(&self) -> bool { - self.inner.state.fetch_or(0, SeqCst) == STATE_PENDING + self.entry.state.fetch_or(0, SeqCst) == STATE_PENDING } pub(crate) fn is_woken_up(&self) -> bool { - self.inner.state.fetch_or(0, SeqCst) == STATE_WOKEN_UP + self.entry.state.fetch_or(0, SeqCst) == STATE_WOKEN_UP } +} - pub(crate) fn as_raw(&self) -> RawHandle { - RawHandle { ptr: self.ptr } - } +/// An error returned when trying to transition +/// an being cancelled entry to the registered state. +pub(crate) enum TransitionToRegistered { + /// The entry is being cancelled, no need to register it. + Success, - pub(crate) fn as_entry_ptr(&self) -> NonNull { - self.ptr - } + /// The entry is being cancelled, + /// no need to transition it to the registered state. + Cancelling, +} - /// # Safety - /// - /// [`Self::ptr`] must be a valid pointer to an [`Entry`]. - pub(crate) unsafe fn drop_entry(&self) { - drop(Box::from_raw(self.ptr.as_ptr())); - } +/// An result of the `transition_to_pending` method. 
+pub(crate) enum TransitionToPending { + /// The entry was successfully transitioned + /// to the pending state. + Success, + + /// The entry doesn't reached its deadline yet, + /// and the tick when it should be woken up is returned. + NotElapsed(u64), + + /// The entry is being cancelled, + /// no need to transition it to the pending state. + Cancelling, } diff --git a/tokio/src/runtime/time/wheel/level.rs b/tokio/src/runtime/time/wheel/level.rs index e9a02801c49..cea17a6fb8b 100644 --- a/tokio/src/runtime/time/wheel/level.rs +++ b/tokio/src/runtime/time/wheel/level.rs @@ -123,7 +123,7 @@ impl Level { let deadline = hdl.deadline(); let slot = slot_for(deadline, self.level); - self.slot[slot].push_front(hdl.as_raw()); + self.slot[slot].push_front(hdl); self.occupied |= occupied_bit(slot); } @@ -131,7 +131,7 @@ impl Level { pub(crate) unsafe fn remove_entry(&mut self, hdl: EntryHandle) { let slot = slot_for(hdl.deadline(), self.level); - unsafe { self.slot[slot].remove(hdl.as_entry_ptr()) }; + unsafe { self.slot[slot].remove(hdl.into()) }; if self.slot[slot].is_empty() { // The bit is currently set debug_assert!(self.occupied & occupied_bit(slot) != 0); diff --git a/tokio/src/runtime/time/wheel/mod.rs b/tokio/src/runtime/time/wheel/mod.rs index 8beb7dff6e8..e7cc5d5cb5e 100644 --- a/tokio/src/runtime/time/wheel/mod.rs +++ b/tokio/src/runtime/time/wheel/mod.rs @@ -5,6 +5,8 @@ use self::level::Level; mod entry; use entry::EntryList; pub(crate) use entry::Handle as EntryHandle; +use entry::TransitionToPending; +use entry::TransitionToRegistered; use std::{array, sync::mpsc}; @@ -80,39 +82,38 @@ impl Wheel { /// /// The caller must ensure: /// - /// * The associated entry is valid. - /// * AND the entry is not already registered in the wheel. + /// * The entry is not already registered in ANY wheel. 
pub(crate) unsafe fn insert( &mut self, hdl: EntryHandle, cancel_tx: mpsc::Sender, - ) -> bool { + ) -> Insert { let deadline = hdl.deadline(); if deadline <= self.elapsed { - // Safety: caller guarantees that the entry is valid. - unsafe { - hdl.drop_entry(); - } - return false; + return Insert::Elapsed; } // Get the level at which the entry should be stored let level = self.level_for(deadline); - hdl.transition_to_registered(cancel_tx); - unsafe { - self.levels[level].add_entry(hdl); - } + match hdl.transition_to_registered(cancel_tx) { + TransitionToRegistered::Success => { + unsafe { + self.levels[level].add_entry(hdl); + } - debug_assert!({ - self.levels[level] - .next_expiration(self.elapsed) - .map(|e| e.deadline >= self.elapsed) - .unwrap_or(true) - }); + debug_assert!({ + self.levels[level] + .next_expiration(self.elapsed) + .map(|e| e.deadline >= self.elapsed) + .unwrap_or(true) + }); - true + Insert::Success + } + TransitionToRegistered::Cancelling => Insert::Cancelling, + } } /// Removes `item` from the timing wheel. @@ -121,16 +122,10 @@ impl Wheel { /// /// The caller must ensure: /// - /// * The associated entry is valid. - /// * AND the entry is already registered in the wheel. + /// * The entry is already registered in THIS wheel. pub(crate) unsafe fn remove(&mut self, hdl: EntryHandle) { if hdl.is_pending() { - self.pending.remove(hdl.as_entry_ptr()); - // Safety: the entry is still valid as it was just popped - // from the pending list. - unsafe { - hdl.drop_entry(); - } + self.pending.remove(hdl.into()); } else { let deadline = hdl.deadline(); debug_assert!( @@ -142,26 +137,13 @@ impl Wheel { let level = self.level_for(deadline); self.levels[level].remove_entry(hdl.clone()); - // Safety: the entry is still valid as it was just popped - // from the pending list. - unsafe { - hdl.drop_entry(); - } } } /// Advances the timer up to the instant represented by `now`. 
pub(crate) fn poll(&mut self, now: u64) -> Option { loop { - if let Some(raw_hdl) = self.pending.pop_back() { - // Safety: the entry is still valid as it was just popped - // from the pending list. - let hdl = unsafe { raw_hdl.upgrade() }; - // Safety: the entry is still valid as it was just popped - // from the pending list. - unsafe { - hdl.drop_entry(); - } + if let Some(hdl) = self.pending.pop_back() { return Some(hdl); } @@ -182,17 +164,7 @@ impl Wheel { } } - self.pending.pop_back().map(|raw_hdl| { - // Safety: the entry is still valid as it was just popped - // from the pending list. - let hdl = unsafe { raw_hdl.upgrade() }; - // Safety: the entry is still valid as it was just popped - // from the pending list. - unsafe { - hdl.drop_entry(); - } - hdl - }) + self.pending.pop_back() } /// Returns the instant at which the next timeout expires. @@ -258,28 +230,20 @@ impl Wheel { // those entries again or we'll end up in an infinite loop. let mut entries = self.take_entries(expiration); - while let Some(raw_hdl) = entries.pop_back() { - // Safety: the entry is still valid as it was just popped - // from the list - let hdl = unsafe { raw_hdl.upgrade() }; - + while let Some(hdl) = entries.pop_back() { if expiration.level == 0 { debug_assert_eq!(hdl.deadline(), expiration.deadline); } - // Try to expire the entry; this is cheap (doesn't synchronize) if - // the timer is not expired, and updates registered_when. 
match hdl.transition_to_pending(expiration.deadline) { - Ok(()) => { - // Item was expired - self.pending.push_front(hdl.as_raw()); - } - Err(expiration_tick) => { - let level = level_for(expiration.deadline, expiration_tick); + TransitionToPending::Success => self.pending.push_front(hdl), + TransitionToPending::NotElapsed(when) => { + let level = level_for(expiration.deadline, when); unsafe { self.levels[level].add_entry(hdl); } } + TransitionToPending::Cancelling => {} } } } @@ -325,6 +289,18 @@ fn level_for(elapsed: u64, when: u64) -> usize { significant / NUM_LEVELS } +pub(crate) enum Insert { + /// The entry was successfully inserted. + Success, + + /// The entry has already expired, in this case, + /// the entry is not inserted into the wheel. + Elapsed, + + /// The entry is being cancelled, no need to register it. + Cancelling, +} + #[cfg(all(test, not(loom)))] mod test { use super::*; From fb027a6c493579239af5bc26066e60a0a7e0ab51 Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 10 Aug 2025 15:00:42 +0800 Subject: [PATCH 017/100] cross-thread cancellation queue Signed-off-by: ADD-SP --- .github/workflows/loom.yml | 2 +- spellcheck.dic | 4 +- .../runtime/scheduler/current_thread/mod.rs | 20 +- .../runtime/scheduler/multi_thread/worker.rs | 20 +- tokio/src/runtime/scheduler/util.rs | 14 +- tokio/src/runtime/time/mod.rs | 1 + tokio/src/runtime/time/tests/mod.rs | 1 - tokio/src/runtime/time/timer.rs | 15 +- .../runtime/time/wheel/cancellation_queue.rs | 254 ++++++++++++++++++ .../time/wheel/cancellation_queue/tests.rs | 85 ++++++ tokio/src/runtime/time/wheel/entry.rs | 50 ++-- tokio/src/runtime/time/wheel/mod.rs | 13 +- 12 files changed, 417 insertions(+), 62 deletions(-) create mode 100644 tokio/src/runtime/time/wheel/cancellation_queue.rs create mode 100644 tokio/src/runtime/time/wheel/cancellation_queue/tests.rs diff --git a/.github/workflows/loom.yml b/.github/workflows/loom.yml index 5efa0aca74b..3cbcfaa2515 100644 --- a/.github/workflows/loom.yml +++ 
b/.github/workflows/loom.yml @@ -52,7 +52,7 @@ jobs: toolchain: ${{ env.rust_stable }} - uses: Swatinem/rust-cache@v2 - name: run tests - run: cargo test --lib --release --features full -- --nocapture runtime::time::tests + run: cargo test --lib --release --features full -- --nocapture runtime::time working-directory: tokio loom-current-thread: diff --git a/spellcheck.dic b/spellcheck.dic index a0829f934a7..9ff45b691fc 100644 --- a/spellcheck.dic +++ b/spellcheck.dic @@ -1,4 +1,4 @@ -306 +308 & + < @@ -99,6 +99,7 @@ destructors destructure Destructures Dev +Dmitry dns DNS DoS @@ -299,6 +300,7 @@ versa versioned versioning vtable +Vyukov's waker wakers Wakers diff --git a/tokio/src/runtime/scheduler/current_thread/mod.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs index 76060176355..459ebb40eed 100644 --- a/tokio/src/runtime/scheduler/current_thread/mod.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -24,10 +24,8 @@ use std::{fmt, thread}; cfg_time! { use crate::runtime::scheduler::util; - use crate::runtime::time::{EntryHandle, Wheel}; + use crate::runtime::time::{EntryHandle, Wheel, cancellation_queue}; use crate::loom::sync::Mutex; - - use std::sync::mpsc; } /// Executes tasks on the current thread @@ -76,11 +74,11 @@ struct Core { #[cfg(feature = "time")] /// Channel for sending timers that need to be cancelled - timer_cancel_tx: mpsc::Sender, + timer_cancel_tx: cancellation_queue::Sender, #[cfg(feature = "time")] /// Channel for receiving timers that need to be cancelled - timer_cancel_rx: mpsc::Receiver, + timer_cancel_rx: cancellation_queue::Receiver, /// Runtime driver /// @@ -193,7 +191,7 @@ impl CurrentThread { }); #[cfg(feature = "time")] - let (timer_cancel_tx, timer_cancel_rx) = mpsc::channel(); + let (timer_cancel_tx, timer_cancel_rx) = cancellation_queue::new(); let core = AtomicCell::new(Some(Box::new(Core { tasks: VecDeque::with_capacity(INITIAL_CAPACITY), tick: 0, @@ -314,8 +312,8 @@ fn shutdown2(mut core: Box, handle: 
&Handle) -> Box { #[cfg(feature = "time")] util::time::shutdown_local_timers( &mut core.wheel, - core.timer_cancel_tx.clone(), - &core.timer_cancel_rx, + &core.timer_cancel_tx, + &mut core.timer_cancel_rx, handle.take_remote_timers(), &handle.driver, ); @@ -474,10 +472,10 @@ impl Context { // otherwise the compiler will complain that the `core` parameter does not need to be mutable // if the 'time' feature is not enabled. let mut core = core; - util::time::remove_cancelled_timers(&mut core.wheel, &core.timer_cancel_rx); + util::time::remove_cancelled_timers(&mut core.wheel, &mut core.timer_cancel_rx); let should_yield = util::time::insert_inject_timers( &mut core.wheel, - core.timer_cancel_tx.clone(), + &core.timer_cancel_tx, handle.take_remote_timers(), ); let next_timer = util::time::next_expiration_time(&core.wheel, &handle.driver); @@ -557,7 +555,7 @@ impl Context { pub(crate) fn with_wheel(&self, f: F) -> R where - F: FnOnce(Option<(&mut Wheel, mpsc::Sender)>) -> R, + F: FnOnce(Option<(&mut Wheel, cancellation_queue::Sender)>) -> R, { self.with_core(|maybe_core| { if let Some(core) = maybe_core { diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index b96c87ea5fe..1fed740bbb0 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -76,9 +76,7 @@ use std::time::Duration; cfg_time! 
{ use crate::runtime::scheduler::util; - use crate::runtime::time::{EntryHandle, Wheel}; - - use std::sync::mpsc; + use crate::runtime::time::{EntryHandle, Wheel, cancellation_queue}; } mod metrics; @@ -128,11 +126,11 @@ struct Core { #[cfg(feature = "time")] /// Channel for sending timers that need to be cancelled - timer_cancel_tx: mpsc::Sender, + timer_cancel_tx: cancellation_queue::Sender, #[cfg(feature = "time")] /// Channel for receiving timers that need to be cancelled - timer_cancel_rx: mpsc::Receiver, + timer_cancel_rx: cancellation_queue::Receiver, /// True if the worker is currently searching for more work. Searching /// involves attempting to steal from other workers. @@ -280,7 +278,7 @@ pub(super) fn create( let metrics = WorkerMetrics::from_config(&config); let stats = Stats::new(&metrics); #[cfg(feature = "time")] - let (timer_cancel_tx, timer_cancel_rx) = mpsc::channel(); + let (timer_cancel_tx, timer_cancel_rx) = cancellation_queue::new(); cores.push(Box::new(Core { tick: 0, @@ -598,8 +596,8 @@ impl Context { #[cfg(feature = "time")] util::time::shutdown_local_timers( &mut core.wheel, - core.timer_cancel_tx.clone(), - &core.timer_cancel_rx, + &core.timer_cancel_tx, + &mut core.timer_cancel_rx, self.worker.handle.take_remote_timers(), &self.worker.handle.driver, ); @@ -808,10 +806,10 @@ impl Context { let (duration, maybe_advance_duration) = { let handle = &self.worker.handle; - util::time::remove_cancelled_timers(&mut core.wheel, &core.timer_cancel_rx); + util::time::remove_cancelled_timers(&mut core.wheel, &mut core.timer_cancel_rx); let should_yield = util::time::insert_inject_timers( &mut core.wheel, - core.timer_cancel_tx.clone(), + &core.timer_cancel_tx, handle.take_remote_timers(), ); let next_timer = util::time::next_expiration_time(&core.wheel, &handle.driver); @@ -890,7 +888,7 @@ impl Context { pub(crate) fn with_wheel(&self, f: F) -> R where - F: FnOnce(Option<(&mut Wheel, mpsc::Sender)>) -> R, + F: FnOnce(Option<(&mut Wheel, 
cancellation_queue::Sender)>) -> R, { self.with_core(|core| { if let Some(core) = core { diff --git a/tokio/src/runtime/scheduler/util.rs b/tokio/src/runtime/scheduler/util.rs index 339f48ecff8..306e3292dd3 100644 --- a/tokio/src/runtime/scheduler/util.rs +++ b/tokio/src/runtime/scheduler/util.rs @@ -1,13 +1,12 @@ cfg_rt_and_time! { pub(crate) mod time { use crate::runtime::{scheduler::driver}; - use crate::runtime::time::{EntryHandle, Wheel}; + use crate::runtime::time::{EntryHandle, Wheel, cancellation_queue::{Sender, Receiver}}; use std::time::Duration; - use std::sync::mpsc; pub(crate) fn insert_inject_timers( wheel: &mut Wheel, - tx: mpsc::Sender, + tx: &Sender, inject: Vec, ) -> bool { use crate::runtime::time::Insert; @@ -29,9 +28,10 @@ cfg_rt_and_time! { pub(crate) fn remove_cancelled_timers( wheel: &mut Wheel, - rx: &mpsc::Receiver, + rx: &mut Receiver, ) { - while let Ok(hdl) = rx.try_recv() { + let iter = unsafe { rx.recv_all() }; + for hdl in iter { unsafe { let is_registered = hdl.is_registered(); let is_pending = hdl.is_pending(); @@ -145,8 +145,8 @@ cfg_rt_and_time! { pub(crate) fn shutdown_local_timers( wheel: &mut Wheel, - tx: mpsc::Sender, - rx: &mpsc::Receiver, + tx: &Sender, + rx: &mut Receiver, inject: Vec, drv_hdl: &driver::Handle, ) { diff --git a/tokio/src/runtime/time/mod.rs b/tokio/src/runtime/time/mod.rs index 74eeda11530..b7876b074d5 100644 --- a/tokio/src/runtime/time/mod.rs +++ b/tokio/src/runtime/time/mod.rs @@ -18,6 +18,7 @@ pub(crate) use source::TimeSource; mod wheel; cfg_rt_and_time! { pub(crate) use wheel::{Insert, EntryHandle}; + pub(crate) use wheel::cancellation_queue; } cfg_rt_or_time! 
{ pub(crate) use wheel::Wheel; diff --git a/tokio/src/runtime/time/tests/mod.rs b/tokio/src/runtime/time/tests/mod.rs index 1038a182b8e..d018059ec32 100644 --- a/tokio/src/runtime/time/tests/mod.rs +++ b/tokio/src/runtime/time/tests/mod.rs @@ -54,7 +54,6 @@ async fn fire_all_timers(handle: &Handle, exit_rx: oneshot::Receiver<()>) { }); thread::yield_now(); - eprintln!("yielding to allow timers to fire"); } } diff --git a/tokio/src/runtime/time/timer.rs b/tokio/src/runtime/time/timer.rs index 13bb519965a..03c01b6297d 100644 --- a/tokio/src/runtime/time/timer.rs +++ b/tokio/src/runtime/time/timer.rs @@ -1,12 +1,13 @@ use super::wheel::EntryHandle; use crate::runtime::scheduler::Handle as SchedulerHandle; +use crate::runtime::time::wheel::cancellation_queue::Sender; use crate::runtime::time::wheel::Insert; -use crate::{runtime::time::Wheel, time::Instant, util::error::RUNTIME_SHUTTING_DOWN_ERROR}; -use std::{ - pin::Pin, - sync::mpsc, - task::{Context, Poll}, -}; +use crate::runtime::time::Wheel; +use crate::time::Instant; +use crate::util::error::RUNTIME_SHUTTING_DOWN_ERROR; + +use std::pin::Pin; +use std::task::{Context, Poll}; pub(crate) struct Timer { sched_handle: SchedulerHandle, @@ -94,7 +95,7 @@ impl Timer { pub(super) fn with_current_wheel(hdl: &SchedulerHandle, f: F) -> R where - F: FnOnce(Option<(&mut Wheel, mpsc::Sender)>) -> R, + F: FnOnce(Option<(&mut Wheel, Sender)>) -> R, { #[cfg(not(feature = "rt"))] { diff --git a/tokio/src/runtime/time/wheel/cancellation_queue.rs b/tokio/src/runtime/time/wheel/cancellation_queue.rs new file mode 100644 index 00000000000..27aa832d65b --- /dev/null +++ b/tokio/src/runtime/time/wheel/cancellation_queue.rs @@ -0,0 +1,254 @@ +//! MPSC Intrusive Linked List +//! +//! This is a highly customized implementation based on +//! Dmitry Vyukov's [Intrusive MPSC node-based queue]. +//! +//! This major difference is that the [`Receiver`] +//! always returns all items in the queue, +//! instead of just one item at a time. +//! +//! 
[Intrusive MPSC node-based queue]: https://www.1024cores.net/home/lock-free-algorithms/queues/intrusive-mpsc-node-based-queue + +use super::{Entry, EntryHandle}; +use crate::loom::cell::UnsafeCell; +use crate::loom::sync::atomic::{AtomicPtr, Ordering::*}; +use crate::loom::sync::Arc; + +use std::iter::Iterator; +use std::marker::PhantomData; +use std::mem::ManuallyDrop; +use std::ptr::{null, null_mut, NonNull}; +use std::task::{RawWaker, RawWakerVTable, Waker}; + +fn spin_loop() { + #[cfg(loom)] + crate::loom::thread::yield_now(); + + #[cfg(not(loom))] + std::hint::spin_loop(); +} + +#[derive(Debug)] +struct Inner { + head: UnsafeCell>, + tail: AtomicPtr, + stub: AtomicPtr, +} + +unsafe impl Send for Inner {} +unsafe impl Sync for Inner {} + +impl Drop for Inner { + fn drop(&mut self) { + // Drop the stub pointer + let stub = NonNull::new(self.stub.load(SeqCst)).unwrap(); + drop_stub(stub); + } +} + +impl Inner { + pub(crate) fn new() -> Self { + let stub = new_stub(); + + Self { + head: UnsafeCell::new(NonNull::new(stub.as_ptr()).unwrap()), + tail: AtomicPtr::new(stub.as_ptr()), + stub: AtomicPtr::new(stub.as_ptr()), + } + } + + /// # Safety + /// + /// Violating any of the following constraints can lead to + /// undefined behavior: + /// + /// - `hdl` must not be in any queue. + unsafe fn push(&self, hdl: EntryHandle) { + // Since all items in the queue must be alive until they are removed, + // so we should not decrease the reference count. + let node = ManuallyDrop::new(hdl.into_entry()); + + let next = node.cancel_pointer(); + next.store(null_mut(), SeqCst); + + let old_tail = self.tail.swap(Arc::as_ptr(&node).cast_mut(), SeqCst); + old_tail + .as_ref() + .expect("tail pointer should never be null") + .cancel_pointer() + .store(Arc::as_ptr(&node).cast_mut(), SeqCst); + } + + /// # Safety + /// + /// Violating any of the following constraints can lead to + /// undefined behavior: + /// + /// - This method must not be called concurrently. 
+ unsafe fn take_all(&self) -> impl Iterator { + // TODO: Using `Option` for both head and tail is a bad design, + // imagine a case where the head is None, but the tail is Some, + // which is very confusing. + struct Iter { + head: Option>, + tail: Option>, + } + + impl Drop for Iter { + fn drop(&mut self) { + for hdl in self { + drop(hdl) + } + } + } + + impl Iterator for Iter { + type Item = EntryHandle; + + fn next(&mut self) -> Option { + match self.head { + Some(head) => unsafe { + let atomic_next = head.as_ref().cancel_pointer(); + let mut next = atomic_next.load(SeqCst); + while head != self.tail.unwrap() && next.is_null() { + spin_loop(); + next = atomic_next.load(SeqCst); + } + self.head = NonNull::new(next); + Some(Self::Item::from(NonNull::new_unchecked(head.as_ptr()))) + }, + None => None, + } + } + } + let new_stub = new_stub(); + + let old_tail = self.tail.swap(new_stub.as_ptr(), SeqCst); + + // At this point, `self.push` will link the new node to `new_stub`. + + // Safety: `self.head` is only access by single thread. + let old_head = unsafe { + self.head.with_mut(|head| { + let old_head = *head; + *head = NonNull::new(new_stub.as_ptr()).expect( + "head pointer is always equals to stub pointer, so it should never be null", + ); + old_head + }) + }; + let old_stub = self.stub.swap(new_stub.as_ptr(), SeqCst); + + if old_head.as_ptr() == old_tail { + // queue is empty + drop_stub(NonNull::new(old_stub).expect("stub pointer should never be null")); + return Iter { + head: None, + tail: None, + }; + } + + // Safety: The head pointer always equals to stub, and stub is always valid. + let old_head_entry_cancel_pointer = unsafe { old_head.as_ref() }.cancel_pointer(); + let mut first = old_head_entry_cancel_pointer.load(SeqCst); + while first.is_null() { + // We enter this loop if and only if there is only one item in the queue, + // AND the `cancel_pointer` is being set to non-null. 
+ spin_loop(); + first = old_head_entry_cancel_pointer.load(SeqCst); + } + + drop_stub(NonNull::new(old_stub).expect("stub pointer should never be null")); + + // Safety: + // + // - We have checked `first` before. + // - `old_tail` is is not null as `self.tail` is always not null. + unsafe { + Iter { + head: Some(NonNull::new_unchecked(first)), + tail: Some(NonNull::new_unchecked(old_tail)), + } + } + } +} + +#[derive(Debug, Clone)] +pub(crate) struct Sender { + inner: Arc, +} + +/// Safety: [`Sender`] is protected by [`AtomicPtr`] +unsafe impl Send for Sender {} + +/// Safety: [`Sender`] is protected by [`AtomicPtr`] +unsafe impl Sync for Sender {} + +impl Sender { + pub(crate) unsafe fn send(&self, hdl: EntryHandle) { + self.inner.push(hdl); + } +} + +#[derive(Debug)] +pub(crate) struct Receiver { + inner: Arc, + + // make sure Receiver is `!Sync` + _p: PhantomData<*const ()>, +} + +/// Safety: [`Receiver`] can only be accessed from a single thread. +unsafe impl Send for Receiver {} + +impl Receiver { + pub(crate) unsafe fn recv_all(&mut self) -> impl Iterator { + self.inner.take_all() + } +} + +pub(crate) fn new() -> (Sender, Receiver) { + let inner = Arc::new(Inner::new()); + ( + Sender { + inner: inner.clone(), + }, + Receiver { + inner, + _p: PhantomData, + }, + ) +} + +fn new_stub() -> NonNull { + let hdl = EntryHandle::new(0, &noop_waker()); + let ptr = Arc::into_raw(hdl.into_entry()); + NonNull::new(ptr.cast_mut()).expect("stub pointer should never be null") +} + +fn drop_stub(stub: NonNull) { + let hdl = EntryHandle::from(stub); + drop(hdl); +} + +// The following noop waker implementation is from crate `futures`. 
+// https://docs.rs/futures/latest/futures/ + +unsafe fn noop_clone(_data: *const ()) -> RawWaker { + noop_raw_waker() +} + +unsafe fn noop(_data: *const ()) {} + +const NOOP_WAKER_VTABLE: RawWakerVTable = RawWakerVTable::new(noop_clone, noop, noop, noop); + +const fn noop_raw_waker() -> RawWaker { + RawWaker::new(null(), &NOOP_WAKER_VTABLE) +} + +fn noop_waker() -> Waker { + unsafe { Waker::from_raw(noop_raw_waker()) } +} + +#[cfg(test)] +mod tests; diff --git a/tokio/src/runtime/time/wheel/cancellation_queue/tests.rs b/tokio/src/runtime/time/wheel/cancellation_queue/tests.rs new file mode 100644 index 00000000000..4ebfc620297 --- /dev/null +++ b/tokio/src/runtime/time/wheel/cancellation_queue/tests.rs @@ -0,0 +1,85 @@ +use super::*; + +use futures::task::noop_waker; + +#[cfg(loom)] +const NUM_ITEMS: usize = 16; + +#[cfg(not(loom))] +const NUM_ITEMS: usize = 64; + +fn new_handle() -> EntryHandle { + EntryHandle::new(0, &noop_waker()) +} + +fn model(f: F) { + #[cfg(loom)] + loom::model(f); + + #[cfg(not(loom))] + f(); +} + +#[test] +fn single_thread() { + model(|| { + for i in 0..NUM_ITEMS { + let (tx, mut rx) = new(); + + for _ in 0..i { + unsafe { tx.send(new_handle()) }; + } + + let all = unsafe { rx.recv_all() }; + assert_eq!(all.count(), i); + } + }); +} + +#[test] +#[cfg(not(target_os = "wasi"))] +fn multi_thread() { + use crate::loom::sync::atomic::{AtomicUsize, Ordering::SeqCst}; + use crate::loom::sync::Arc; + use crate::loom::thread; + + #[cfg(loom)] + // '-1' is for the main thread that runs `loom::model` + const NUM_THREADS: usize = 2; + #[cfg(not(loom))] + const NUM_THREADS: usize = 8; + + model(|| { + let (tx, mut rx) = new(); + let mut jhs = Vec::new(); + let sent = Arc::new(AtomicUsize::new(0)); + + for _ in 0..NUM_THREADS { + let tx = tx.clone(); + let sent = sent.clone(); + jhs.push(thread::spawn(move || { + for _ in 0..NUM_ITEMS { + unsafe { tx.send(new_handle()) }; + sent.fetch_add(1, SeqCst); + } + })); + } + + let mut count = 0; + loop { + let 
all = unsafe { rx.recv_all() }; + count += all.count(); + if sent.fetch_add(0, SeqCst) == NUM_ITEMS * NUM_THREADS { + jhs.into_iter().for_each(|jh| { + jh.join().unwrap(); + }); + let all = unsafe { rx.recv_all() }; + count += all.count(); + break; + } + thread::yield_now(); + } + + assert_eq!(count, NUM_ITEMS * NUM_THREADS); + }) +} diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index ec18df012c2..f0b9ffeafc0 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -1,9 +1,10 @@ +use super::cancellation_queue::Sender; use crate::loom::cell::UnsafeCell; -use crate::loom::sync::atomic::{AtomicU8, Ordering::*}; +use crate::loom::sync::atomic::{AtomicPtr, AtomicU8, Ordering::*}; use crate::loom::sync::Arc; use crate::{sync::AtomicWaker, util::linked_list}; -use std::ptr::NonNull; -use std::sync::mpsc::Sender; + +use std::ptr::{null_mut, NonNull}; use std::task::Waker; pub(crate) type EntryList = linked_list::LinkedList; @@ -29,16 +30,17 @@ const STATE_PENDING: u8 = 3; /// the entry is reached its deadline and woken up. const STATE_WOKEN_UP: u8 = 4; -/// The [`Handle`] has been sent to the [`mpsc`] channel. -/// -/// [`mpsc`]: std::sync::mpsc +/// The [`Handle`] has been sent to the [`Sender`]. const STATE_CANCELLING: u8 = 5; #[derive(Debug)] pub(crate) struct Entry { - /// The pointers used by the intrusive linked list. + /// The intrusive pointers used by timer wheel. pointers: linked_list::Pointers, + /// The intrusive pointer used by cancellation queue. + cancel_pointer: AtomicPtr, + /// The tick when this entry is scheduled to expire. deadline: u64, @@ -48,7 +50,7 @@ pub(crate) struct Entry { /// The mpsc channel used to cancel the entry. // Since `mpsc::Sender` doesn't have `Drop` implementation, // we don't need to `drop_in_place` it when the entry is dropped. 
- cancel_tx: UnsafeCell>>, + cancel_tx: UnsafeCell>, state: AtomicU8, } @@ -89,6 +91,12 @@ unsafe impl linked_list::Link for Entry { } } +impl Entry { + pub(super) fn cancel_pointer(&self) -> &AtomicPtr { + &self.cancel_pointer + } +} + #[derive(Debug, Clone)] pub(crate) struct Handle { entry: Arc, @@ -101,10 +109,19 @@ impl From for NonNull { } } +impl From> for Handle { + fn from(ptr: NonNull) -> Self { + // Safety: `ptr` is guaranteed to be non-null by the caller. + let ptr = unsafe { Arc::from_raw(ptr.as_ptr()) }; + Handle { entry: ptr } + } +} + impl Handle { pub(crate) fn new(deadline: u64, waker: &Waker) -> Self { let entry = Arc::new(Entry { pointers: linked_list::Pointers::new(), + cancel_pointer: AtomicPtr::new(null_mut()), deadline, waker: AtomicWaker::new(), cancel_tx: UnsafeCell::new(None), @@ -163,10 +180,7 @@ impl Handle { self.entry.waker.register_by_ref(waker); } - pub(crate) fn transition_to_registered( - &self, - cancel_tx: Sender, - ) -> TransitionToRegistered { + pub(crate) fn transition_to_registered(&self, cancel_tx: Sender) -> TransitionToRegistered { match self.entry.state.compare_exchange( STATE_UNREGISTERED, STATE_BUSY_REGISTERING, @@ -246,10 +260,10 @@ impl Handle { // this is synchronized with the `transition_to_registered` call, // and the `cancel_tx` should be already stored. 
let tx = unsafe { tx.as_mut().unwrap_unchecked() }; - tx.take() - .unwrap() - .send(self.clone()) - .expect("receiver side is closed"); + let tx = tx.take().unwrap(); + unsafe { + tx.send(self.clone()); + } }); } @@ -268,6 +282,10 @@ impl Handle { pub(crate) fn is_woken_up(&self) -> bool { self.entry.state.fetch_or(0, SeqCst) == STATE_WOKEN_UP } + + pub(super) fn into_entry(self) -> Arc { + self.entry + } } /// An error returned when trying to transition diff --git a/tokio/src/runtime/time/wheel/mod.rs b/tokio/src/runtime/time/wheel/mod.rs index e7cc5d5cb5e..a0e38456fbe 100644 --- a/tokio/src/runtime/time/wheel/mod.rs +++ b/tokio/src/runtime/time/wheel/mod.rs @@ -3,12 +3,15 @@ pub(crate) use self::level::Expiration; use self::level::Level; mod entry; -use entry::EntryList; pub(crate) use entry::Handle as EntryHandle; use entry::TransitionToPending; use entry::TransitionToRegistered; +use entry::{Entry, EntryList}; -use std::{array, sync::mpsc}; +pub(crate) mod cancellation_queue; +use cancellation_queue::Sender; + +use std::array; /// Timing wheel implementation. /// @@ -83,11 +86,7 @@ impl Wheel { /// The caller must ensure: /// /// * The entry is not already registered in ANY wheel. 
- pub(crate) unsafe fn insert( - &mut self, - hdl: EntryHandle, - cancel_tx: mpsc::Sender, - ) -> Insert { + pub(crate) unsafe fn insert(&mut self, hdl: EntryHandle, cancel_tx: Sender) -> Insert { let deadline = hdl.deadline(); if deadline <= self.elapsed { From 9e294d5cd9451657de9c0a73fc05c6326b6b93d3 Mon Sep 17 00:00:00 2001 From: Qi Date: Thu, 14 Aug 2025 20:29:21 +0800 Subject: [PATCH 018/100] drop all items while dropping the cancellation queue Signed-off-by: ADD-SP --- tokio/src/runtime/time/wheel/cancellation_queue.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tokio/src/runtime/time/wheel/cancellation_queue.rs b/tokio/src/runtime/time/wheel/cancellation_queue.rs index 27aa832d65b..8b35e8b3014 100644 --- a/tokio/src/runtime/time/wheel/cancellation_queue.rs +++ b/tokio/src/runtime/time/wheel/cancellation_queue.rs @@ -40,6 +40,9 @@ unsafe impl Sync for Inner {} impl Drop for Inner { fn drop(&mut self) { + unsafe { + let _ = self.take_all(); + } // Drop the stub pointer let stub = NonNull::new(self.stub.load(SeqCst)).unwrap(); drop_stub(stub); From abffd4677dee09c466caa36fbe26d1ae7db845e5 Mon Sep 17 00:00:00 2001 From: Qi Date: Sat, 16 Aug 2025 00:15:23 +0800 Subject: [PATCH 019/100] switch back to general intrusive node based MPSC Signed-off-by: ADD-SP --- tokio/src/runtime/scheduler/util.rs | 3 +- .../runtime/time/wheel/cancellation_queue.rs | 151 ++++++------------ .../time/wheel/cancellation_queue/tests.rs | 22 +-- 3 files changed, 59 insertions(+), 117 deletions(-) diff --git a/tokio/src/runtime/scheduler/util.rs b/tokio/src/runtime/scheduler/util.rs index 306e3292dd3..f0877abbf94 100644 --- a/tokio/src/runtime/scheduler/util.rs +++ b/tokio/src/runtime/scheduler/util.rs @@ -30,8 +30,7 @@ cfg_rt_and_time! 
{ wheel: &mut Wheel, rx: &mut Receiver, ) { - let iter = unsafe { rx.recv_all() }; - for hdl in iter { + while let Some(hdl) = unsafe { rx.try_recv() } { unsafe { let is_registered = hdl.is_registered(); let is_pending = hdl.is_pending(); diff --git a/tokio/src/runtime/time/wheel/cancellation_queue.rs b/tokio/src/runtime/time/wheel/cancellation_queue.rs index 8b35e8b3014..9db13655192 100644 --- a/tokio/src/runtime/time/wheel/cancellation_queue.rs +++ b/tokio/src/runtime/time/wheel/cancellation_queue.rs @@ -1,11 +1,6 @@ //! MPSC Intrusive Linked List //! -//! This is a highly customized implementation based on -//! Dmitry Vyukov's [Intrusive MPSC node-based queue]. -//! -//! This major difference is that the [`Receiver`] -//! always returns all items in the queue, -//! instead of just one item at a time. +//! This implementation is based on Dmitry Vyukov's [Intrusive MPSC node-based queue]. //! //! [Intrusive MPSC node-based queue]: https://www.1024cores.net/home/lock-free-algorithms/queues/intrusive-mpsc-node-based-queue @@ -14,25 +9,16 @@ use crate::loom::cell::UnsafeCell; use crate::loom::sync::atomic::{AtomicPtr, Ordering::*}; use crate::loom::sync::Arc; -use std::iter::Iterator; use std::marker::PhantomData; use std::mem::ManuallyDrop; use std::ptr::{null, null_mut, NonNull}; use std::task::{RawWaker, RawWakerVTable, Waker}; -fn spin_loop() { - #[cfg(loom)] - crate::loom::thread::yield_now(); - - #[cfg(not(loom))] - std::hint::spin_loop(); -} - #[derive(Debug)] struct Inner { - head: UnsafeCell>, - tail: AtomicPtr, - stub: AtomicPtr, + head: AtomicPtr, + tail: UnsafeCell>, + stub: NonNull, } unsafe impl Send for Inner {} @@ -41,11 +27,11 @@ unsafe impl Sync for Inner {} impl Drop for Inner { fn drop(&mut self) { unsafe { - let _ = self.take_all(); + while let Some(hdl) = self.try_recv() { + drop(hdl); + } } - // Drop the stub pointer - let stub = NonNull::new(self.stub.load(SeqCst)).unwrap(); - drop_stub(stub); + drop_stub(self.stub); } } @@ -54,9 +40,9 @@ impl 
Inner { let stub = new_stub(); Self { - head: UnsafeCell::new(NonNull::new(stub.as_ptr()).unwrap()), - tail: AtomicPtr::new(stub.as_ptr()), - stub: AtomicPtr::new(stub.as_ptr()), + head: AtomicPtr::new(stub.as_ptr()), + tail: UnsafeCell::new(stub), + stub, } } @@ -74,10 +60,10 @@ impl Inner { let next = node.cancel_pointer(); next.store(null_mut(), SeqCst); - let old_tail = self.tail.swap(Arc::as_ptr(&node).cast_mut(), SeqCst); - old_tail + let old_head = self.head.swap(Arc::as_ptr(&node).cast_mut(), SeqCst); + old_head .as_ref() - .expect("tail pointer should never be null") + .expect("head pointer should never be null") .cancel_pointer() .store(Arc::as_ptr(&node).cast_mut(), SeqCst); } @@ -88,91 +74,44 @@ impl Inner { /// undefined behavior: /// /// - This method must not be called concurrently. - unsafe fn take_all(&self) -> impl Iterator { - // TODO: Using `Option` for both head and tail is a bad design, - // imagine a case where the head is None, but the tail is Some, - // which is very confusing. 
- struct Iter { - head: Option>, - tail: Option>, - } - - impl Drop for Iter { - fn drop(&mut self) { - for hdl in self { - drop(hdl) - } + unsafe fn try_recv(&self) -> Option { + let mut tail = self.tail.with(|t| *t); + let mut next = tail.as_ref().cancel_pointer().load(SeqCst); + if tail == self.stub { + if next.is_null() { + return None; } - } - impl Iterator for Iter { - type Item = EntryHandle; - - fn next(&mut self) -> Option { - match self.head { - Some(head) => unsafe { - let atomic_next = head.as_ref().cancel_pointer(); - let mut next = atomic_next.load(SeqCst); - while head != self.tail.unwrap() && next.is_null() { - spin_loop(); - next = atomic_next.load(SeqCst); - } - self.head = NonNull::new(next); - Some(Self::Item::from(NonNull::new_unchecked(head.as_ptr()))) - }, - None => None, - } - } + self.tail.with_mut(|t| { + *t = NonNull::new(next).unwrap(); + }); + tail = NonNull::new(next).unwrap(); + next = next.as_ref().unwrap().cancel_pointer().load(SeqCst); } - let new_stub = new_stub(); - - let old_tail = self.tail.swap(new_stub.as_ptr(), SeqCst); - - // At this point, `self.push` will link the new node to `new_stub`. - - // Safety: `self.head` is only access by single thread. - let old_head = unsafe { - self.head.with_mut(|head| { - let old_head = *head; - *head = NonNull::new(new_stub.as_ptr()).expect( - "head pointer is always equals to stub pointer, so it should never be null", - ); - old_head - }) - }; - let old_stub = self.stub.swap(new_stub.as_ptr(), SeqCst); - - if old_head.as_ptr() == old_tail { - // queue is empty - drop_stub(NonNull::new(old_stub).expect("stub pointer should never be null")); - return Iter { - head: None, - tail: None, - }; + + if !next.is_null() { + self.tail.with_mut(|t| { + *t = NonNull::new(next).unwrap(); + }); + return Some(EntryHandle::from(tail)); } - // Safety: The head pointer always equals to stub, and stub is always valid. 
- let old_head_entry_cancel_pointer = unsafe { old_head.as_ref() }.cancel_pointer(); - let mut first = old_head_entry_cancel_pointer.load(SeqCst); - while first.is_null() { - // We enter this loop if and only if there is only one item in the queue, - // AND the `cancel_pointer` is being set to non-null. - spin_loop(); - first = old_head_entry_cancel_pointer.load(SeqCst); + let head = self.head.load(SeqCst); + if tail.as_ptr() != head { + return None; } - drop_stub(NonNull::new(old_stub).expect("stub pointer should never be null")); + self.push(EntryHandle::from(self.stub)); + next = tail.as_ref().cancel_pointer().load(SeqCst); - // Safety: - // - // - We have checked `first` before. - // - `old_tail` is is not null as `self.tail` is always not null. - unsafe { - Iter { - head: Some(NonNull::new_unchecked(first)), - tail: Some(NonNull::new_unchecked(old_tail)), - } + if !next.is_null() { + self.tail.with_mut(|t| { + *t = NonNull::new(next).unwrap(); + }); + return Some(EntryHandle::from(tail)); } + + None } } @@ -205,8 +144,8 @@ pub(crate) struct Receiver { unsafe impl Send for Receiver {} impl Receiver { - pub(crate) unsafe fn recv_all(&mut self) -> impl Iterator { - self.inner.take_all() + pub(crate) unsafe fn try_recv(&mut self) -> Option { + self.inner.try_recv() } } diff --git a/tokio/src/runtime/time/wheel/cancellation_queue/tests.rs b/tokio/src/runtime/time/wheel/cancellation_queue/tests.rs index 4ebfc620297..32548be19ce 100644 --- a/tokio/src/runtime/time/wheel/cancellation_queue/tests.rs +++ b/tokio/src/runtime/time/wheel/cancellation_queue/tests.rs @@ -30,22 +30,24 @@ fn single_thread() { unsafe { tx.send(new_handle()) }; } - let all = unsafe { rx.recv_all() }; - assert_eq!(all.count(), i); + for _ in 0..i { + unsafe { rx.try_recv() }.unwrap(); + } + + assert!(unsafe { rx.try_recv() }.is_none()); } }); } #[test] -#[cfg(not(target_os = "wasi"))] +#[cfg(not(target_os = "wasi"))] // No thread on wasi. 
fn multi_thread() { use crate::loom::sync::atomic::{AtomicUsize, Ordering::SeqCst}; use crate::loom::sync::Arc; use crate::loom::thread; #[cfg(loom)] - // '-1' is for the main thread that runs `loom::model` - const NUM_THREADS: usize = 2; + const NUM_THREADS: usize = 3; #[cfg(not(loom))] const NUM_THREADS: usize = 8; @@ -67,14 +69,16 @@ fn multi_thread() { let mut count = 0; loop { - let all = unsafe { rx.recv_all() }; - count += all.count(); + while unsafe { rx.try_recv() }.is_some() { + count += 1; + } if sent.fetch_add(0, SeqCst) == NUM_ITEMS * NUM_THREADS { jhs.into_iter().for_each(|jh| { jh.join().unwrap(); }); - let all = unsafe { rx.recv_all() }; - count += all.count(); + while unsafe { rx.try_recv() }.is_some() { + count += 1; + } break; } thread::yield_now(); From ccd0ae8a3a217735f0c41123d78383833e2f1b8e Mon Sep 17 00:00:00 2001 From: Qi Date: Sat, 16 Aug 2025 13:32:26 +0800 Subject: [PATCH 020/100] relax the memory ordering Signed-off-by: ADD-SP --- tokio/src/runtime/time/wheel/entry.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index f0b9ffeafc0..b79f7867f1a 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -137,7 +137,7 @@ impl Handle { match self .entry .state - .compare_exchange(STATE_PENDING, STATE_WOKEN_UP, SeqCst, SeqCst) + .compare_exchange(STATE_PENDING, STATE_WOKEN_UP, Relaxed, Relaxed) { Ok(_) => self.entry.waker.wake(), Err(STATE_UNREGISTERED) => { @@ -158,7 +158,7 @@ impl Handle { match self .entry .state - .compare_exchange(STATE_UNREGISTERED, STATE_WOKEN_UP, SeqCst, SeqCst) + .compare_exchange(STATE_UNREGISTERED, STATE_WOKEN_UP, Relaxed, Relaxed) { Ok(_) => self.entry.waker.wake(), Err(STATE_REGISTERED) => { @@ -184,8 +184,8 @@ impl Handle { match self.entry.state.compare_exchange( STATE_UNREGISTERED, STATE_BUSY_REGISTERING, - SeqCst, - SeqCst, + Relaxed, + Relaxed, ) 
{ Ok(_) => (), // successfully locked the `self.cancel_tx` Err(STATE_BUSY_REGISTERING) => panic!("should not be called concurrently"), @@ -205,8 +205,8 @@ impl Handle { match self.entry.state.compare_exchange( STATE_BUSY_REGISTERING, STATE_REGISTERED, - SeqCst, - SeqCst, + Release, // `Release` the `cancel_tx` to other threads + Relaxed, ) { Ok(_) => TransitionToRegistered::Success, Err(actual) => panic!("state is corrupted ({actual})"), @@ -220,7 +220,7 @@ impl Handle { match self .entry .state - .compare_exchange(STATE_REGISTERED, STATE_PENDING, SeqCst, SeqCst) + .compare_exchange(STATE_REGISTERED, STATE_PENDING, Relaxed, Relaxed) { Ok(_) => TransitionToPending::Success, Err(STATE_UNREGISTERED) => panic!("should not be called on unregistered entry"), @@ -239,8 +239,8 @@ impl Handle { match self.entry.state.compare_exchange( STATE_REGISTERED, STATE_CANCELLING, - SeqCst, - SeqCst, + Acquire, // `Acquire` the side-effects of `transition_to_registered` + Relaxed, ) { Ok(_) => break, Err(STATE_UNREGISTERED) => return, // no need to cancel unregistered entries. 
@@ -272,15 +272,15 @@ impl Handle { } pub(crate) fn is_registered(&self) -> bool { - self.entry.state.fetch_or(0, SeqCst) == STATE_REGISTERED + self.entry.state.fetch_or(0, Relaxed) == STATE_REGISTERED } pub(crate) fn is_pending(&self) -> bool { - self.entry.state.fetch_or(0, SeqCst) == STATE_PENDING + self.entry.state.fetch_or(0, Relaxed) == STATE_PENDING } pub(crate) fn is_woken_up(&self) -> bool { - self.entry.state.fetch_or(0, SeqCst) == STATE_WOKEN_UP + self.entry.state.fetch_or(0, Relaxed) == STATE_WOKEN_UP } pub(super) fn into_entry(self) -> Arc { From f91caf7d0b05f3b457b9ac42503f3b9959ff497b Mon Sep 17 00:00:00 2001 From: Qi Date: Sat, 16 Aug 2025 01:04:16 +0800 Subject: [PATCH 021/100] switch to `Mutex` version of cancellation queue Signed-off-by: ADD-SP --- tokio/src/runtime/scheduler/util.rs | 2 +- .../runtime/time/wheel/cancellation_queue.rs | 202 +++++++----------- .../time/wheel/cancellation_queue/tests.rs | 14 +- tokio/src/runtime/time/wheel/entry.rs | 41 ++-- 4 files changed, 106 insertions(+), 153 deletions(-) diff --git a/tokio/src/runtime/scheduler/util.rs b/tokio/src/runtime/scheduler/util.rs index f0877abbf94..54c9de4a9f6 100644 --- a/tokio/src/runtime/scheduler/util.rs +++ b/tokio/src/runtime/scheduler/util.rs @@ -30,7 +30,7 @@ cfg_rt_and_time! { wheel: &mut Wheel, rx: &mut Receiver, ) { - while let Some(hdl) = unsafe { rx.try_recv() } { + for hdl in rx.recv_all() { unsafe { let is_registered = hdl.is_registered(); let is_pending = hdl.is_pending(); diff --git a/tokio/src/runtime/time/wheel/cancellation_queue.rs b/tokio/src/runtime/time/wheel/cancellation_queue.rs index 9db13655192..d96e686de4b 100644 --- a/tokio/src/runtime/time/wheel/cancellation_queue.rs +++ b/tokio/src/runtime/time/wheel/cancellation_queue.rs @@ -1,196 +1,148 @@ -//! MPSC Intrusive Linked List -//! -//! This implementation is based on Dmitry Vyukov's [Intrusive MPSC node-based queue]. -//! -//! 
[Intrusive MPSC node-based queue]: https://www.1024cores.net/home/lock-free-algorithms/queues/intrusive-mpsc-node-based-queue - use super::{Entry, EntryHandle}; -use crate::loom::cell::UnsafeCell; -use crate::loom::sync::atomic::{AtomicPtr, Ordering::*}; -use crate::loom::sync::Arc; +use crate::loom::sync::{Arc, Mutex}; use std::marker::PhantomData; use std::mem::ManuallyDrop; -use std::ptr::{null, null_mut, NonNull}; -use std::task::{RawWaker, RawWakerVTable, Waker}; +use std::ptr::NonNull; #[derive(Debug)] struct Inner { - head: AtomicPtr, - tail: UnsafeCell>, - stub: NonNull, + head: Option>, + tail: Option>, } +/// Safety: [`Inner`] is protected by [`Mutex`]. unsafe impl Send for Inner {} + +/// Safety: [`Inner`] is protected by [`Mutex`]. unsafe impl Sync for Inner {} impl Drop for Inner { fn drop(&mut self) { unsafe { - while let Some(hdl) = self.try_recv() { - drop(hdl); + while let Some(head) = self.head { + self.head = head.as_ref().cancel_pointer().with(|p| *p); + drop(EntryHandle::from(head)); } } - drop_stub(self.stub); } } impl Inner { - pub(crate) fn new() -> Self { - let stub = new_stub(); - + fn new() -> Self { Self { - head: AtomicPtr::new(stub.as_ptr()), - tail: UnsafeCell::new(stub), - stub, + head: None, + tail: None, } } /// # Safety /// - /// Violating any of the following constraints can lead to - /// undefined behavior: + /// Behavior is undefined if any of the following conditions are violated: /// - /// - `hdl` must not be in any queue. - unsafe fn push(&self, hdl: EntryHandle) { - // Since all items in the queue must be alive until they are removed, - // so we should not decrease the reference count. 
- let node = ManuallyDrop::new(hdl.into_entry()); - - let next = node.cancel_pointer(); - next.store(null_mut(), SeqCst); - - let old_head = self.head.swap(Arc::as_ptr(&node).cast_mut(), SeqCst); - old_head - .as_ref() - .expect("head pointer should never be null") - .cancel_pointer() - .store(Arc::as_ptr(&node).cast_mut(), SeqCst); - } - - /// # Safety - /// - /// Violating any of the following constraints can lead to - /// undefined behavior: - /// - /// - This method must not be called concurrently. - unsafe fn try_recv(&self) -> Option { - let mut tail = self.tail.with(|t| *t); - let mut next = tail.as_ref().cancel_pointer().load(SeqCst); - if tail == self.stub { - if next.is_null() { - return None; + /// - `hdl` must not in any cancellation queue. + unsafe fn push_back(&mut self, hdl: EntryHandle) { + // Since we need to access the intrusive pointer, we must not drop the entry. + let entry = ManuallyDrop::new(hdl.into_entry()); + + entry.cancel_pointer().with_mut(|p| { + // Safety: this UnsafeCell is only accessed with the mutex locked. 
+ let p = unsafe { p.as_mut() }.unwrap(); + *p = None; + }); + + let entry_ptr = Arc::as_ptr(&entry).cast_mut(); + + if self.head.is_none() { + self.head = NonNull::new(entry_ptr); + self.tail = self.head; + } else { + let tail = self.tail.unwrap(); + unsafe { + tail.as_ref().cancel_pointer().with_mut(|p| { + *p = Some(NonNull::new(entry_ptr).unwrap()); + }); } - - self.tail.with_mut(|t| { - *t = NonNull::new(next).unwrap(); - }); - tail = NonNull::new(next).unwrap(); - next = next.as_ref().unwrap().cancel_pointer().load(SeqCst); - } - - if !next.is_null() { - self.tail.with_mut(|t| { - *t = NonNull::new(next).unwrap(); - }); - return Some(EntryHandle::from(tail)); - } - - let head = self.head.load(SeqCst); - if tail.as_ptr() != head { - return None; - } - - self.push(EntryHandle::from(self.stub)); - next = tail.as_ref().cancel_pointer().load(SeqCst); - - if !next.is_null() { - self.tail.with_mut(|t| { - *t = NonNull::new(next).unwrap(); - }); - return Some(EntryHandle::from(tail)); + self.tail = Some(NonNull::new(entry_ptr).unwrap()); } + } - None + fn iter(&mut self) -> impl Iterator { + let mut head = self.head.take(); + let _ = self.tail.take(); + + std::iter::from_fn(move || match head { + Some(ptr) => { + // Safety: We wrap the `hdl` using `ManuallyDrop` in `self.push_back`, + // so the ptr is still valid. + head = unsafe { ptr.as_ref() } + .cancel_pointer() + // Safety: All side effects have been synchronized + // by the mutex. + .with(|p| unsafe { *p }); + let hdl = EntryHandle::from(ptr); + Some(hdl) + } + None => None, + }) } } #[derive(Debug, Clone)] pub(crate) struct Sender { - inner: Arc, + inner: Arc>, } -/// Safety: [`Sender`] is protected by [`AtomicPtr`] +/// Safety: [`Inner`] is protected by [`Mutex`]. unsafe impl Send for Sender {} -/// Safety: [`Sender`] is protected by [`AtomicPtr`] +/// Safety: [`Inner`] is protected by [`Mutex`]. 
unsafe impl Sync for Sender {} impl Sender { + /// # Safety + /// + /// Behavior is undefined if any of the following conditions are violated: + /// + /// - `hdl` must not in any cancellation queue. pub(crate) unsafe fn send(&self, hdl: EntryHandle) { - self.inner.push(hdl); + self.inner.lock().push_back(hdl); } } #[derive(Debug)] pub(crate) struct Receiver { - inner: Arc, + inner: Arc>, - // make sure Receiver is `!Sync` - _p: PhantomData<*const ()>, + // Technically, receiver is `Sync`, however, we only + // need single receiver for cancellation purpose, + // so we make it `!Sync` to prevent abusing. + _not_sync: PhantomData<*const ()>, } -/// Safety: [`Receiver`] can only be accessed from a single thread. +/// Safety: [`Inner`] is protected by [`Mutex`]. +// We need the `Receiver` to be `Send` because the `Core` struct for multi-thread +// runtime will be send to another thread during the shutdown. unsafe impl Send for Receiver {} impl Receiver { - pub(crate) unsafe fn try_recv(&mut self) -> Option { - self.inner.try_recv() + pub(crate) fn recv_all(&mut self) -> impl Iterator { + self.inner.lock().iter() } } pub(crate) fn new() -> (Sender, Receiver) { - let inner = Arc::new(Inner::new()); + let inner = Arc::new(Mutex::new(Inner::new())); ( Sender { inner: inner.clone(), }, Receiver { inner, - _p: PhantomData, + _not_sync: PhantomData, }, ) } -fn new_stub() -> NonNull { - let hdl = EntryHandle::new(0, &noop_waker()); - let ptr = Arc::into_raw(hdl.into_entry()); - NonNull::new(ptr.cast_mut()).expect("stub pointer should never be null") -} - -fn drop_stub(stub: NonNull) { - let hdl = EntryHandle::from(stub); - drop(hdl); -} - -// The following noop waker implementation is from crate `futures`. 
-// https://docs.rs/futures/latest/futures/ - -unsafe fn noop_clone(_data: *const ()) -> RawWaker { - noop_raw_waker() -} - -unsafe fn noop(_data: *const ()) {} - -const NOOP_WAKER_VTABLE: RawWakerVTable = RawWakerVTable::new(noop_clone, noop, noop, noop); - -const fn noop_raw_waker() -> RawWaker { - RawWaker::new(null(), &NOOP_WAKER_VTABLE) -} - -fn noop_waker() -> Waker { - unsafe { Waker::from_raw(noop_raw_waker()) } -} - #[cfg(test)] mod tests; diff --git a/tokio/src/runtime/time/wheel/cancellation_queue/tests.rs b/tokio/src/runtime/time/wheel/cancellation_queue/tests.rs index 32548be19ce..31610cec49b 100644 --- a/tokio/src/runtime/time/wheel/cancellation_queue/tests.rs +++ b/tokio/src/runtime/time/wheel/cancellation_queue/tests.rs @@ -30,11 +30,7 @@ fn single_thread() { unsafe { tx.send(new_handle()) }; } - for _ in 0..i { - unsafe { rx.try_recv() }.unwrap(); - } - - assert!(unsafe { rx.try_recv() }.is_none()); + assert_eq!(rx.recv_all().count(), i); } }); } @@ -69,16 +65,12 @@ fn multi_thread() { let mut count = 0; loop { - while unsafe { rx.try_recv() }.is_some() { - count += 1; - } + count += rx.recv_all().count(); if sent.fetch_add(0, SeqCst) == NUM_ITEMS * NUM_THREADS { jhs.into_iter().for_each(|jh| { jh.join().unwrap(); }); - while unsafe { rx.try_recv() }.is_some() { - count += 1; - } + count += rx.recv_all().count(); break; } thread::yield_now(); diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index b79f7867f1a..9ce976a9450 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -1,10 +1,10 @@ use super::cancellation_queue::Sender; use crate::loom::cell::UnsafeCell; -use crate::loom::sync::atomic::{AtomicPtr, AtomicU8, Ordering::*}; +use crate::loom::sync::atomic::{AtomicU8, Ordering::*}; use crate::loom::sync::Arc; use crate::{sync::AtomicWaker, util::linked_list}; -use std::ptr::{null_mut, NonNull}; +use std::ptr::NonNull; use std::task::Waker; pub(crate) type 
EntryList = linked_list::LinkedList; @@ -39,7 +39,7 @@ pub(crate) struct Entry { pointers: linked_list::Pointers, /// The intrusive pointer used by cancellation queue. - cancel_pointer: AtomicPtr, + cancel_pointer: UnsafeCell>>, /// The tick when this entry is scheduled to expire. deadline: u64, @@ -55,10 +55,13 @@ pub(crate) struct Entry { state: AtomicU8, } -// Safety: -// -// * Caller guarantees the `Self::pointers` is used correctly. -// * AND `Self::cancel_tx` is protected by `Self::state`. +/// Safety: There are two fields are neither `Send` nor `Sync`. +/// +/// - [`Self::cancel_pointer`]: This is protected by [`cancellation_queue`]. +/// - [`Self::cancel_tx`]: This is protected by [`Self::state`]. +/// +/// [`cancellation_queue`]: `super::cancellation_queue` +unsafe impl Send for Entry {} unsafe impl Sync for Entry {} generate_addr_of_methods! { @@ -92,7 +95,7 @@ unsafe impl linked_list::Link for Entry { } impl Entry { - pub(super) fn cancel_pointer(&self) -> &AtomicPtr { + pub(super) fn cancel_pointer(&self) -> &UnsafeCell>> { &self.cancel_pointer } } @@ -121,7 +124,7 @@ impl Handle { pub(crate) fn new(deadline: u64, waker: &Waker) -> Self { let entry = Arc::new(Entry { pointers: linked_list::Pointers::new(), - cancel_pointer: AtomicPtr::new(null_mut()), + cancel_pointer: UnsafeCell::new(None), deadline, waker: AtomicWaker::new(), cancel_tx: UnsafeCell::new(None), @@ -137,6 +140,7 @@ impl Handle { match self .entry .state + // We don't need to synchronize anything, so we can use relaxed ordering. .compare_exchange(STATE_PENDING, STATE_WOKEN_UP, Relaxed, Relaxed) { Ok(_) => self.entry.waker.wake(), @@ -155,11 +159,12 @@ impl Handle { /// Wake the entry if it has already elapsed before registering to the timer wheel. 
pub(crate) fn wake_unregistered(&self) { - match self - .entry - .state - .compare_exchange(STATE_UNREGISTERED, STATE_WOKEN_UP, Relaxed, Relaxed) - { + match self.entry.state.compare_exchange( + STATE_UNREGISTERED, + STATE_WOKEN_UP, + Relaxed, // no need to synchronize anything + Relaxed, // no need to synchronize anything + ) { Ok(_) => self.entry.waker.wake(), Err(STATE_REGISTERED) => { panic!("entry is already registered, please call `wake` instead") @@ -184,8 +189,8 @@ impl Handle { match self.entry.state.compare_exchange( STATE_UNREGISTERED, STATE_BUSY_REGISTERING, - Relaxed, - Relaxed, + Relaxed, // no need to synchronize anything + Relaxed, // no need to synchronize anything ) { Ok(_) => (), // successfully locked the `self.cancel_tx` Err(STATE_BUSY_REGISTERING) => panic!("should not be called concurrently"), @@ -220,6 +225,7 @@ impl Handle { match self .entry .state + // We don't need to synchronize anything, so we can use relaxed ordering. .compare_exchange(STATE_REGISTERED, STATE_PENDING, Relaxed, Relaxed) { Ok(_) => TransitionToPending::Success, @@ -265,6 +271,9 @@ impl Handle { tx.send(self.clone()); } }); + + // No need to emit an release fence here + // because this method will not be called twice. 
} pub(crate) fn deadline(&self) -> u64 { From 88292d86f5dea53b030e3cf32ab4c1274a6a5dea Mon Sep 17 00:00:00 2001 From: Qi Date: Sat, 16 Aug 2025 20:40:48 +0800 Subject: [PATCH 022/100] avoid `noalias` and `drop_in_place` Signed-off-by: ADD-SP --- spellcheck.dic | 3 ++- tokio/src/runtime/time/wheel/entry.rs | 21 +++++++++++++++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/spellcheck.dic b/spellcheck.dic index 9ff45b691fc..3cf00c84223 100644 --- a/spellcheck.dic +++ b/spellcheck.dic @@ -1,4 +1,4 @@ -308 +309 & + < @@ -163,6 +163,7 @@ Lauck libc lifecycle lifo +LLVM lookups macOS MacOS diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index 9ce976a9450..b03652fd96c 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -4,6 +4,7 @@ use crate::loom::sync::atomic::{AtomicU8, Ordering::*}; use crate::loom::sync::Arc; use crate::{sync::AtomicWaker, util::linked_list}; +use std::marker::PhantomPinned; use std::ptr::NonNull; use std::task::Waker; @@ -48,11 +49,15 @@ pub(crate) struct Entry { waker: AtomicWaker, /// The mpsc channel used to cancel the entry. - // Since `mpsc::Sender` doesn't have `Drop` implementation, - // we don't need to `drop_in_place` it when the entry is dropped. cancel_tx: UnsafeCell>, state: AtomicU8, + + /// Make the type `!Unpin` to prevent LLVM from emitting + /// the `noalias` attribute for mutable references. + /// + /// See . + _pin: PhantomPinned, } /// Safety: There are two fields are neither `Send` nor `Sync`. @@ -64,6 +69,17 @@ pub(crate) struct Entry { unsafe impl Send for Entry {} unsafe impl Sync for Entry {} +impl Drop for Entry { + fn drop(&mut self) { + // Safety: `cancel_pointer` is protected by `cancellation_queue`. + unsafe { + self.cancel_pointer.with_mut(|p| { + std::ptr::drop_in_place(p); + }); + } + } +} + generate_addr_of_methods! 
{ impl<> Entry { unsafe fn addr_of_pointers(self: NonNull) -> NonNull> { @@ -129,6 +145,7 @@ impl Handle { waker: AtomicWaker::new(), cancel_tx: UnsafeCell::new(None), state: AtomicU8::new(STATE_UNREGISTERED), + _pin: PhantomPinned, }); entry.waker.register_by_ref(waker); From bb932b087ff49c81e76f44ca4c9c91d8e8750cf1 Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 17 Aug 2025 23:28:12 +0800 Subject: [PATCH 023/100] remove legacy works in `spellcheck.dic` Signed-off-by: ADD-SP --- spellcheck.dic | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/spellcheck.dic b/spellcheck.dic index 3cf00c84223..3dd739d71b2 100644 --- a/spellcheck.dic +++ b/spellcheck.dic @@ -1,4 +1,4 @@ -309 +307 & + < @@ -99,7 +99,6 @@ destructors destructure Destructures Dev -Dmitry dns DNS DoS @@ -301,7 +300,6 @@ versa versioned versioning vtable -Vyukov's waker wakers Wakers From 4b1573603225823492dd495ba44be68c237cfe23 Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 24 Aug 2025 22:13:31 +0800 Subject: [PATCH 024/100] reuse the existing intrusice list implementation Signed-off-by: ADD-SP --- .../runtime/time/wheel/cancellation_queue.rs | 65 +++-------------- tokio/src/runtime/time/wheel/entry.rs | 70 +++++++++---------- 2 files changed, 46 insertions(+), 89 deletions(-) diff --git a/tokio/src/runtime/time/wheel/cancellation_queue.rs b/tokio/src/runtime/time/wheel/cancellation_queue.rs index d96e686de4b..7e0db3f0203 100644 --- a/tokio/src/runtime/time/wheel/cancellation_queue.rs +++ b/tokio/src/runtime/time/wheel/cancellation_queue.rs @@ -1,14 +1,14 @@ use super::{Entry, EntryHandle}; use crate::loom::sync::{Arc, Mutex}; +use crate::util::linked_list; use std::marker::PhantomData; -use std::mem::ManuallyDrop; -use std::ptr::NonNull; + +type EntryList = linked_list::LinkedList<(Entry,), Entry>; #[derive(Debug)] struct Inner { - head: Option>, - tail: Option>, + list: EntryList, } /// Safety: [`Inner`] is protected by [`Mutex`]. 
@@ -19,20 +19,14 @@ unsafe impl Sync for Inner {} impl Drop for Inner { fn drop(&mut self) { - unsafe { - while let Some(head) = self.head { - self.head = head.as_ref().cancel_pointer().with(|p| *p); - drop(EntryHandle::from(head)); - } - } + let _ = self.iter().count(); } } impl Inner { fn new() -> Self { Self { - head: None, - tail: None, + list: EntryList::new(), } } @@ -41,50 +35,13 @@ impl Inner { /// Behavior is undefined if any of the following conditions are violated: /// /// - `hdl` must not in any cancellation queue. - unsafe fn push_back(&mut self, hdl: EntryHandle) { - // Since we need to access the intrusive pointer, we must not drop the entry. - let entry = ManuallyDrop::new(hdl.into_entry()); - - entry.cancel_pointer().with_mut(|p| { - // Safety: this UnsafeCell is only accessed with the mutex locked. - let p = unsafe { p.as_mut() }.unwrap(); - *p = None; - }); - - let entry_ptr = Arc::as_ptr(&entry).cast_mut(); - - if self.head.is_none() { - self.head = NonNull::new(entry_ptr); - self.tail = self.head; - } else { - let tail = self.tail.unwrap(); - unsafe { - tail.as_ref().cancel_pointer().with_mut(|p| { - *p = Some(NonNull::new(entry_ptr).unwrap()); - }); - } - self.tail = Some(NonNull::new(entry_ptr).unwrap()); - } + unsafe fn push_front(&mut self, hdl: EntryHandle) { + self.list.push_front(hdl); } fn iter(&mut self) -> impl Iterator { - let mut head = self.head.take(); - let _ = self.tail.take(); - - std::iter::from_fn(move || match head { - Some(ptr) => { - // Safety: We wrap the `hdl` using `ManuallyDrop` in `self.push_back`, - // so the ptr is still valid. - head = unsafe { ptr.as_ref() } - .cancel_pointer() - // Safety: All side effects have been synchronized - // by the mutex. 
- .with(|p| unsafe { *p }); - let hdl = EntryHandle::from(ptr); - Some(hdl) - } - None => None, - }) + let mut list = std::mem::take(&mut self.list); + std::iter::from_fn(move || list.pop_front()) } } @@ -106,7 +63,7 @@ impl Sender { /// /// - `hdl` must not in any cancellation queue. pub(crate) unsafe fn send(&self, hdl: EntryHandle) { - self.inner.lock().push_back(hdl); + self.inner.lock().push_front(hdl); } } diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index b03652fd96c..8c61c1f3301 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -37,10 +37,10 @@ const STATE_CANCELLING: u8 = 5; #[derive(Debug)] pub(crate) struct Entry { /// The intrusive pointers used by timer wheel. - pointers: linked_list::Pointers, + wheel_pointers: linked_list::Pointers, /// The intrusive pointer used by cancellation queue. - cancel_pointer: UnsafeCell>>, + cancel_pointers: linked_list::Pointers, /// The tick when this entry is scheduled to expire. deadline: u64, @@ -71,20 +71,10 @@ unsafe impl Sync for Entry {} impl Drop for Entry { fn drop(&mut self) { - // Safety: `cancel_pointer` is protected by `cancellation_queue`. - unsafe { - self.cancel_pointer.with_mut(|p| { - std::ptr::drop_in_place(p); - }); - } - } -} - -generate_addr_of_methods! { - impl<> Entry { - unsafe fn addr_of_pointers(self: NonNull) -> NonNull> { - &self.pointers - } + self.cancel_tx.with_mut(|tx| { + let maybe_tx = unsafe { &mut *tx }; + drop(maybe_tx.take()); + }) } } @@ -106,19 +96,41 @@ unsafe impl linked_list::Link for Entry { unsafe fn pointers( target: NonNull, ) -> NonNull> { - Entry::addr_of_pointers(target) + let this = target.as_ptr(); + let field = std::ptr::addr_of_mut!((*this).wheel_pointers); + NonNull::new_unchecked(field) } } +// `impl for (Entry,)` is to avoid conflicts with the `Entry` impl, +// this enables using `Entry` in multiple intrusive lists, +// this `impl` is for `cancellation_queue`. 
+// Safety: `Entry` is always in an `Arc`. +unsafe impl linked_list::Link for (Entry,) { + type Handle = Handle; + type Target = Entry; + + fn as_raw(hdl: &Self::Handle) -> NonNull { + unsafe { NonNull::new_unchecked(Arc::as_ptr(&hdl.entry).cast_mut()) } + } + + unsafe fn from_raw(ptr: NonNull) -> Self::Handle { + Handle { + entry: Arc::from_raw(ptr.as_ptr()), + } + } -impl Entry { - pub(super) fn cancel_pointer(&self) -> &UnsafeCell>> { - &self.cancel_pointer + unsafe fn pointers( + target: NonNull, + ) -> NonNull> { + let this = target.as_ptr(); + let field = std::ptr::addr_of_mut!((*this).cancel_pointers); + NonNull::new_unchecked(field) } } #[derive(Debug, Clone)] pub(crate) struct Handle { - entry: Arc, + pub(crate) entry: Arc, } impl From for NonNull { @@ -128,19 +140,11 @@ impl From for NonNull { } } -impl From> for Handle { - fn from(ptr: NonNull) -> Self { - // Safety: `ptr` is guaranteed to be non-null by the caller. - let ptr = unsafe { Arc::from_raw(ptr.as_ptr()) }; - Handle { entry: ptr } - } -} - impl Handle { pub(crate) fn new(deadline: u64, waker: &Waker) -> Self { let entry = Arc::new(Entry { - pointers: linked_list::Pointers::new(), - cancel_pointer: UnsafeCell::new(None), + wheel_pointers: linked_list::Pointers::new(), + cancel_pointers: linked_list::Pointers::new(), deadline, waker: AtomicWaker::new(), cancel_tx: UnsafeCell::new(None), @@ -308,10 +312,6 @@ impl Handle { pub(crate) fn is_woken_up(&self) -> bool { self.entry.state.fetch_or(0, Relaxed) == STATE_WOKEN_UP } - - pub(super) fn into_entry(self) -> Arc { - self.entry - } } /// An error returned when trying to transition From e89259e742a969efde01c13bd22dbe5948c384b0 Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 24 Aug 2025 22:16:32 +0800 Subject: [PATCH 025/100] fix docstring issue caused by renaming `Entry::cancel_pointer` to `Entry::cancel_pointers` Signed-off-by: ADD-SP --- tokio/src/runtime/time/wheel/entry.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git 
a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index 8c61c1f3301..06761ac1cf4 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -60,9 +60,8 @@ pub(crate) struct Entry { _pin: PhantomPinned, } -/// Safety: There are two fields are neither `Send` nor `Sync`. +/// Safety: There is a field is neither `Send` nor `Sync`. /// -/// - [`Self::cancel_pointer`]: This is protected by [`cancellation_queue`]. /// - [`Self::cancel_tx`]: This is protected by [`Self::state`]. /// /// [`cancellation_queue`]: `super::cancellation_queue` From 5eee490373de93fd758dad99b7cf3fc1357f2fc2 Mon Sep 17 00:00:00 2001 From: Qi Date: Wed, 27 Aug 2025 22:07:46 +0800 Subject: [PATCH 026/100] reuse existing intrusive list impl using a ZST Signed-off-by: ADD-SP --- tokio/src/runtime/time/wheel/cancellation_queue.rs | 3 ++- tokio/src/runtime/time/wheel/entry.rs | 13 +++++++++---- tokio/src/runtime/time/wheel/mod.rs | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/tokio/src/runtime/time/wheel/cancellation_queue.rs b/tokio/src/runtime/time/wheel/cancellation_queue.rs index 7e0db3f0203..4e75c1a8af6 100644 --- a/tokio/src/runtime/time/wheel/cancellation_queue.rs +++ b/tokio/src/runtime/time/wheel/cancellation_queue.rs @@ -1,10 +1,11 @@ use super::{Entry, EntryHandle}; use crate::loom::sync::{Arc, Mutex}; +use crate::runtime::time::wheel::CancellationQueueEntry; use crate::util::linked_list; use std::marker::PhantomData; -type EntryList = linked_list::LinkedList<(Entry,), Entry>; +type EntryList = linked_list::LinkedList; #[derive(Debug)] struct Inner { diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index 06761ac1cf4..9eb8ca7a874 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -100,11 +100,16 @@ unsafe impl linked_list::Link for Entry { NonNull::new_unchecked(field) } } -// `impl for (Entry,)` is to avoid 
conflicts with the `Entry` impl, -// this enables using `Entry` in multiple intrusive lists, -// this `impl` is for `cancellation_queue`. + +/// An ZST to allow [`super::cancellation_queue`] to utilize the [`Entry::cancel_pointers`] +/// by impl [`linked_list::Link`] as we cannot impl it on [`Entry`] +/// directly due to the conflicting implementations used by [`Entry::wheel_pointers`]. +/// +/// This type should never be constructed. +pub(super) struct CancellationQueueEntry; + // Safety: `Entry` is always in an `Arc`. -unsafe impl linked_list::Link for (Entry,) { +unsafe impl linked_list::Link for CancellationQueueEntry { type Handle = Handle; type Target = Entry; diff --git a/tokio/src/runtime/time/wheel/mod.rs b/tokio/src/runtime/time/wheel/mod.rs index a0e38456fbe..3c6a98c61f7 100644 --- a/tokio/src/runtime/time/wheel/mod.rs +++ b/tokio/src/runtime/time/wheel/mod.rs @@ -6,7 +6,7 @@ mod entry; pub(crate) use entry::Handle as EntryHandle; use entry::TransitionToPending; use entry::TransitionToRegistered; -use entry::{Entry, EntryList}; +use entry::{CancellationQueueEntry, Entry, EntryList}; pub(crate) mod cancellation_queue; use cancellation_queue::Sender; From 6626ad054b3f6959a96357413780819c3657c641 Mon Sep 17 00:00:00 2001 From: Qi Date: Wed, 27 Aug 2025 22:15:24 +0800 Subject: [PATCH 027/100] chore(spellcheck.dic): add 'ZST' Signed-off-by: ADD-SP --- spellcheck.dic | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spellcheck.dic b/spellcheck.dic index 3dd739d71b2..725c4eee14e 100644 --- a/spellcheck.dic +++ b/spellcheck.dic @@ -1,4 +1,4 @@ -307 +308 & + < @@ -306,3 +306,4 @@ Wakers wakeup wakeups workstealing +ZST From 65276ac35288d74886782a7fd5b3d52f4d3d72ed Mon Sep 17 00:00:00 2001 From: Qi Date: Thu, 28 Aug 2025 21:37:12 +0800 Subject: [PATCH 028/100] fix memory leakage of cancellation queue Signed-off-by: ADD-SP --- .../runtime/time/wheel/cancellation_queue.rs | 26 +++++++++++++++++-- .../time/wheel/cancellation_queue/tests.rs | 18 
+++++++++++++ tokio/src/runtime/time/wheel/entry.rs | 6 +++++ 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/tokio/src/runtime/time/wheel/cancellation_queue.rs b/tokio/src/runtime/time/wheel/cancellation_queue.rs index 4e75c1a8af6..7b85166d48b 100644 --- a/tokio/src/runtime/time/wheel/cancellation_queue.rs +++ b/tokio/src/runtime/time/wheel/cancellation_queue.rs @@ -20,6 +20,7 @@ unsafe impl Sync for Inner {} impl Drop for Inner { fn drop(&mut self) { + // consume all entries let _ = self.iter().count(); } } @@ -41,8 +42,29 @@ impl Inner { } fn iter(&mut self) -> impl Iterator { - let mut list = std::mem::take(&mut self.list); - std::iter::from_fn(move || list.pop_front()) + struct Iter { + list: EntryList, + } + + impl Drop for Iter { + fn drop(&mut self) { + while let Some(hdl) = self.list.pop_front() { + drop(hdl); + } + } + } + + impl Iterator for Iter { + type Item = EntryHandle; + + fn next(&mut self) -> Option { + self.list.pop_front() + } + } + + Iter { + list: std::mem::take(&mut self.list), + } } } diff --git a/tokio/src/runtime/time/wheel/cancellation_queue/tests.rs b/tokio/src/runtime/time/wheel/cancellation_queue/tests.rs index 31610cec49b..17b426e23de 100644 --- a/tokio/src/runtime/time/wheel/cancellation_queue/tests.rs +++ b/tokio/src/runtime/time/wheel/cancellation_queue/tests.rs @@ -79,3 +79,21 @@ fn multi_thread() { assert_eq!(count, NUM_ITEMS * NUM_THREADS); }) } + +#[test] +fn drop_iter_should_not_leak_memory() { + model(|| { + let (tx, mut rx) = new(); + + let hdls = (0..NUM_ITEMS).map(|_| new_handle()).collect::>(); + for hdl in hdls.iter() { + unsafe { tx.send(hdl.clone()) }; + } + + drop(rx.recv_all()); + + for hdl in hdls { + assert_eq!(hdl.inner_strong_count(), 1); + } + }); +} diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index 9eb8ca7a874..c8af946eb90 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -316,6 +316,12 @@ impl Handle 
{ pub(crate) fn is_woken_up(&self) -> bool { self.entry.state.fetch_or(0, Relaxed) == STATE_WOKEN_UP } + + #[cfg(test)] + /// Only used for unit tests. + pub(crate) fn inner_strong_count(&self) -> usize { + Arc::strong_count(&self.entry) + } } /// An error returned when trying to transition From e4b1e68f6609686be50477ac72f3af9434923e47 Mon Sep 17 00:00:00 2001 From: Qi Date: Thu, 28 Aug 2025 21:45:28 +0800 Subject: [PATCH 029/100] remove useless `unsafe impl Send for Entry` Signed-off-by: ADD-SP --- tokio/src/runtime/time/wheel/entry.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index c8af946eb90..d9f86767ed2 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -60,12 +60,9 @@ pub(crate) struct Entry { _pin: PhantomPinned, } -/// Safety: There is a field is neither `Send` nor `Sync`. +/// Safety: There is a field is not [`Sync`] /// /// - [`Self::cancel_tx`]: This is protected by [`Self::state`]. -/// -/// [`cancellation_queue`]: `super::cancellation_queue` -unsafe impl Send for Entry {} unsafe impl Sync for Entry {} impl Drop for Entry { From 009d15870dd76a7504dba713d48e8ae1df7ea17d Mon Sep 17 00:00:00 2001 From: Qi Date: Thu, 28 Aug 2025 21:46:57 +0800 Subject: [PATCH 030/100] clarify the meaning of `STATE_BUSY_REGISTERING` Signed-off-by: ADD-SP --- tokio/src/runtime/time/wheel/entry.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index d9f86767ed2..da7c4d4b5bf 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -14,7 +14,8 @@ pub(crate) type EntryList = linked_list::LinkedList; const STATE_UNREGISTERED: u8 = 0; /// The entry is being registered to the timer wheel, -/// and also saving the `cancel_tx` to the entry. 
+/// and also saving the [`Sender`] of the cancellation queue +/// into the entry. const STATE_BUSY_REGISTERING: u8 = 1; /// The entry is registered to the timer wheel, From 0f3c9ecd09df81a68eae4c3ce6fa8d86dcb20a7b Mon Sep 17 00:00:00 2001 From: Qi Date: Mon, 1 Sep 2025 22:55:34 +0800 Subject: [PATCH 031/100] eliminate atomic state by `Mutex` of `Entry` Signed-off-by: ADD-SP --- tokio/src/runtime/time/wheel/entry.rs | 236 ++++++++++---------------- 1 file changed, 85 insertions(+), 151 deletions(-) diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index da7c4d4b5bf..ce45309e93e 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -1,7 +1,5 @@ use super::cancellation_queue::Sender; -use crate::loom::cell::UnsafeCell; -use crate::loom::sync::atomic::{AtomicU8, Ordering::*}; -use crate::loom::sync::Arc; +use crate::loom::sync::{Arc, Mutex}; use crate::{sync::AtomicWaker, util::linked_list}; use std::marker::PhantomPinned; @@ -10,30 +8,28 @@ use std::task::Waker; pub(crate) type EntryList = linked_list::LinkedList; -/// A pure new entry, no any changes to the state. -const STATE_UNREGISTERED: u8 = 0; - -/// The entry is being registered to the timer wheel, -/// and also saving the [`Sender`] of the cancellation queue -/// into the entry. -const STATE_BUSY_REGISTERING: u8 = 1; +#[derive(Debug)] +enum State { + /// A pure new entry, no any changes to the state. + Unregistered, -/// The entry is registered to the timer wheel, -/// but not in the pending queue of the timer wheel. -const STATE_REGISTERED: u8 = 2; + /// The entry is registered to the timer wheel, + /// but not in the pending queue of the timer wheel. + Registered(Sender), -/// The entry is in the pending queue of the timer wheel, -/// and not in any wheel level, which means that -/// the entry is reached its deadline and waiting to be woken up. 
-const STATE_PENDING: u8 = 3; + /// The entry is in the pending queue of the timer wheel, + /// and not in any wheel level, which means that + /// the entry is reached its deadline and waiting to be woken up. + Pending, -/// The waker has been called, and the entry is no longer in the timer wheel -/// (both each wheel level and the pending queue), which means that -/// the entry is reached its deadline and woken up. -const STATE_WOKEN_UP: u8 = 4; + /// The waker has been called, and the entry is no longer in the timer wheel + /// (both each wheel level and the pending queue), which means that + /// the entry is reached its deadline and woken up. + WokenUp, -/// The [`Handle`] has been sent to the [`Sender`]. -const STATE_CANCELLING: u8 = 5; + /// The [`Handle`] has been sent to the [`Sender`]. + Cancelling, +} #[derive(Debug)] pub(crate) struct Entry { @@ -49,10 +45,7 @@ pub(crate) struct Entry { /// The currently registered waker. waker: AtomicWaker, - /// The mpsc channel used to cancel the entry. - cancel_tx: UnsafeCell>, - - state: AtomicU8, + state: Mutex, /// Make the type `!Unpin` to prevent LLVM from emitting /// the `noalias` attribute for mutable references. @@ -61,20 +54,6 @@ pub(crate) struct Entry { _pin: PhantomPinned, } -/// Safety: There is a field is not [`Sync`] -/// -/// - [`Self::cancel_tx`]: This is protected by [`Self::state`]. -unsafe impl Sync for Entry {} - -impl Drop for Entry { - fn drop(&mut self) { - self.cancel_tx.with_mut(|tx| { - let maybe_tx = unsafe { &mut *tx }; - drop(maybe_tx.take()); - }) - } -} - // Safety: `Entry` is always in an `Arc`. 
unsafe impl linked_list::Link for Entry { type Handle = Handle; @@ -149,8 +128,7 @@ impl Handle { cancel_pointers: linked_list::Pointers::new(), deadline, waker: AtomicWaker::new(), - cancel_tx: UnsafeCell::new(None), - state: AtomicU8::new(STATE_UNREGISTERED), + state: Mutex::new(State::Unregistered), _pin: PhantomPinned, }); entry.waker.register_by_ref(waker); @@ -160,47 +138,41 @@ impl Handle { /// Wake the entry if it is already in the pending queue of the timer wheel. pub(crate) fn wake(&self) { - match self - .entry - .state - // We don't need to synchronize anything, so we can use relaxed ordering. - .compare_exchange(STATE_PENDING, STATE_WOKEN_UP, Relaxed, Relaxed) - { - Ok(_) => self.entry.waker.wake(), - Err(STATE_UNREGISTERED) => { - panic!("entry is not registered, please call `wake_unregistered` instead") + let mut lock = self.entry.state.lock(); + match &*lock { + // don't unlock — poisoning the `Mutex` stops others from using the bad state. + state @ (State::Unregistered | State::Registered(_)) => { + panic!("corrupted state: {state:#?}") } - Err(STATE_BUSY_REGISTERING) => { - panic!("should be be called concurrently with `transition_to_registered`") + State::Pending => { + *lock = State::WokenUp; + // Since state has been updated, no need to hold the lock. + drop(lock); + self.entry.waker.wake(); } - Err(STATE_REGISTERED) => panic!("should not be called on non-pending entry"), - Err(STATE_WOKEN_UP) => panic!("should not be called on woken up entry"), - Err(STATE_CANCELLING) => (), // no need to wake up cancelling entries - Err(actual) => panic!("state is corrupted ({actual})"), + // don't unlock — poisoning the `Mutex` stops others from using the bad state. + state @ (State::WokenUp | State::Cancelling) => panic!("corrupted state: {state:#?}"), } } /// Wake the entry if it has already elapsed before registering to the timer wheel. 
pub(crate) fn wake_unregistered(&self) { - match self.entry.state.compare_exchange( - STATE_UNREGISTERED, - STATE_WOKEN_UP, - Relaxed, // no need to synchronize anything - Relaxed, // no need to synchronize anything - ) { - Ok(_) => self.entry.waker.wake(), - Err(STATE_REGISTERED) => { - panic!("entry is already registered, please call `wake` instead") - } - Err(STATE_BUSY_REGISTERING) => { - panic!("should be be called concurrently with `transition_to_registered`") + let mut lock = self.entry.state.lock(); + match &*lock { + State::Unregistered => { + *lock = State::WokenUp; + // Since state has been updated, no need to hold the lock. + drop(lock); + self.entry.waker.wake(); } - Err(STATE_PENDING) => { - panic!("entry is already pending, please call `wake` instead") + // don't unlock — poisoning the `Mutex` stops others from using the bad state. + state @ (State::Registered(_) | State::WokenUp) => { + panic!("corrupted state: {state:#?}") } - Err(STATE_WOKEN_UP) => panic!("entry is already woken up"), - Err(STATE_CANCELLING) => (), // no need to wake up cancelling entries - Err(actual) => panic!("state is corrupted ({actual})"), + // don't wake up cancelling entries + State::Cancelling => (), + // don't unlock — poisoning the `Mutex` stops others from using the bad state. 
+ State::Pending => panic!("corrupted state: State::Pending"), } } @@ -209,35 +181,17 @@ impl Handle { } pub(crate) fn transition_to_registered(&self, cancel_tx: Sender) -> TransitionToRegistered { - match self.entry.state.compare_exchange( - STATE_UNREGISTERED, - STATE_BUSY_REGISTERING, - Relaxed, // no need to synchronize anything - Relaxed, // no need to synchronize anything - ) { - Ok(_) => (), // successfully locked the `self.cancel_tx` - Err(STATE_BUSY_REGISTERING) => panic!("should not be called concurrently"), - Err(STATE_REGISTERED) => panic!("should not be called twice"), - Err(STATE_PENDING) => panic!("entry is already pending, cannot register again"), - Err(STATE_WOKEN_UP) => panic!("already woken up, cannot register again"), - Err(STATE_CANCELLING) => return TransitionToRegistered::Cancelling, - Err(actual) => panic!("state is corrupted ({actual})"), - } - - self.entry.cancel_tx.with_mut(|tx| { - // Safety: we have claimed the `STATE_BUSY_REGISTERING` state - let tx = unsafe { tx.as_mut().unwrap_unchecked() }; - assert!(tx.replace(cancel_tx).is_none(), "duplicate registration"); - }); - - match self.entry.state.compare_exchange( - STATE_BUSY_REGISTERING, - STATE_REGISTERED, - Release, // `Release` the `cancel_tx` to other threads - Relaxed, - ) { - Ok(_) => TransitionToRegistered::Success, - Err(actual) => panic!("state is corrupted ({actual})"), + let mut lock = self.entry.state.lock(); + match &*lock { + State::Unregistered => { + *lock = State::Registered(cancel_tx); + TransitionToRegistered::Success + } + // don't unlock — poisoning the `Mutex` stops others from using the bad state. 
+ state @ (State::Registered(_) | State::Pending | State::WokenUp) => { + panic!("corrupted state: {state:#?}") + } + State::Cancelling => TransitionToRegistered::Cancelling, } } @@ -245,58 +199,38 @@ impl Handle { if self.entry.deadline > not_after { return TransitionToPending::NotElapsed(self.entry.deadline); } - match self - .entry - .state - // We don't need to synchronize anything, so we can use relaxed ordering. - .compare_exchange(STATE_REGISTERED, STATE_PENDING, Relaxed, Relaxed) - { - Ok(_) => TransitionToPending::Success, - Err(STATE_UNREGISTERED) => panic!("should not be called on unregistered entry"), - Err(STATE_BUSY_REGISTERING) => { - panic!("should not be called concurrently with `transition_to_registered`") + + let mut lock = self.entry.state.lock(); + match &*lock { + // don't unlock — poisoning the `Mutex` stops others from using the bad state. + State::Unregistered => panic!("corrupted state: State::Unregistered"), + State::Registered(_) => { + *lock = State::Pending; + TransitionToPending::Success } - Err(STATE_PENDING) => panic!("should not be called twice"), - Err(STATE_WOKEN_UP) => panic!("should not be called on woken up entry"), - Err(STATE_CANCELLING) => TransitionToPending::Cancelling, - Err(actual) => panic!("state is corrupted ({actual})"), + // don't unlock — poisoning the `Mutex` stops others from using the bad state. + state @ (State::Pending | State::WokenUp) => panic!("corrupted state: {state:#?}"), + State::Cancelling => TransitionToPending::Cancelling, } } pub(crate) fn transition_to_cancelling(&self) { - loop { - match self.entry.state.compare_exchange( - STATE_REGISTERED, - STATE_CANCELLING, - Acquire, // `Acquire` the side-effects of `transition_to_registered` - Relaxed, - ) { - Ok(_) => break, - Err(STATE_UNREGISTERED) => return, // no need to cancel unregistered entries. - Err(STATE_BUSY_REGISTERING) => { - // Entry is being registered, wait for it to finish. 
- std::hint::spin_loop(); - continue; + let mut lock = self.entry.state.lock(); + + match *lock { + State::Unregistered => *lock = State::Cancelling, + State::Registered(ref tx) => { + // Safety: entry is not in any cancellation queue + unsafe { + tx.send(self.clone()); } - Err(STATE_PENDING) => return, // no need to cancel pending entries - Err(STATE_WOKEN_UP) => return, // no need to cancel woken up entries - Err(STATE_CANCELLING) => panic!("should not be called twice"), - Err(actual) => panic!("state is corrupted ({actual})"), + *lock = State::Cancelling; } + // no need to cancel a pending or woken up entry + State::Pending | State::WokenUp => *lock = State::Cancelling, + // don't unlock — poisoning the `Mutex` stops others from using the bad state. + State::Cancelling => panic!("should not be called twice"), } - self.entry.cancel_tx.with_mut(|tx| { - // Safety: Since previous state is `STATE_REGISTERED`, - // this is synchronized with the `transition_to_registered` call, - // and the `cancel_tx` should be already stored. - let tx = unsafe { tx.as_mut().unwrap_unchecked() }; - let tx = tx.take().unwrap(); - unsafe { - tx.send(self.clone()); - } - }); - - // No need to emit an release fence here - // because this method will not be called twice. } pub(crate) fn deadline(&self) -> u64 { @@ -304,15 +238,15 @@ impl Handle { } pub(crate) fn is_registered(&self) -> bool { - self.entry.state.fetch_or(0, Relaxed) == STATE_REGISTERED + matches!(*self.entry.state.lock(), State::Registered(_)) } pub(crate) fn is_pending(&self) -> bool { - self.entry.state.fetch_or(0, Relaxed) == STATE_PENDING + matches!(*self.entry.state.lock(), State::Pending) } pub(crate) fn is_woken_up(&self) -> bool { - self.entry.state.fetch_or(0, Relaxed) == STATE_WOKEN_UP + matches!(*self.entry.state.lock(), State::WokenUp) } #[cfg(test)] From 6b8eed509969faf16ff7219a536bff38a93c0e3a Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 2 Sep 2025 21:53:57 +0800 Subject: [PATCH 032/100] fixup! 
eliminate atomic state by `Mutex` of `Entry` --- tokio/src/runtime/time/wheel/entry.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index ce45309e93e..aa48b2e86d3 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -151,7 +151,9 @@ impl Handle { self.entry.waker.wake(); } // don't unlock — poisoning the `Mutex` stops others from using the bad state. - state @ (State::WokenUp | State::Cancelling) => panic!("corrupted state: {state:#?}"), + State::WokenUp => panic!("corrupted state: `State::WokenUp`"), + // no need to wake up cancelling entry + State::Cancelling => (), } } @@ -169,10 +171,10 @@ impl Handle { state @ (State::Registered(_) | State::WokenUp) => { panic!("corrupted state: {state:#?}") } - // don't wake up cancelling entries - State::Cancelling => (), // don't unlock — poisoning the `Mutex` stops others from using the bad state. 
State::Pending => panic!("corrupted state: State::Pending"), + // don't wake up cancelling entries + State::Cancelling => (), } } From 6bb207d87c7193289f5e8c50545aa94c6a3cc261 Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 2 Sep 2025 21:58:03 +0800 Subject: [PATCH 033/100] reduce lock contention while drainning the remote timers Signed-off-by: ADD-SP --- tokio/src/runtime/scheduler/multi_thread/worker.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index 1fed740bbb0..b0017184ca7 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -1248,8 +1248,12 @@ impl Handle { } pub(crate) fn take_remote_timers(&self) -> Vec { - let mut synced = self.shared.synced.lock(); - std::mem::take(&mut synced.inject_timers) + // It's ok to lost the race, as another worker is + // draining the inject_timers. + match self.shared.synced.try_lock() { + Some(mut synced) => std::mem::take(&mut synced.inject_timers), + None => Vec::new(), + } } } From 2385b3df68ec6d85f4be18923d8b0a0013cd4c11 Mon Sep 17 00:00:00 2001 From: Qi Date: Thu, 11 Sep 2025 22:16:27 +0800 Subject: [PATCH 034/100] improve the cache locality while checking the shutdown flag Signed-off-by: ADD-SP --- .../runtime/scheduler/current_thread/mod.rs | 16 ++++++++++++--- .../runtime/scheduler/multi_thread/handle.rs | 11 ++++++++++ .../runtime/scheduler/multi_thread/worker.rs | 6 ++++-- tokio/src/runtime/time/handle.rs | 13 +++++------- tokio/src/runtime/time/mod.rs | 15 +++++++------- tokio/src/runtime/time/tests/mod.rs | 4 ++-- tokio/src/runtime/time/timer.rs | 20 ++++++++++--------- 7 files changed, 53 insertions(+), 32 deletions(-) diff --git a/tokio/src/runtime/scheduler/current_thread/mod.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs index 459ebb40eed..1481de19a1d 100644 --- 
a/tokio/src/runtime/scheduler/current_thread/mod.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -1,4 +1,4 @@ -use crate::loom::sync::atomic::AtomicBool; +use crate::loom::sync::atomic::{AtomicBool, Ordering}; use crate::loom::sync::Arc; use crate::runtime::driver::{self, Driver}; use crate::runtime::scheduler::{self, Defer, Inject}; @@ -121,6 +121,9 @@ struct Shared { /// This scheduler only has one worker. worker_metrics: WorkerMetrics, + + /// Indicates that the runtime is shutting down. + is_shutdown: AtomicBool, } /// Thread-local context. @@ -183,6 +186,7 @@ impl CurrentThread { config, scheduler_metrics: SchedulerMetrics::new(), worker_metrics, + is_shutdown: AtomicBool::new(false), }, driver: driver_handle, blocking_spawner, @@ -300,6 +304,8 @@ impl CurrentThread { let core = shutdown2(core, handle); *context.core.borrow_mut() = Some(core); } + + handle.shared.is_shutdown.store(true, Ordering::SeqCst); } } @@ -555,11 +561,11 @@ impl Context { pub(crate) fn with_wheel(&self, f: F) -> R where - F: FnOnce(Option<(&mut Wheel, cancellation_queue::Sender)>) -> R, + F: FnOnce(Option<(&mut Wheel, cancellation_queue::Sender, bool)>) -> R, { self.with_core(|maybe_core| { if let Some(core) = maybe_core { - f(Some((&mut core.wheel, core.timer_cancel_tx.clone()))) + f(Some((&mut core.wheel, core.timer_cancel_tx.clone(), false))) } else { f(None) } @@ -726,6 +732,10 @@ impl Handle { let mut inject_timers = self.shared.inject_timers.lock(); std::mem::take(&mut inject_timers) } + + pub(crate) fn is_shutdown(&self) -> bool { + self.shared.is_shutdown.load(Ordering::SeqCst) + } } } diff --git a/tokio/src/runtime/scheduler/multi_thread/handle.rs b/tokio/src/runtime/scheduler/multi_thread/handle.rs index 9acfcb270d6..1b53331bc2b 100644 --- a/tokio/src/runtime/scheduler/multi_thread/handle.rs +++ b/tokio/src/runtime/scheduler/multi_thread/handle.rs @@ -1,5 +1,6 @@ use crate::future::Future; use crate::loom::sync::Arc; +use crate::loom::sync::atomic::{AtomicBool, 
Ordering}; use crate::runtime::scheduler::multi_thread::worker; use crate::runtime::task::{Notified, Task, TaskHarnessScheduleHooks}; use crate::runtime::{ @@ -33,6 +34,9 @@ pub(crate) struct Handle { /// User-supplied hooks to invoke for things pub(crate) task_hooks: TaskHooks, + + /// Indicates that the runtime is shutting down. + pub(crate) is_shutdown: AtomicBool, } impl Handle { @@ -50,7 +54,14 @@ impl Handle { Self::bind_new_task(me, future, id, spawned_at) } + cfg_time! { + pub(crate) fn is_shutdown(&self) -> bool { + self.is_shutdown.load(Ordering::SeqCst) + } + } + pub(crate) fn shutdown(&self) { + self.is_shutdown.store(true, Ordering::SeqCst); self.close(); } diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index b0017184ca7..96875f880a7 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -57,6 +57,7 @@ //! leak. use crate::loom::sync::{Arc, Mutex}; +use crate::loom::sync::atomic::AtomicBool; use crate::runtime; use crate::runtime::scheduler::multi_thread::{ idle, queue, Counters, Handle, Idle, Overflow, Parker, Stats, TraceStatus, Unparker, @@ -331,6 +332,7 @@ pub(super) fn create( driver: driver_handle, blocking_spawner, seed_generator, + is_shutdown: AtomicBool::new(false), }); let mut launch = Launch(vec![]); @@ -888,11 +890,11 @@ impl Context { pub(crate) fn with_wheel(&self, f: F) -> R where - F: FnOnce(Option<(&mut Wheel, cancellation_queue::Sender)>) -> R, + F: FnOnce(Option<(&mut Wheel, cancellation_queue::Sender, bool)>) -> R, { self.with_core(|core| { if let Some(core) = core { - f(Some((&mut core.wheel, core.timer_cancel_tx.clone()))) + f(Some((&mut core.wheel, core.timer_cancel_tx.clone(), core.is_shutdown))) } else { f(None) } diff --git a/tokio/src/runtime/time/handle.rs b/tokio/src/runtime/time/handle.rs index c3e8f92aaa4..ef7bc742530 100644 --- a/tokio/src/runtime/time/handle.rs +++ 
b/tokio/src/runtime/time/handle.rs @@ -1,12 +1,14 @@ -use crate::loom::sync::atomic::{AtomicBool, Ordering}; -use crate::loom::sync::Arc; use crate::runtime::time::{TimeSource, Wheel}; use std::fmt; +cfg_test_util! { + use crate::loom::sync::Arc; + use crate::loom::sync::atomic::{AtomicBool, Ordering}; +} + /// Handle to time driver instance. pub(crate) struct Handle { pub(super) time_source: TimeSource, - pub(super) is_shutdown: Arc, // When `true`, a call to `park_timeout` should immediately return and time // should not advance. One reason for this to be `true` is if the task @@ -50,11 +52,6 @@ impl Handle { &self.time_source } - /// Checks whether the driver has been shutdown. - pub(super) fn is_shutdown(&self) -> bool { - self.is_shutdown.load(Ordering::SeqCst) - } - /// Track that the driver is being unparked pub(crate) fn unpark(&self) { #[cfg(feature = "test-util")] diff --git a/tokio/src/runtime/time/mod.rs b/tokio/src/runtime/time/mod.rs index b7876b074d5..364de59a3bd 100644 --- a/tokio/src/runtime/time/mod.rs +++ b/tokio/src/runtime/time/mod.rs @@ -24,8 +24,11 @@ cfg_rt_or_time! { pub(crate) use wheel::Wheel; } +cfg_test_util! { + use crate::loom::sync::Arc; +} + use crate::loom::sync::atomic::{AtomicBool, Ordering}; -use crate::loom::sync::Arc; use crate::runtime::driver::{self, IoStack}; use crate::time::{Clock, Duration}; @@ -89,7 +92,7 @@ pub(crate) struct Driver { /// Parker to delegate to. park: IoStack, - is_shutdown: Arc, + is_shutdown: AtomicBool, } // ===== impl Driver ===== @@ -101,16 +104,14 @@ impl Driver { /// Specifying the source of time is useful when testing. 
pub(crate) fn new(park: IoStack, clock: &Clock) -> (Driver, Handle) { let time_source = TimeSource::new(clock); - let is_shutdown = Arc::new(AtomicBool::new(false)); let handle = Handle { time_source, - is_shutdown: is_shutdown.clone(), #[cfg(feature = "test-util")] did_wake: Arc::new(AtomicBool::new(false)), }; - let driver = Driver { park, is_shutdown }; + let driver = Driver { park, is_shutdown: AtomicBool::new(false) }; (driver, handle) } @@ -124,9 +125,7 @@ impl Driver { } pub(crate) fn shutdown(&mut self, rt_handle: &driver::Handle) { - let handle = rt_handle.time(); - - if handle.is_shutdown() { + if self.is_shutdown.load(Ordering::SeqCst) { return; } diff --git a/tokio/src/runtime/time/tests/mod.rs b/tokio/src/runtime/time/tests/mod.rs index d018059ec32..7ea0259ea30 100644 --- a/tokio/src/runtime/time/tests/mod.rs +++ b/tokio/src/runtime/time/tests/mod.rs @@ -48,7 +48,7 @@ async fn fire_all_timers(handle: &Handle, exit_rx: oneshot::Receiver<()>) { // In the `block_on` context, we can get the current wheel // fire all timers. 
with_current_wheel(&handle.inner, |maybe_wheel| { - let (wheel, _tx) = maybe_wheel.unwrap(); + let (wheel, _tx, _is_shutdown) = maybe_wheel.unwrap(); let time = handle.inner.driver().time(); time.process_at_time(wheel, u64::MAX); // 2 seconds }); @@ -61,7 +61,7 @@ async fn fire_all_timers(handle: &Handle, exit_rx: oneshot::Receiver<()>) { fn process_at_time(handle: &Handle, at: u64) { let handle = &handle.inner; with_current_wheel(handle, |maybe_wheel| { - let (wheel, _tx) = maybe_wheel.unwrap(); + let (wheel, _tx, _is_shutdown) = maybe_wheel.unwrap(); let time = handle.driver().time(); time.process_at_time(wheel, at); }); diff --git a/tokio/src/runtime/time/timer.rs b/tokio/src/runtime/time/timer.rs index 03c01b6297d..9c32dd236f0 100644 --- a/tokio/src/runtime/time/timer.rs +++ b/tokio/src/runtime/time/timer.rs @@ -63,7 +63,8 @@ impl Timer { with_current_wheel(&this.sched_handle, |maybe_wheel| { let deadline = deadline_to_tick(&this.sched_handle, this.deadline); let hdl = EntryHandle::new(deadline, cx.waker()); - if let Some((wheel, tx)) = maybe_wheel { + if let Some((wheel, tx, is_shutdown)) = maybe_wheel { + assert!(!is_shutdown, "{RUNTIME_SHUTTING_DOWN_ERROR}"); // Safety: the entry is not registered yet match unsafe { wheel.insert(hdl.clone(), tx) } { Insert::Success => { @@ -95,7 +96,7 @@ impl Timer { pub(super) fn with_current_wheel(hdl: &SchedulerHandle, f: F) -> R where - F: FnOnce(Option<(&mut Wheel, Sender)>) -> R, + F: FnOnce(Option<(&mut Wheel, Sender, bool)>) -> R, { #[cfg(not(feature = "rt"))] { @@ -162,19 +163,20 @@ fn push_from_remote(sched_hdl: &SchedulerHandle, entry_hdl: EntryHandle) { use crate::runtime::scheduler::Handle::MultiThread; match sched_hdl { - CurrentThread(hdl) => hdl.push_remote_timer(entry_hdl), + CurrentThread(hdl) => { + assert!(!hdl.is_shutdown(), "{RUNTIME_SHUTTING_DOWN_ERROR}"); + hdl.push_remote_timer(entry_hdl) + } #[cfg(feature = "rt-multi-thread")] - MultiThread(hdl) => hdl.push_remote_timer(entry_hdl), + 
MultiThread(hdl) => { + assert!(!hdl.is_shutdown(), "{RUNTIME_SHUTTING_DOWN_ERROR}"); + hdl.push_remote_timer(entry_hdl) + } } } } fn deadline_to_tick(sched_hdl: &SchedulerHandle, deadline: Instant) -> u64 { let time_hdl = sched_hdl.driver().time(); - - if time_hdl.is_shutdown() { - panic!("{RUNTIME_SHUTTING_DOWN_ERROR}"); - } - time_hdl.time_source().deadline_to_tick(deadline) } From 2fcfb4b6575f3a6e2dac6c4028758ea48fa2d479 Mon Sep 17 00:00:00 2001 From: Qi Date: Thu, 11 Sep 2025 22:17:44 +0800 Subject: [PATCH 035/100] remove unnecessary `PhantomData` in cancellation queue Signed-off-by: ADD-SP --- tokio/src/runtime/time/wheel/cancellation_queue.rs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/tokio/src/runtime/time/wheel/cancellation_queue.rs b/tokio/src/runtime/time/wheel/cancellation_queue.rs index 7b85166d48b..bcb372ffefc 100644 --- a/tokio/src/runtime/time/wheel/cancellation_queue.rs +++ b/tokio/src/runtime/time/wheel/cancellation_queue.rs @@ -3,8 +3,6 @@ use crate::loom::sync::{Arc, Mutex}; use crate::runtime::time::wheel::CancellationQueueEntry; use crate::util::linked_list; -use std::marker::PhantomData; - type EntryList = linked_list::LinkedList; #[derive(Debug)] @@ -93,18 +91,8 @@ impl Sender { #[derive(Debug)] pub(crate) struct Receiver { inner: Arc>, - - // Technically, receiver is `Sync`, however, we only - // need single receiver for cancellation purpose, - // so we make it `!Sync` to prevent abusing. - _not_sync: PhantomData<*const ()>, } -/// Safety: [`Inner`] is protected by [`Mutex`]. -// We need the `Receiver` to be `Send` because the `Core` struct for multi-thread -// runtime will be send to another thread during the shutdown. 
-unsafe impl Send for Receiver {} - impl Receiver { pub(crate) fn recv_all(&mut self) -> impl Iterator { self.inner.lock().iter() @@ -119,7 +107,6 @@ pub(crate) fn new() -> (Sender, Receiver) { }, Receiver { inner, - _not_sync: PhantomData, }, ) } From 9657e6d0b0ee5d7155b9105cbcbaaa0d7fd9791f Mon Sep 17 00:00:00 2001 From: Qi Date: Thu, 11 Sep 2025 22:20:24 +0800 Subject: [PATCH 036/100] fix rustfmt reports Signed-off-by: ADD-SP --- tokio/src/runtime/scheduler/multi_thread/handle.rs | 2 +- tokio/src/runtime/scheduler/multi_thread/worker.rs | 2 +- tokio/src/runtime/time/mod.rs | 5 ++++- tokio/src/runtime/time/wheel/cancellation_queue.rs | 4 +--- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tokio/src/runtime/scheduler/multi_thread/handle.rs b/tokio/src/runtime/scheduler/multi_thread/handle.rs index 1b53331bc2b..7c74ea007cc 100644 --- a/tokio/src/runtime/scheduler/multi_thread/handle.rs +++ b/tokio/src/runtime/scheduler/multi_thread/handle.rs @@ -1,6 +1,6 @@ use crate::future::Future; -use crate::loom::sync::Arc; use crate::loom::sync::atomic::{AtomicBool, Ordering}; +use crate::loom::sync::Arc; use crate::runtime::scheduler::multi_thread::worker; use crate::runtime::task::{Notified, Task, TaskHarnessScheduleHooks}; use crate::runtime::{ diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index 96875f880a7..c901ad1d5f3 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -56,8 +56,8 @@ //! the inject queue indefinitely. This would be a ref-count cycle and a memory //! leak. 
-use crate::loom::sync::{Arc, Mutex}; use crate::loom::sync::atomic::AtomicBool; +use crate::loom::sync::{Arc, Mutex}; use crate::runtime; use crate::runtime::scheduler::multi_thread::{ idle, queue, Counters, Handle, Idle, Overflow, Parker, Stats, TraceStatus, Unparker, diff --git a/tokio/src/runtime/time/mod.rs b/tokio/src/runtime/time/mod.rs index 364de59a3bd..8ed4f1e4b5b 100644 --- a/tokio/src/runtime/time/mod.rs +++ b/tokio/src/runtime/time/mod.rs @@ -111,7 +111,10 @@ impl Driver { did_wake: Arc::new(AtomicBool::new(false)), }; - let driver = Driver { park, is_shutdown: AtomicBool::new(false) }; + let driver = Driver { + park, + is_shutdown: AtomicBool::new(false), + }; (driver, handle) } diff --git a/tokio/src/runtime/time/wheel/cancellation_queue.rs b/tokio/src/runtime/time/wheel/cancellation_queue.rs index bcb372ffefc..9fc08292c8c 100644 --- a/tokio/src/runtime/time/wheel/cancellation_queue.rs +++ b/tokio/src/runtime/time/wheel/cancellation_queue.rs @@ -105,9 +105,7 @@ pub(crate) fn new() -> (Sender, Receiver) { Sender { inner: inner.clone(), }, - Receiver { - inner, - }, + Receiver { inner }, ) } From c208c3afcb3e279e277d7507d775bfffeb953f90 Mon Sep 17 00:00:00 2001 From: Qi Date: Mon, 15 Sep 2025 19:53:47 +0800 Subject: [PATCH 037/100] ci: fix spellcheck.dic Signed-off-by: ADD-SP --- spellcheck.dic | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spellcheck.dic b/spellcheck.dic index a51ce7ce2aa..90cbfa96f8e 100644 --- a/spellcheck.dic +++ b/spellcheck.dic @@ -1,4 +1,4 @@ -308 +309 & + < From 40cc384e96bb3dfc536b1124e9bf7ea280412f70 Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 21 Sep 2025 22:31:41 +0800 Subject: [PATCH 038/100] improve the ergonomic of `Handle::with_wheel()` Signed-off-by: ADD-SP --- .../runtime/scheduler/current_thread/mod.rs | 12 +++--- .../runtime/scheduler/multi_thread/worker.rs | 15 ++++--- tokio/src/runtime/time/mod.rs | 18 +++++++- tokio/src/runtime/time/tests/mod.rs | 25 +++++++---- tokio/src/runtime/time/timer.rs 
| 42 +++++++++++-------- 5 files changed, 74 insertions(+), 38 deletions(-) diff --git a/tokio/src/runtime/scheduler/current_thread/mod.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs index 1481de19a1d..0bb8e513a2d 100644 --- a/tokio/src/runtime/scheduler/current_thread/mod.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -561,13 +561,15 @@ impl Context { pub(crate) fn with_wheel(&self, f: F) -> R where - F: FnOnce(Option<(&mut Wheel, cancellation_queue::Sender, bool)>) -> R, + F: FnOnce(Option>) -> R, { self.with_core(|maybe_core| { - if let Some(core) = maybe_core { - f(Some((&mut core.wheel, core.timer_cancel_tx.clone(), false))) - } else { - f(None) + match maybe_core { + Some(core) => f(Some(crate::runtime::time::Context::Running { + wheel: &mut core.wheel, + canc_tx: &core.timer_cancel_tx, + })), + None => f(None), } }) } diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index c901ad1d5f3..b144621b129 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -890,13 +890,16 @@ impl Context { pub(crate) fn with_wheel(&self, f: F) -> R where - F: FnOnce(Option<(&mut Wheel, cancellation_queue::Sender, bool)>) -> R, + F: FnOnce(Option>) -> R, { - self.with_core(|core| { - if let Some(core) = core { - f(Some((&mut core.wheel, core.timer_cancel_tx.clone(), core.is_shutdown))) - } else { - f(None) + self.with_core(|maybe_core| { + match maybe_core { + Some(core) if core.is_shutdown => f(Some(crate::runtime::time::Context::Shutdown)), + Some(core) => f(Some(crate::runtime::time::Context::Running { + wheel: &mut core.wheel, + canc_tx: &core.timer_cancel_tx, + })), + None => f(None), } }) } diff --git a/tokio/src/runtime/time/mod.rs b/tokio/src/runtime/time/mod.rs index 8ed4f1e4b5b..4aef399391c 100644 --- a/tokio/src/runtime/time/mod.rs +++ b/tokio/src/runtime/time/mod.rs @@ -18,9 +18,9 @@ pub(crate) use 
source::TimeSource; mod wheel; cfg_rt_and_time! { pub(crate) use wheel::{Insert, EntryHandle}; - pub(crate) use wheel::cancellation_queue; } cfg_rt_or_time! { + pub(crate) use wheel::cancellation_queue; pub(crate) use wheel::Wheel; } @@ -137,5 +137,21 @@ impl Driver { } } +cfg_rt_or_time! { + /// Local context for the time driver. + pub(crate) enum Context<'a> { + /// The runtime is running, we can access it. + Running { + /// the local time wheel + wheel: &'a mut Wheel, + /// channel to push timers that are pending cancellation + canc_tx: &'a cancellation_queue::Sender, + }, + #[cfg(feature = "rt-multi-thread")] + /// The runtime is shutting down, no timers can be registered. + Shutdown, + } +} + #[cfg(test)] mod tests; diff --git a/tokio/src/runtime/time/tests/mod.rs b/tokio/src/runtime/time/tests/mod.rs index 7ea0259ea30..9d851faca60 100644 --- a/tokio/src/runtime/time/tests/mod.rs +++ b/tokio/src/runtime/time/tests/mod.rs @@ -8,6 +8,7 @@ use futures::task::noop_waker_ref; use crate::loom::thread; use crate::runtime::time::timer::with_current_wheel; +use crate::runtime::time::Context as TimeContext; use crate::runtime::Handle; use crate::sync::oneshot; @@ -47,10 +48,14 @@ async fn fire_all_timers(handle: &Handle, exit_rx: oneshot::Receiver<()>) { // In the `block_on` context, we can get the current wheel // fire all timers. - with_current_wheel(&handle.inner, |maybe_wheel| { - let (wheel, _tx, _is_shutdown) = maybe_wheel.unwrap(); - let time = handle.inner.driver().time(); - time.process_at_time(wheel, u64::MAX); // 2 seconds + with_current_wheel(&handle.inner, |maybe_wheel| match maybe_wheel { + Some(TimeContext::Running { wheel, .. 
}) => { + let time = handle.inner.driver().time(); + time.process_at_time(wheel, u64::MAX); + } + #[cfg(feature = "rt-multi-thread")] + Some(TimeContext::Shutdown) => panic!("runtime is shutting down"), + None => panic!("no current wheel"), }); thread::yield_now(); @@ -60,10 +65,14 @@ async fn fire_all_timers(handle: &Handle, exit_rx: oneshot::Receiver<()>) { // This function must be called inside the `rt.block_on`. fn process_at_time(handle: &Handle, at: u64) { let handle = &handle.inner; - with_current_wheel(handle, |maybe_wheel| { - let (wheel, _tx, _is_shutdown) = maybe_wheel.unwrap(); - let time = handle.driver().time(); - time.process_at_time(wheel, at); + with_current_wheel(handle, |maybe_wheel| match maybe_wheel { + Some(TimeContext::Running { wheel, .. }) => { + let time = handle.driver().time(); + time.process_at_time(wheel, at); + } + #[cfg(feature = "rt-multi-thread")] + Some(TimeContext::Shutdown) => panic!("runtime is shutting down"), + None => panic!("no current wheel"), }); } diff --git a/tokio/src/runtime/time/timer.rs b/tokio/src/runtime/time/timer.rs index 9c32dd236f0..6fd5114d371 100644 --- a/tokio/src/runtime/time/timer.rs +++ b/tokio/src/runtime/time/timer.rs @@ -1,14 +1,15 @@ use super::wheel::EntryHandle; use crate::runtime::scheduler::Handle as SchedulerHandle; -use crate::runtime::time::wheel::cancellation_queue::Sender; use crate::runtime::time::wheel::Insert; -use crate::runtime::time::Wheel; +use crate::runtime::time::Context as TimeContext; use crate::time::Instant; -use crate::util::error::RUNTIME_SHUTTING_DOWN_ERROR; use std::pin::Pin; use std::task::{Context, Poll}; +#[cfg(any(feature = "rt", feature = "rt-multi-thread"))] +use crate::util::error::RUNTIME_SHUTTING_DOWN_ERROR; + pub(crate) struct Timer { sched_handle: SchedulerHandle, @@ -60,24 +61,29 @@ impl Timer { fn register(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> { let this = self.get_mut(); - with_current_wheel(&this.sched_handle, |maybe_wheel| { + 
with_current_wheel(&this.sched_handle, |maybe_time_cx| { let deadline = deadline_to_tick(&this.sched_handle, this.deadline); let hdl = EntryHandle::new(deadline, cx.waker()); - if let Some((wheel, tx, is_shutdown)) = maybe_wheel { - assert!(!is_shutdown, "{RUNTIME_SHUTTING_DOWN_ERROR}"); - // Safety: the entry is not registered yet - match unsafe { wheel.insert(hdl.clone(), tx) } { - Insert::Success => { - this.entry = Some(hdl); - Poll::Pending + + match maybe_time_cx { + Some(TimeContext::Running { wheel, canc_tx }) => { + // Safety: the entry is not registered yet + match unsafe { wheel.insert(hdl.clone(), canc_tx.clone()) } { + Insert::Success => { + this.entry = Some(hdl); + Poll::Pending + } + Insert::Elapsed => Poll::Ready(()), + Insert::Cancelling => Poll::Pending, } - Insert::Elapsed => Poll::Ready(()), - Insert::Cancelling => Poll::Pending, } - } else { - this.entry = Some(hdl.clone()); - push_from_remote(&this.sched_handle, hdl); - Poll::Pending + #[cfg(feature = "rt-multi-thread")] + Some(TimeContext::Shutdown) => panic!("{RUNTIME_SHUTTING_DOWN_ERROR}"), + None => { + this.entry = Some(hdl.clone()); + push_from_remote(&this.sched_handle, hdl); + Poll::Pending + } } }) } @@ -96,7 +102,7 @@ impl Timer { pub(super) fn with_current_wheel(hdl: &SchedulerHandle, f: F) -> R where - F: FnOnce(Option<(&mut Wheel, Sender, bool)>) -> R, + F: FnOnce(Option>) -> R, { #[cfg(not(feature = "rt"))] { From 81e57c90a7f62779c9175128a788e8ca94448037 Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 5 Oct 2025 19:44:29 +0800 Subject: [PATCH 039/100] simplify `tokio/src/runtime/time/timer.rs` Signed-off-by: ADD-SP --- tokio/src/runtime/scheduler/mod.rs | 33 ++++++++++++++++++++ tokio/src/runtime/time/timer.rs | 50 ++++-------------------------- 2 files changed, 39 insertions(+), 44 deletions(-) diff --git a/tokio/src/runtime/scheduler/mod.rs b/tokio/src/runtime/scheduler/mod.rs index d0b36f893d2..3f40a483e3d 100644 --- a/tokio/src/runtime/scheduler/mod.rs +++ 
b/tokio/src/runtime/scheduler/mod.rs @@ -109,6 +109,30 @@ cfg_rt! { } } + cfg_time! { + /// Returns true if both handles belong to the same runtime instance. + pub(crate) fn is_same_runtime(&self, other: &Handle) -> bool { + match (self, other) { + (Handle::CurrentThread(a), Handle::CurrentThread(b)) => Arc::ptr_eq(a, b), + #[cfg(feature = "rt-multi-thread")] + (Handle::MultiThread(a), Handle::MultiThread(b)) => Arc::ptr_eq(a, b), + _ => false, + } + } + + /// Returns true if the runtime is shutting down. + pub(crate) fn is_shutdown(&self) -> bool { + match_flavor!(self, Handle(h) => h.is_shutdown()) + } + + /// Push a timer entry that was created outside of this runtime + /// into the runtime-global queue. The pushed timer will be + /// processed by a random worker thread. + pub(crate) fn push_remote_timer(&self, entry_hdl: crate::runtime::time::EntryHandle) { + match_flavor!(self, Handle(h) => h.push_remote_timer(entry_hdl)) + } + } + /// Returns true if this is a local runtime and the runtime is owned by the current thread. pub(crate) fn can_spawn_local_on_local_runtime(&self) -> bool { match self { @@ -249,6 +273,15 @@ cfg_rt! { match_flavor!(self, Context(context) => context.defer(waker)); } + cfg_time! { + pub(crate) fn with_wheel(&self, f: F) -> R + where + F: FnOnce(Option>) -> R, + { + match_flavor!(self, Context(context) => context.with_wheel(f)) + } + } + cfg_rt_multi_thread! 
{ #[track_caller] pub(crate) fn expect_multi_thread(&self) -> &multi_thread::Context { diff --git a/tokio/src/runtime/time/timer.rs b/tokio/src/runtime/time/timer.rs index 6fd5114d371..eb5ad4dfbd2 100644 --- a/tokio/src/runtime/time/timer.rs +++ b/tokio/src/runtime/time/timer.rs @@ -112,33 +112,10 @@ where #[cfg(feature = "rt")] { - use crate::loom::sync::Arc; use crate::runtime::context; - use crate::runtime::scheduler::Context; - use crate::runtime::scheduler::Handle::CurrentThread; - #[cfg(feature = "rt-multi-thread")] - use crate::runtime::scheduler::Handle::MultiThread; - - let is_same_rt = context::with_current(|cur_hdl| match (cur_hdl, hdl) { - (CurrentThread(cur_hdl), CurrentThread(hdl)) => Arc::ptr_eq(cur_hdl, hdl), - #[cfg(feature = "rt-multi-thread")] - (MultiThread(cur_hdl), MultiThread(hdl)) => Arc::ptr_eq(cur_hdl, hdl), - #[cfg(feature = "rt-multi-thread")] - // this above cfg is needed to avoid the compiler warning reported by: - // cargo check -Zbuild-std --target target-specs/i686-unknown-linux-gnu.json \ - // --manifest-path tokio/Cargo.toml --no-default-features \ - // --features test-util` - // error: unreachable pattern - // --> tokio/src/runtime/time/timer.rs:118:13 - // | - // 115 | (CurrentThread(cur_hdl), CurrentThread(hdl)) => Arc::ptr_eq(cur_hdl, hdl), - // | -------------------------------------------- matches all the relevant values - // ... 
- // 118 | _ => false, - // | ^ no value can reach this - _ => false, - }) - .unwrap_or_default(); + + let is_same_rt = + context::with_current(|cur_hdl| cur_hdl.is_same_runtime(hdl)).unwrap_or_default(); if !is_same_rt { // We don't want to create the timer in one runtime, @@ -146,9 +123,7 @@ where f(None) } else { context::with_scheduler(|maybe_cx| match maybe_cx { - Some(Context::CurrentThread(cx)) => cx.with_wheel(f), - #[cfg(feature = "rt-multi-thread")] - Some(Context::MultiThread(cx)) => cx.with_wheel(f), + Some(cx) => cx.with_wheel(f), None => f(None), }) } @@ -164,21 +139,8 @@ fn push_from_remote(sched_hdl: &SchedulerHandle, entry_hdl: EntryHandle) { #[cfg(feature = "rt")] { - use crate::runtime::scheduler::Handle::CurrentThread; - #[cfg(feature = "rt-multi-thread")] - use crate::runtime::scheduler::Handle::MultiThread; - - match sched_hdl { - CurrentThread(hdl) => { - assert!(!hdl.is_shutdown(), "{RUNTIME_SHUTTING_DOWN_ERROR}"); - hdl.push_remote_timer(entry_hdl) - } - #[cfg(feature = "rt-multi-thread")] - MultiThread(hdl) => { - assert!(!hdl.is_shutdown(), "{RUNTIME_SHUTTING_DOWN_ERROR}"); - hdl.push_remote_timer(entry_hdl) - } - } + assert!(!sched_hdl.is_shutdown(), "{RUNTIME_SHUTTING_DOWN_ERROR}"); + sched_hdl.push_remote_timer(entry_hdl); } } From 9b5a90cd502d7cd3d5d710249a461f55a137c3af Mon Sep 17 00:00:00 2001 From: Qi Date: Mon, 6 Oct 2025 19:30:30 +0800 Subject: [PATCH 040/100] fixup! 
eliminate atomic state by `Mutex` of `Entry` --- tokio/src/runtime/time/wheel/entry.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index aa48b2e86d3..970d88ea4f6 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -179,6 +179,7 @@ impl Handle { } pub(crate) fn register_waker(&self, waker: &Waker) { + let _lock = self.entry.state.lock(); self.entry.waker.register_by_ref(waker); } From 4862ef7028f7bfd6085beb9b386001286707f8e8 Mon Sep 17 00:00:00 2001 From: Qi Date: Mon, 6 Oct 2025 19:30:34 +0800 Subject: [PATCH 041/100] push timer tasks into local queue instead of global queue Signed-off-by: ADD-SP --- .../runtime/scheduler/current_thread/mod.rs | 70 +++++++++---------- .../runtime/scheduler/multi_thread/worker.rs | 70 +++++++++---------- tokio/src/runtime/time/mod.rs | 17 +++++ 3 files changed, 86 insertions(+), 71 deletions(-) diff --git a/tokio/src/runtime/scheduler/current_thread/mod.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs index 0bb8e513a2d..259f571eff9 100644 --- a/tokio/src/runtime/scheduler/current_thread/mod.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -24,7 +24,7 @@ use std::{fmt, thread}; cfg_time! 
{ use crate::runtime::scheduler::util; - use crate::runtime::time::{EntryHandle, Wheel, cancellation_queue}; + use crate::runtime::time::EntryHandle; use crate::loom::sync::Mutex; } @@ -69,16 +69,7 @@ struct Core { tick: u32, #[cfg(feature = "time")] - /// Worker local timer wheel - wheel: Wheel, - - #[cfg(feature = "time")] - /// Channel for sending timers that need to be cancelled - timer_cancel_tx: cancellation_queue::Sender, - - #[cfg(feature = "time")] - /// Channel for receiving timers that need to be cancelled - timer_cancel_rx: cancellation_queue::Receiver, + time_context: Option, /// Runtime driver /// @@ -194,17 +185,11 @@ impl CurrentThread { local_tid, }); - #[cfg(feature = "time")] - let (timer_cancel_tx, timer_cancel_rx) = cancellation_queue::new(); let core = AtomicCell::new(Some(Box::new(Core { tasks: VecDeque::with_capacity(INITIAL_CAPACITY), tick: 0, #[cfg(feature = "time")] - wheel: Wheel::new(), - #[cfg(feature = "time")] - timer_cancel_tx, - #[cfg(feature = "time")] - timer_cancel_rx, + time_context: Some(crate::runtime::time::Context2::new()), driver: Some(driver), metrics: MetricsBatch::new(&handle.shared.worker_metrics), global_queue_interval, @@ -316,14 +301,17 @@ fn shutdown2(mut core: Box, handle: &Handle) -> Box { handle.shared.owned.close_and_shutdown_all(0); #[cfg(feature = "time")] - util::time::shutdown_local_timers( - &mut core.wheel, - &core.timer_cancel_tx, - &mut core.timer_cancel_rx, - handle.take_remote_timers(), - &handle.driver, - ); - + { + let mut time_context = core.time_context.take().unwrap(); + util::time::shutdown_local_timers( + &mut time_context.wheel, + &time_context.canc_tx, + &mut time_context.canc_rx, + handle.take_remote_timers(), + &handle.driver, + ); + assert!(core.time_context.replace(time_context).is_none()); + } // Drain the local queue // We already shut down every task, so we just need to drop the task. 
while let Some(task) = core.next_local_task(handle) { @@ -478,13 +466,17 @@ impl Context { // otherwise the compiler will complain that the `core` parameter does not need to be mutable // if the 'time' feature is not enabled. let mut core = core; - util::time::remove_cancelled_timers(&mut core.wheel, &mut core.timer_cancel_rx); + + let mut time_context = core.time_context.take().unwrap(); + util::time::remove_cancelled_timers(&mut time_context.wheel, &mut time_context.canc_rx); let should_yield = util::time::insert_inject_timers( - &mut core.wheel, - &core.timer_cancel_tx, + &mut time_context.wheel, + &time_context.canc_tx, handle.take_remote_timers(), ); - let next_timer = util::time::next_expiration_time(&core.wheel, &handle.driver); + let next_timer = util::time::next_expiration_time(&time_context.wheel, &handle.driver); + core.time_context = Some(time_context); + if should_yield { (core, Some(Duration::from_millis(0)), None) } else { @@ -524,8 +516,12 @@ impl Context { // | help: remove this `mut` // | let mut core = core; + + let mut time_context = core.time_context.take().unwrap(); util::time::post_auto_advance(&handle.driver, maybe_advance_duration); - util::time::process_expired_timers(&mut core.wheel, &handle.driver); + util::time::process_expired_timers(&mut time_context.wheel, &handle.driver); + core.time_context = Some(time_context); + core }; @@ -565,10 +561,13 @@ impl Context { { self.with_core(|maybe_core| { match maybe_core { - Some(core) => f(Some(crate::runtime::time::Context::Running { - wheel: &mut core.wheel, - canc_tx: &core.timer_cancel_tx, - })), + Some(core) => { + let time_context = core.time_context.as_mut().expect("time context missing"); + f(Some(crate::runtime::time::Context::Running { + wheel: &mut time_context.wheel, + canc_tx: &mut time_context.canc_tx, + })) + } None => f(None), } }) @@ -805,7 +804,6 @@ impl Schedule for Arc { core.push_task(self, task); } else { // runtime is shutting down - // OR waking up expired timers // Track 
that a task was scheduled from **outside** of the runtime. self.shared.scheduler_metrics.inc_remote_schedule_count(); diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index b144621b129..89994eae43a 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -77,7 +77,7 @@ use std::time::Duration; cfg_time! { use crate::runtime::scheduler::util; - use crate::runtime::time::{EntryHandle, Wheel, cancellation_queue}; + use crate::runtime::time::EntryHandle; } mod metrics; @@ -122,16 +122,7 @@ struct Core { run_queue: queue::Local>, #[cfg(feature = "time")] - /// Worker local timer wheel - wheel: Wheel, - - #[cfg(feature = "time")] - /// Channel for sending timers that need to be cancelled - timer_cancel_tx: cancellation_queue::Sender, - - #[cfg(feature = "time")] - /// Channel for receiving timers that need to be cancelled - timer_cancel_rx: cancellation_queue::Receiver, + time_context: Option, /// True if the worker is currently searching for more work. Searching /// involves attempting to steal from other workers. 
@@ -278,8 +269,6 @@ pub(super) fn create( let unpark = park.unpark(); let metrics = WorkerMetrics::from_config(&config); let stats = Stats::new(&metrics); - #[cfg(feature = "time")] - let (timer_cancel_tx, timer_cancel_rx) = cancellation_queue::new(); cores.push(Box::new(Core { tick: 0, @@ -287,11 +276,7 @@ pub(super) fn create( lifo_enabled: !config.disable_lifo_slot, run_queue, #[cfg(feature = "time")] - wheel: Wheel::new(), - #[cfg(feature = "time")] - timer_cancel_tx, - #[cfg(feature = "time")] - timer_cancel_rx, + time_context: Some(crate::runtime::time::Context2::new()), is_searching: false, is_shutdown: false, is_traced: false, @@ -436,6 +421,10 @@ where Some(core) => core, None => return Ok(()), }; + assert!( + core.time_context.is_some(), + "should always be `Some` unless processing local timers" + ); // If we heavily call `spawn_blocking`, there might be no available thread to // run this core. Except for the task in the lifo_slot, all tasks can be @@ -596,14 +585,17 @@ impl Context { } #[cfg(feature = "time")] - util::time::shutdown_local_timers( - &mut core.wheel, - &core.timer_cancel_tx, - &mut core.timer_cancel_rx, - self.worker.handle.take_remote_timers(), - &self.worker.handle.driver, - ); - + { + let mut time_context = core.time_context.take().expect("time context missing"); + util::time::shutdown_local_timers( + &mut time_context.wheel, + &time_context.canc_tx, + &mut time_context.canc_rx, + self.worker.handle.take_remote_timers(), + &self.worker.handle.driver, + ); + core.time_context = Some(time_context); + } core.pre_shutdown(&self.worker); // Signal shutdown self.worker.handle.shutdown_core(core); @@ -808,13 +800,16 @@ impl Context { let (duration, maybe_advance_duration) = { let handle = &self.worker.handle; - util::time::remove_cancelled_timers(&mut core.wheel, &mut core.timer_cancel_rx); + let mut time_context = core.time_context.take().expect("time context missing"); + util::time::remove_cancelled_timers(&mut time_context.wheel, &mut 
time_context.canc_rx); let should_yield = util::time::insert_inject_timers( - &mut core.wheel, - &core.timer_cancel_tx, + &mut time_context.wheel, + &time_context.canc_tx, handle.take_remote_timers(), ); - let next_timer = util::time::next_expiration_time(&core.wheel, &handle.driver); + let next_timer = util::time::next_expiration_time(&time_context.wheel, &handle.driver); + core.time_context = Some(time_context); + if should_yield { (Some(Duration::from_millis(0)), None) } else { @@ -853,8 +848,10 @@ impl Context { #[cfg(feature = "time")] { let handle = &self.worker.handle; + let mut core_time_context = core.time_context.take().expect("time context missing"); util::time::post_auto_advance(&handle.driver, maybe_advance_duration); - util::time::process_expired_timers(&mut core.wheel, &handle.driver); + util::time::process_expired_timers(&mut core_time_context.wheel, &handle.driver); + core.time_context = Some(core_time_context); } // Place `park` back in `core` @@ -895,10 +892,13 @@ impl Context { self.with_core(|maybe_core| { match maybe_core { Some(core) if core.is_shutdown => f(Some(crate::runtime::time::Context::Shutdown)), - Some(core) => f(Some(crate::runtime::time::Context::Running { - wheel: &mut core.wheel, - canc_tx: &core.timer_cancel_tx, - })), + Some(core) => { + let time_context = core.time_context.as_mut().expect("time context missing"); + f(Some(crate::runtime::time::Context::Running { + wheel: &mut time_context.wheel, + canc_tx: &time_context.canc_tx, + })) + } None => f(None), } }) diff --git a/tokio/src/runtime/time/mod.rs b/tokio/src/runtime/time/mod.rs index 4aef399391c..99e196d7df8 100644 --- a/tokio/src/runtime/time/mod.rs +++ b/tokio/src/runtime/time/mod.rs @@ -151,6 +151,23 @@ cfg_rt_or_time! { /// The runtime is shutting down, no timers can be registered. 
Shutdown, } + + pub(crate) struct Context2 { + pub(crate) wheel: Wheel, + pub(crate) canc_tx: cancellation_queue::Sender, + pub(crate) canc_rx: cancellation_queue::Receiver, + } + + impl Context2 { + pub(crate) fn new() -> Self { + let (canc_tx, canc_rx) = cancellation_queue::new(); + Self { + wheel: Wheel::new(), + canc_tx, + canc_rx, + } + } + } } #[cfg(test)] From b3c14eae7636d18caed60fbf6e127c3755d8cec7 Mon Sep 17 00:00:00 2001 From: Qi Date: Mon, 6 Oct 2025 19:39:03 +0800 Subject: [PATCH 042/100] fixup! simplify `tokio/src/runtime/time/timer.rs` --- tokio/src/runtime/scheduler/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tokio/src/runtime/scheduler/mod.rs b/tokio/src/runtime/scheduler/mod.rs index 3f40a483e3d..ff6c59a7531 100644 --- a/tokio/src/runtime/scheduler/mod.rs +++ b/tokio/src/runtime/scheduler/mod.rs @@ -116,7 +116,8 @@ cfg_rt! { (Handle::CurrentThread(a), Handle::CurrentThread(b)) => Arc::ptr_eq(a, b), #[cfg(feature = "rt-multi-thread")] (Handle::MultiThread(a), Handle::MultiThread(b)) => Arc::ptr_eq(a, b), - _ => false, + #[cfg(feature = "rt-multi-thread")] + _ => false, // different runtime types } } From 2aafc9265fa291d626180c0c7c756972d3fc6005 Mon Sep 17 00:00:00 2001 From: Qi Date: Mon, 6 Oct 2025 19:43:19 +0800 Subject: [PATCH 043/100] fixup! 
push timer tasks into local queue instead of global queue --- tokio/src/runtime/scheduler/multi_thread/worker.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index 89994eae43a..afb69cf352d 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -421,6 +421,7 @@ where Some(core) => core, None => return Ok(()), }; + #[cfg(feature = "time")] assert!( core.time_context.is_some(), "should always be `Some` unless processing local timers" From aec80f95b2b09ba7f0e430bf59c0b7dfdbcd8358 Mon Sep 17 00:00:00 2001 From: Qi Date: Mon, 6 Oct 2025 20:07:05 +0800 Subject: [PATCH 044/100] fix `spellcheck.dic` Signed-off-by: ADD-SP --- spellcheck.dic | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spellcheck.dic b/spellcheck.dic index c62209c04ce..8d4eb252ded 100644 --- a/spellcheck.dic +++ b/spellcheck.dic @@ -1,4 +1,4 @@ -309 +310 & + < From 8bb88bc04cf276ccc0f1005481c5fc2c1cb73478 Mon Sep 17 00:00:00 2001 From: Qi Date: Mon, 6 Oct 2025 20:56:32 +0800 Subject: [PATCH 045/100] fixup! 
push timer tasks into local queue instead of global queue --- .../runtime/scheduler/current_thread/mod.rs | 29 ++++++++++++------- .../runtime/scheduler/multi_thread/worker.rs | 22 +++++++++----- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/tokio/src/runtime/scheduler/current_thread/mod.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs index 259f571eff9..9264ed1e26a 100644 --- a/tokio/src/runtime/scheduler/current_thread/mod.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -468,14 +468,21 @@ impl Context { let mut core = core; let mut time_context = core.time_context.take().unwrap(); - util::time::remove_cancelled_timers(&mut time_context.wheel, &mut time_context.canc_rx); - let should_yield = util::time::insert_inject_timers( - &mut time_context.wheel, - &time_context.canc_tx, - handle.take_remote_timers(), - ); - let next_timer = util::time::next_expiration_time(&time_context.wheel, &handle.driver); - core.time_context = Some(time_context); + let (mut core, (should_yield, next_timer)) = self.enter(core, || { + util::time::remove_cancelled_timers( + &mut time_context.wheel, + &mut time_context.canc_rx, + ); + let should_yield = util::time::insert_inject_timers( + &mut time_context.wheel, + &time_context.canc_tx, + handle.take_remote_timers(), + ); + let next_timer = + util::time::next_expiration_time(&time_context.wheel, &handle.driver); + (should_yield, next_timer) + }); + assert!(core.time_context.replace(time_context).is_none()); if should_yield { (core, Some(Duration::from_millis(0)), None) @@ -518,8 +525,10 @@ impl Context { let mut core = core; let mut time_context = core.time_context.take().unwrap(); - util::time::post_auto_advance(&handle.driver, maybe_advance_duration); - util::time::process_expired_timers(&mut time_context.wheel, &handle.driver); + let (mut core, ()) = self.enter(core, || { + util::time::post_auto_advance(&handle.driver, maybe_advance_duration); + util::time::process_expired_timers(&mut 
time_context.wheel, &handle.driver); + }); core.time_context = Some(time_context); core diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index afb69cf352d..81685804bb8 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -798,10 +798,12 @@ impl Context { self.assert_lifo_enabled_is_correct(&core); #[cfg(feature = "time")] - let (duration, maybe_advance_duration) = { + let (mut core, duration, maybe_advance_duration) = { let handle = &self.worker.handle; let mut time_context = core.time_context.take().expect("time context missing"); + // Store `core` in context + *self.core.borrow_mut() = Some(core); util::time::remove_cancelled_timers(&mut time_context.wheel, &mut time_context.canc_rx); let should_yield = util::time::insert_inject_timers( &mut time_context.wheel, @@ -809,10 +811,11 @@ impl Context { handle.take_remote_timers(), ); let next_timer = util::time::next_expiration_time(&time_context.wheel, &handle.driver); + let mut core = self.core.borrow_mut().take().expect("core missing"); core.time_context = Some(time_context); if should_yield { - (Some(Duration::from_millis(0)), None) + (core, Some(Duration::from_millis(0)), None) } else { let dur = match (next_timer, duration) { (Some(next_timer), Some(park_duration)) => Some(next_timer.min(park_duration)), @@ -821,9 +824,9 @@ impl Context { (None, None) => None, }; if util::time::pre_auto_advance(&handle.driver, dur) { - (Some(Duration::ZERO), dur) + (core, Some(Duration::ZERO), dur) } else { - (dur, None) + (core, dur, None) } } }; @@ -843,18 +846,23 @@ impl Context { self.defer.wake(); - // Remove `core` from context - core = self.core.borrow_mut().take().expect("core missing"); - #[cfg(feature = "time")] { let handle = &self.worker.handle; + // Remove `core` from context + core = self.core.borrow_mut().take().expect("core missing"); + let mut core_time_context = 
core.time_context.take().expect("time context missing"); util::time::post_auto_advance(&handle.driver, maybe_advance_duration); util::time::process_expired_timers(&mut core_time_context.wheel, &handle.driver); core.time_context = Some(core_time_context); + + assert!(self.core.borrow_mut().replace(core).is_none()); } + // Remove `core` from context + core = self.core.borrow_mut().take().expect("core missing"); + // Place `park` back in `core` core.park = Some(park); From 0b086ec71a47c4022051a6f1d1828e050d873267 Mon Sep 17 00:00:00 2001 From: Qi Date: Mon, 6 Oct 2025 21:07:14 +0800 Subject: [PATCH 046/100] fixup! push timer tasks into local queue instead of global queue --- tokio/src/runtime/scheduler/multi_thread/worker.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index 81685804bb8..e442d21bc43 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -851,12 +851,12 @@ impl Context { let handle = &self.worker.handle; // Remove `core` from context core = self.core.borrow_mut().take().expect("core missing"); - - let mut core_time_context = core.time_context.take().expect("time context missing"); + let mut time_context = core.time_context.take().expect("time context missing"); + *self.core.borrow_mut() = Some(core); util::time::post_auto_advance(&handle.driver, maybe_advance_duration); - util::time::process_expired_timers(&mut core_time_context.wheel, &handle.driver); - core.time_context = Some(core_time_context); - + util::time::process_expired_timers(&mut time_context.wheel, &handle.driver); + core = self.core.borrow_mut().take().expect("core missing"); + core.time_context = Some(time_context); assert!(self.core.borrow_mut().replace(core).is_none()); } From 35be1b958c7be7f2c0f2feeccef3dc7b2231d073 Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 7 Oct 2025 23:16:35 
+0800 Subject: [PATCH 047/100] fixup! push timer tasks into local queue instead of global queue --- tokio/src/runtime/scheduler/current_thread/mod.rs | 14 +++++++++----- tokio/src/runtime/scheduler/multi_thread/worker.rs | 12 +++++++----- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/tokio/src/runtime/scheduler/current_thread/mod.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs index 9264ed1e26a..b8b2a7d46bc 100644 --- a/tokio/src/runtime/scheduler/current_thread/mod.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -571,11 +571,15 @@ impl Context { self.with_core(|maybe_core| { match maybe_core { Some(core) => { - let time_context = core.time_context.as_mut().expect("time context missing"); - f(Some(crate::runtime::time::Context::Running { - wheel: &mut time_context.wheel, - canc_tx: &mut time_context.canc_tx, - })) + match core.time_context { + Some(ref mut time_context) => { + f(Some(crate::runtime::time::Context::Running { + wheel: &mut time_context.wheel, + canc_tx: &time_context.canc_tx, + })) + } + None => f(None), + } } None => f(None), } diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index e442d21bc43..3ca9b3fa4b7 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -902,11 +902,13 @@ impl Context { match maybe_core { Some(core) if core.is_shutdown => f(Some(crate::runtime::time::Context::Shutdown)), Some(core) => { - let time_context = core.time_context.as_mut().expect("time context missing"); - f(Some(crate::runtime::time::Context::Running { - wheel: &mut time_context.wheel, - canc_tx: &time_context.canc_tx, - })) + match core.time_context { + Some(ref mut time_context) => f(Some(crate::runtime::time::Context::Running { + wheel: &mut time_context.wheel, + canc_tx: &time_context.canc_tx, + })), + None => f(None), + } } None => f(None), } From 
d00f247daa84d67097c14e9a735341ea28a24e76 Mon Sep 17 00:00:00 2001 From: Qi Date: Wed, 8 Oct 2025 15:21:33 +0800 Subject: [PATCH 048/100] fixup! eliminate atomic state by `Mutex` of `Entry` --- tokio/src/runtime/time/wheel/entry.rs | 88 +++++++++++++++++---------- 1 file changed, 55 insertions(+), 33 deletions(-) diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index 970d88ea4f6..e0f5d53b664 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -1,6 +1,6 @@ use super::cancellation_queue::Sender; use crate::loom::sync::{Arc, Mutex}; -use crate::{sync::AtomicWaker, util::linked_list}; +use crate::util::linked_list; use std::marker::PhantomPinned; use std::ptr::NonNull; @@ -11,16 +11,16 @@ pub(crate) type EntryList = linked_list::LinkedList; #[derive(Debug)] enum State { /// A pure new entry, no any changes to the state. - Unregistered, + Unregistered(Waker), /// The entry is registered to the timer wheel, /// but not in the pending queue of the timer wheel. - Registered(Sender), + Registered(Sender, Waker), /// The entry is in the pending queue of the timer wheel, /// and not in any wheel level, which means that /// the entry is reached its deadline and waiting to be woken up. - Pending, + Pending(Waker), /// The waker has been called, and the entry is no longer in the timer wheel /// (both each wheel level and the pending queue), which means that @@ -42,9 +42,6 @@ pub(crate) struct Entry { /// The tick when this entry is scheduled to expire. deadline: u64, - /// The currently registered waker. 
- waker: AtomicWaker, - state: Mutex, /// Make the type `!Unpin` to prevent LLVM from emitting @@ -127,11 +124,9 @@ impl Handle { wheel_pointers: linked_list::Pointers::new(), cancel_pointers: linked_list::Pointers::new(), deadline, - waker: AtomicWaker::new(), - state: Mutex::new(State::Unregistered), + state: Mutex::new(State::Unregistered(waker.clone())), _pin: PhantomPinned, }); - entry.waker.register_by_ref(waker); Handle { entry } } @@ -141,14 +136,19 @@ impl Handle { let mut lock = self.entry.state.lock(); match &*lock { // don't unlock — poisoning the `Mutex` stops others from using the bad state. - state @ (State::Unregistered | State::Registered(_)) => { + state @ (State::Unregistered(_) | State::Registered(_, _)) => { panic!("corrupted state: {state:#?}") } - State::Pending => { - *lock = State::WokenUp; + State::Pending(_waker) => { + let old_state = std::mem::replace(&mut *lock, State::WokenUp); // Since state has been updated, no need to hold the lock. drop(lock); - self.entry.waker.wake(); + if let State::Pending(old_waker) = old_state { + // Merge the wakers to ensure that the most recent waker is used. + old_waker.wake(); + } else { + unreachable!() + } } // don't unlock — poisoning the `Mutex` stops others from using the bad state. State::WokenUp => panic!("corrupted state: `State::WokenUp`"), @@ -161,37 +161,59 @@ impl Handle { pub(crate) fn wake_unregistered(&self) { let mut lock = self.entry.state.lock(); match &*lock { - State::Unregistered => { - *lock = State::WokenUp; + State::Unregistered(_waker) => { + let old_state = std::mem::replace(&mut *lock, State::WokenUp); // Since state has been updated, no need to hold the lock. drop(lock); - self.entry.waker.wake(); + if let State::Unregistered(old_waker) = old_state { + // Merge the wakers to ensure that the most recent waker is used. + old_waker.wake(); + } else { + unreachable!() + } } // don't unlock — poisoning the `Mutex` stops others from using the bad state. 
- state @ (State::Registered(_) | State::WokenUp) => { + state @ (State::Registered(_, _) | State::WokenUp) => { panic!("corrupted state: {state:#?}") } // don't unlock — poisoning the `Mutex` stops others from using the bad state. - State::Pending => panic!("corrupted state: State::Pending"), + State::Pending(_) => panic!("corrupted state: State::Pending"), // don't wake up cancelling entries State::Cancelling => (), } } pub(crate) fn register_waker(&self, waker: &Waker) { - let _lock = self.entry.state.lock(); - self.entry.waker.register_by_ref(waker); + let mut lock = self.entry.state.lock(); + match &mut *lock { + State::Unregistered(old_waker) => { + if !old_waker.will_wake(waker) { + *old_waker = waker.clone(); + } + } + State::Registered(_, old_waker) => { + if !old_waker.will_wake(waker) { + *old_waker = waker.clone(); + } + } + State::Pending(old_waker) => { + if !old_waker.will_wake(waker) { + *old_waker = waker.clone(); + } + } + State::WokenUp | State::Cancelling => (), // no need to update the waker + } } pub(crate) fn transition_to_registered(&self, cancel_tx: Sender) -> TransitionToRegistered { let mut lock = self.entry.state.lock(); match &*lock { - State::Unregistered => { - *lock = State::Registered(cancel_tx); + State::Unregistered(waker) => { + *lock = State::Registered(cancel_tx, waker.clone()); TransitionToRegistered::Success } // don't unlock — poisoning the `Mutex` stops others from using the bad state. - state @ (State::Registered(_) | State::Pending | State::WokenUp) => { + state @ (State::Registered(_, _) | State::Pending(_) | State::WokenUp) => { panic!("corrupted state: {state:#?}") } State::Cancelling => TransitionToRegistered::Cancelling, @@ -206,13 +228,13 @@ impl Handle { let mut lock = self.entry.state.lock(); match &*lock { // don't unlock — poisoning the `Mutex` stops others from using the bad state. 
- State::Unregistered => panic!("corrupted state: State::Unregistered"), - State::Registered(_) => { - *lock = State::Pending; + State::Unregistered(_) => panic!("corrupted state: State::Unregistered"), + State::Registered(_, waker) => { + *lock = State::Pending(waker.clone()); TransitionToPending::Success } // don't unlock — poisoning the `Mutex` stops others from using the bad state. - state @ (State::Pending | State::WokenUp) => panic!("corrupted state: {state:#?}"), + state @ (State::Pending(_) | State::WokenUp) => panic!("corrupted state: {state:#?}"), State::Cancelling => TransitionToPending::Cancelling, } } @@ -221,8 +243,8 @@ impl Handle { let mut lock = self.entry.state.lock(); match *lock { - State::Unregistered => *lock = State::Cancelling, - State::Registered(ref tx) => { + State::Unregistered(_) => *lock = State::Cancelling, + State::Registered(ref tx, _) => { // Safety: entry is not in any cancellation queue unsafe { tx.send(self.clone()); @@ -230,7 +252,7 @@ impl Handle { *lock = State::Cancelling; } // no need to cancel a pending or woken up entry - State::Pending | State::WokenUp => *lock = State::Cancelling, + State::Pending(_) | State::WokenUp => *lock = State::Cancelling, // don't unlock — poisoning the `Mutex` stops others from using the bad state. State::Cancelling => panic!("should not be called twice"), } @@ -241,11 +263,11 @@ impl Handle { } pub(crate) fn is_registered(&self) -> bool { - matches!(*self.entry.state.lock(), State::Registered(_)) + matches!(*self.entry.state.lock(), State::Registered(_, _)) } pub(crate) fn is_pending(&self) -> bool { - matches!(*self.entry.state.lock(), State::Pending) + matches!(*self.entry.state.lock(), State::Pending(_)) } pub(crate) fn is_woken_up(&self) -> bool { From c5eeb3da279c6dda4ee07a3c1ca16b91d10e8561 Mon Sep 17 00:00:00 2001 From: Qi Date: Wed, 8 Oct 2025 18:36:29 +0800 Subject: [PATCH 049/100] fixup! 
eliminate atomic state by `Mutex` of `Entry` --- tokio/src/macros/cfg.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/tokio/src/macros/cfg.rs b/tokio/src/macros/cfg.rs index 3615cb79948..58c17134d0d 100644 --- a/tokio/src/macros/cfg.rs +++ b/tokio/src/macros/cfg.rs @@ -94,7 +94,6 @@ macro_rules! cfg_atomic_waker_impl { feature = "process", feature = "rt", feature = "signal", - feature = "time", ))] #[cfg(not(loom))] $item From 52150890987c054befb283e7d06f3cd391ccc9cd Mon Sep 17 00:00:00 2001 From: Qi Date: Wed, 8 Oct 2025 20:23:49 +0800 Subject: [PATCH 050/100] cleanup the `Context::park_internal` Signed-off-by: ADD-SP --- .../runtime/scheduler/current_thread/mod.rs | 182 +++++++++++------- .../runtime/scheduler/multi_thread/worker.rs | 166 +++++++++++----- tokio/src/runtime/time/mod.rs | 4 +- 3 files changed, 237 insertions(+), 115 deletions(-) diff --git a/tokio/src/runtime/scheduler/current_thread/mod.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs index b8b2a7d46bc..69fbc6d5a3c 100644 --- a/tokio/src/runtime/scheduler/current_thread/mod.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -460,46 +460,11 @@ impl Context { ) -> Box { debug_assert!(core.driver.is_none()); - #[cfg(feature = "time")] - let (core, duration, maybe_advance_duration) = { - // declare as mutable to avoid compiler warning, - // otherwise the compiler will complain that the `core` parameter does not need to be mutable - // if the 'time' feature is not enabled. 
- let mut core = core; - - let mut time_context = core.time_context.take().unwrap(); - let (mut core, (should_yield, next_timer)) = self.enter(core, || { - util::time::remove_cancelled_timers( - &mut time_context.wheel, - &mut time_context.canc_rx, - ); - let should_yield = util::time::insert_inject_timers( - &mut time_context.wheel, - &time_context.canc_tx, - handle.take_remote_timers(), - ); - let next_timer = - util::time::next_expiration_time(&time_context.wheel, &handle.driver); - (should_yield, next_timer) - }); - assert!(core.time_context.replace(time_context).is_none()); - - if should_yield { - (core, Some(Duration::from_millis(0)), None) - } else { - let dur = match (next_timer, duration) { - (Some(next_timer), Some(park_duration)) => Some(next_timer.min(park_duration)), - (Some(next_timer), None) => Some(next_timer), - (None, Some(park_duration)) => Some(park_duration), - (None, None) => None, - }; - if util::time::pre_auto_advance(&handle.driver, dur) { - (core, Some(Duration::ZERO), dur) - } else { - (core, dur, None) - } - } - }; + let MaintainLocalTimer { + core, + park_duration: duration, + auto_advance_duration, + } = self.maintain_local_timers_before_parking(core, handle, duration); let (core, ()) = self.enter(core, || { if let Some(duration) = duration { @@ -511,30 +476,7 @@ impl Context { self.defer.wake(); - #[cfg(feature = "time")] - let core = { - // declare as mutable to avoid compiler warning - // error: variable does not need to be mutable - // --> tokio/src/runtime/scheduler/current_thread/mod.rs:497:14 - // | - // 497 | let (mut core, ()) = self.enter(core, || { - // | ----^^^^ - // | | - // | help: remove this `mut` - // | - let mut core = core; - - let mut time_context = core.time_context.take().unwrap(); - let (mut core, ()) = self.enter(core, || { - util::time::post_auto_advance(&handle.driver, maybe_advance_duration); - util::time::process_expired_timers(&mut time_context.wheel, &handle.driver); - }); - core.time_context = 
Some(time_context); - - core - }; - - core + self.maintain_local_timers_after_parking(core, handle, auto_advance_duration) } fn enter(&self, core: Box, f: impl FnOnce() -> R) -> (Box, R) { @@ -556,6 +498,89 @@ impl Context { } cfg_time! { + /// Maintain local timers before parking the resource driver. + /// + /// * Remove cancelled timers from the local timer wheel. + /// * Register remote timers to the local timer wheel. + /// * Adjust the park duration based on + /// * the next timer expiration time. + /// * whether auto-advancing is required (feature = "test-util"). + /// + /// # Returns + /// + /// `(Box, park_duration, auto_advance_duration)` + fn maintain_local_timers_before_parking( + &self, + core: Box, + handle: &Handle, + park_duration: Option + ) -> MaintainLocalTimer { + let (core, park_duration, auto_advance_duration) = { + let (core, (should_yield, next_timer)) = + self.enter_with_time_context(core, |time_cx| { + util::time::remove_cancelled_timers(&mut time_cx.wheel, &mut time_cx.canc_rx); + let should_yield = util::time::insert_inject_timers( + &mut time_cx.wheel, + &time_cx.canc_tx, + handle.take_remote_timers(), + ); + let next_timer = + util::time::next_expiration_time(&time_cx.wheel, &handle.driver); + (should_yield, next_timer) + }); + + if should_yield { + (core, Some(Duration::from_millis(0)), None) + } else { + let dur = match (next_timer, park_duration) { + (Some(next_timer), Some(park_duration)) => Some(next_timer.min(park_duration)), + (Some(next_timer), None) => Some(next_timer), + (None, Some(park_duration)) => Some(park_duration), + (None, None) => None, + }; + if util::time::pre_auto_advance(&handle.driver, dur) { + (core, Some(Duration::ZERO), dur) + } else { + (core, dur, None) + } + } + }; + + MaintainLocalTimer { core, park_duration, auto_advance_duration } + } + + /// Maintain local timers after unparking the resource driver. + /// + /// * Auto-advance time, if required (feature = "test-util"). + /// * Process expired timers. 
+ fn maintain_local_timers_after_parking( + &self, + core: Box, + handle: &Handle, + auto_advance_duration: Option + ) -> Box { + let (core, ()) = self.enter_with_time_context(core, |time_cx| { + util::time::post_auto_advance(&handle.driver, auto_advance_duration); + util::time::process_expired_timers(&mut time_cx.wheel, &handle.driver); + }); + core + } + + /// Take out the time context from the core, + /// and then setup the [`Core`] to the thread-local [`Context`], + /// finally, run the provided closure `f` with the time context. + fn enter_with_time_context(&self, mut core: Box, f: F) -> (Box, R) + where + F: FnOnce(&mut crate::runtime::time::Context2) -> R, + { + let mut time_cx = core.time_context.take().expect("time context missing"); + let (mut core, ret) = self.enter(core, || { + f(&mut time_cx) + }); + assert!(core.time_context.replace(time_cx).is_none()); + (core, ret) + } + fn with_core(&self, f: F) -> R where F: FnOnce(Option<&mut Core>) -> R, @@ -585,7 +610,27 @@ impl Context { } }) } - } + } // cfg_time! + + cfg_not_time! { + fn maintain_local_timers_before_parking( + &self, + core: Box, + _handle: &Handle, + park_duration: Option + ) -> MaintainLocalTimer { + MaintainLocalTimer { core, park_duration, auto_advance_duration: None } + } + + fn maintain_local_timers_after_parking( + &self, + core: Box, + _handle: &Handle, + _auto_advance_duration: Option + ) -> Box { + core + } + } // cfg_not_time! } // ===== impl Handle ===== @@ -1022,3 +1067,10 @@ impl Drop for CoreGuard<'_> { } } } + +/// Returned by [`Context::maintain_local_timers_before_parking`]. 
+struct MaintainLocalTimer { + core: Box, + park_duration: Option, + auto_advance_duration: Option, +} diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index 3ca9b3fa4b7..2d33c09fec2 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -794,42 +794,14 @@ impl Context { self.park_internal(core, Some(Duration::from_millis(0))) } - fn park_internal(&self, mut core: Box, duration: Option) -> Box { + fn park_internal(&self, core: Box, duration: Option) -> Box { self.assert_lifo_enabled_is_correct(&core); - #[cfg(feature = "time")] - let (mut core, duration, maybe_advance_duration) = { - let handle = &self.worker.handle; - - let mut time_context = core.time_context.take().expect("time context missing"); - // Store `core` in context - *self.core.borrow_mut() = Some(core); - util::time::remove_cancelled_timers(&mut time_context.wheel, &mut time_context.canc_rx); - let should_yield = util::time::insert_inject_timers( - &mut time_context.wheel, - &time_context.canc_tx, - handle.take_remote_timers(), - ); - let next_timer = util::time::next_expiration_time(&time_context.wheel, &handle.driver); - let mut core = self.core.borrow_mut().take().expect("core missing"); - core.time_context = Some(time_context); - - if should_yield { - (core, Some(Duration::from_millis(0)), None) - } else { - let dur = match (next_timer, duration) { - (Some(next_timer), Some(park_duration)) => Some(next_timer.min(park_duration)), - (Some(next_timer), None) => Some(next_timer), - (None, Some(park_duration)) => Some(park_duration), - (None, None) => None, - }; - if util::time::pre_auto_advance(&handle.driver, dur) { - (core, Some(Duration::ZERO), dur) - } else { - (core, dur, None) - } - } - }; + let MaintainLocalTimer { + mut core, + park_duration: duration, + auto_advance_duration, + } = self.maintain_local_timers_before_parking(core, duration); // Take the parker 
out of core let mut park = core.park.take().expect("park missing"); @@ -846,26 +818,14 @@ impl Context { self.defer.wake(); - #[cfg(feature = "time")] - { - let handle = &self.worker.handle; - // Remove `core` from context - core = self.core.borrow_mut().take().expect("core missing"); - let mut time_context = core.time_context.take().expect("time context missing"); - *self.core.borrow_mut() = Some(core); - util::time::post_auto_advance(&handle.driver, maybe_advance_duration); - util::time::process_expired_timers(&mut time_context.wheel, &handle.driver); - core = self.core.borrow_mut().take().expect("core missing"); - core.time_context = Some(time_context); - assert!(self.core.borrow_mut().replace(core).is_none()); - } - // Remove `core` from context core = self.core.borrow_mut().take().expect("core missing"); // Place `park` back in `core` core.park = Some(park); + core = self.maintain_local_timers_after_parking(core, auto_advance_duration); + if core.should_notify_others() { self.worker.handle.notify_parked_local(); } @@ -884,6 +844,89 @@ impl Context { } cfg_time! { + /// Maintain local timers before parking the resource driver. + /// + /// * Remove cancelled timers from the local timer wheel. + /// * Register remote timers to the local timer wheel. + /// * Adjust the park duration based on + /// * the next timer expiration time. + /// * whether auto-advancing is required (feature = "test-util"). 
+ /// + /// # Returns + /// + /// `(Box, park_duration, auto_advance_duration)` + fn maintain_local_timers_before_parking( + &self,core: Box, + park_duration: Option + ) -> MaintainLocalTimer { + let (core, park_duration, auto_advance_duration) = { + let handle = &self.worker.handle; + + let (core, (should_yield, next_timer)) = + self.enter_with_time_context(core, |time_cx| { + util::time::remove_cancelled_timers(&mut time_cx.wheel, &mut time_cx.canc_rx); + let should_yield = util::time::insert_inject_timers( + &mut time_cx.wheel, + &time_cx.canc_tx, + handle.take_remote_timers(), + ); + let next_timer = + util::time::next_expiration_time(&time_cx.wheel, &handle.driver); + (should_yield, next_timer) + }); + + if should_yield { + (core, Some(Duration::from_millis(0)), None) + } else { + let dur = match (next_timer, park_duration) { + (Some(next_timer), Some(park_duration)) => Some(next_timer.min(park_duration)), + (Some(next_timer), None) => Some(next_timer), + (None, Some(park_duration)) => Some(park_duration), + (None, None) => None, + }; + if util::time::pre_auto_advance(&handle.driver, dur) { + (core, Some(Duration::ZERO), dur) + } else { + (core, dur, None) + } + } + }; + + MaintainLocalTimer { core, park_duration, auto_advance_duration } + } + + /// Maintain local timers after unparking the resource driver. + /// + /// * Auto-advance time, if required (feature = "test-util"). + /// * Process expired timers. + fn maintain_local_timers_after_parking( + &self, + core: Box, + auto_advance_duration: Option + ) -> Box { + let handle = &self.worker.handle; + let (core, ()) = self.enter_with_time_context(core, |time_cx| { + util::time::post_auto_advance(&handle.driver, auto_advance_duration); + util::time::process_expired_timers(&mut time_cx.wheel, &handle.driver); + }); + core + } + + /// Take out the time context from the core, + /// and then setup the [`Core`] to the thread-local [`Context`], + /// finally, run the provided closure `f` with the time context. 
+ fn enter_with_time_context(&self, mut core: Box, f: F) -> (Box, R) + where + F: FnOnce(&mut crate::runtime::time::Context2) -> R, + { + let mut time_cx = core.time_context.take().expect("time context missing"); + assert!(self.core.borrow_mut().replace(core).is_none()); + let ret = f(&mut time_cx); + let mut core = self.core.borrow_mut().take().expect("core missing"); + assert!(core.time_context.replace(time_cx).is_none()); + (core, ret) + } + fn with_core(&self, f: F) -> R where F: FnOnce(Option<&mut Core>) -> R, @@ -914,7 +957,25 @@ impl Context { } }) } - } + } // cfg_time! + + cfg_not_time! { + fn maintain_local_timers_before_parking( + &self, + core: Box, + park_duration: Option + ) -> MaintainLocalTimer { + MaintainLocalTimer { core, park_duration, auto_advance_duration: None } + } + + fn maintain_local_timers_after_parking( + &self, + core: Box, + _auto_advance_duration: Option + ) -> Box { + core + } + } // cfg_not_time! } impl Core { @@ -1391,6 +1452,13 @@ impl<'a> Lock for &'a Handle { } } +/// Returned by [`Context::maintain_local_timers_before_parking`]. +struct MaintainLocalTimer { + core: Box, + park_duration: Option, + auto_advance_duration: Option, +} + #[track_caller] fn with_current(f: impl FnOnce(Option<&Context>) -> R) -> R { use scheduler::Context::MultiThread; diff --git a/tokio/src/runtime/time/mod.rs b/tokio/src/runtime/time/mod.rs index 99e196d7df8..81a7484711a 100644 --- a/tokio/src/runtime/time/mod.rs +++ b/tokio/src/runtime/time/mod.rs @@ -138,7 +138,7 @@ impl Driver { } cfg_rt_or_time! { - /// Local context for the time driver. + /// Local context for the time driver, used when creating timers. pub(crate) enum Context<'a> { /// The runtime is running, we can access it. Running { @@ -152,6 +152,8 @@ cfg_rt_or_time! { Shutdown, } + /// Local context for the time driver, used when the runtime wants to + /// fire/cancel timers. 
pub(crate) struct Context2 { pub(crate) wheel: Wheel, pub(crate) canc_tx: cancellation_queue::Sender, From 57cd24427a185c3e8d78b8c347d6e57479654ce9 Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 12 Oct 2025 16:15:05 +0800 Subject: [PATCH 051/100] fix two bugs causes freqently park/unpark This commit fixes two bugs that cause very frequent park/unpark operations. First, if the next local timer expires in 10ms but the driver lock is currently held by another worker thread, we still need to park for 10ms. Second, keep `core.park == None` while maintaining the local timers, so that `notify_parked_local` is delayed until after parking. Signed-off-by: ADD-SP --- .../runtime/scheduler/multi_thread/park.rs | 29 +++++++++++++++---- .../runtime/scheduler/multi_thread/worker.rs | 12 ++++---- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/tokio/src/runtime/scheduler/multi_thread/park.rs b/tokio/src/runtime/scheduler/multi_thread/park.rs index fab35ce8283..74e6b6f3d3c 100644 --- a/tokio/src/runtime/scheduler/multi_thread/park.rs +++ b/tokio/src/runtime/scheduler/multi_thread/park.rs @@ -73,6 +73,8 @@ impl Parker { pub(crate) fn park_timeout(&mut self, handle: &driver::Handle, duration: Duration) { if let Some(mut driver) = self.inner.shared.driver.try_lock() { self.inner.park_driver(&mut driver, handle, Some(duration)); + } else if !duration.is_zero() { + self.inner.park_condvar(Some(duration)); } else { // https://github.com/tokio-rs/tokio/issues/6536 // Hacky, but it's just for loom tests. 
The counter gets incremented during @@ -123,11 +125,11 @@ impl Inner { if let Some(mut driver) = self.shared.driver.try_lock() { self.park_driver(&mut driver, handle, None); } else { - self.park_condvar(); + self.park_condvar(None); } } - fn park_condvar(&self) { + fn park_condvar(&self, duration: Option) { // Otherwise we need to coordinate going to sleep let mut m = self.mutex.lock(); @@ -152,9 +154,26 @@ impl Inner { } loop { - m = self.condvar.wait(m).unwrap(); - - if self + let is_timeout; + (m, is_timeout) = match duration { + Some(dur) => { + assert_ne!(dur, Duration::ZERO); + let (m, res) = self.condvar.wait_timeout(m, dur).unwrap(); + (m, res.timed_out()) + } + None => (self.condvar.wait(m).unwrap(), false), + }; + + if is_timeout { + match self.state.swap(EMPTY, SeqCst) { + PARKED_CONDVAR => return, // timed out, and no notification received + NOTIFIED => return, // nofication and timeout happened concurrently, + // treat as notification + _ => return, // surious wakeup, since this function is called with a timeout, + // we cannot go back to sleep. + // Otherwise, we may miss the expired timers. 
+ } + } else if self .state .compare_exchange(NOTIFIED, EMPTY, SeqCst, SeqCst) .is_ok() diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index 2d33c09fec2..a3a8c63e88d 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -794,18 +794,18 @@ impl Context { self.park_internal(core, Some(Duration::from_millis(0))) } - fn park_internal(&self, core: Box, duration: Option) -> Box { + fn park_internal(&self, mut core: Box, duration: Option) -> Box { self.assert_lifo_enabled_is_correct(&core); + // Take the parker out of core + let mut park = core.park.take().expect("park missing"); + let MaintainLocalTimer { mut core, park_duration: duration, auto_advance_duration, } = self.maintain_local_timers_before_parking(core, duration); - // Take the parker out of core - let mut park = core.park.take().expect("park missing"); - // Store `core` in context *self.core.borrow_mut() = Some(core); @@ -821,11 +821,11 @@ impl Context { // Remove `core` from context core = self.core.borrow_mut().take().expect("core missing"); + core = self.maintain_local_timers_after_parking(core, auto_advance_duration); + // Place `park` back in `core` core.park = Some(park); - core = self.maintain_local_timers_after_parking(core, auto_advance_duration); - if core.should_notify_others() { self.worker.handle.notify_parked_local(); } From 88cd24b0ff1f088177baad12e0ccbfa7fcffcf64 Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 12 Oct 2025 18:32:39 +0800 Subject: [PATCH 052/100] fixup! 
fix two bugs causes freqently park/unpark --- .../src/runtime/scheduler/multi_thread/park.rs | 17 ++++++++++++++--- .../runtime/scheduler/multi_thread/worker.rs | 8 ++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/tokio/src/runtime/scheduler/multi_thread/park.rs b/tokio/src/runtime/scheduler/multi_thread/park.rs index 74e6b6f3d3c..22fdeaca36d 100644 --- a/tokio/src/runtime/scheduler/multi_thread/park.rs +++ b/tokio/src/runtime/scheduler/multi_thread/park.rs @@ -70,6 +70,11 @@ impl Parker { self.inner.park(handle); } + /// Parks the current thread for up to `duration`. + /// + /// This function tries to acquire the driver lock. If it succeeds, it + /// parks using the driver. Otherwise, it fails back to using a condvar, + /// unless the duration is zero, in which case it returns immediately. pub(crate) fn park_timeout(&mut self, handle: &driver::Handle, duration: Duration) { if let Some(mut driver) = self.inner.shared.driver.try_lock() { self.inner.park_driver(&mut driver, handle, Some(duration)); @@ -129,6 +134,13 @@ impl Inner { } } + /// Parks the current thread using a condvar for up to `duration`. + /// + /// If `duration` is `None`, parks indefinitely until notified. + /// + /// # Panics + /// + /// Panics if `duration` is `Some` and the duration is zero. fn park_condvar(&self, duration: Option) { // Otherwise we need to coordinate going to sleep let mut m = self.mutex.lock(); @@ -167,9 +179,8 @@ impl Inner { if is_timeout { match self.state.swap(EMPTY, SeqCst) { PARKED_CONDVAR => return, // timed out, and no notification received - NOTIFIED => return, // nofication and timeout happened concurrently, - // treat as notification - _ => return, // surious wakeup, since this function is called with a timeout, + NOTIFIED => return, // notification and timeout happened concurrently + _ => return, // spurious wakeup, since this function is called with a timeout, // we cannot go back to sleep. // Otherwise, we may miss the expired timers. 
} diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index a3a8c63e88d..da6f58077a2 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -800,6 +800,10 @@ impl Context { // Take the parker out of core let mut park = core.park.take().expect("park missing"); + // Must happens after taking out the parker, as the `Handle::schedule_local` + // will delay the notify if the parker taken out. + // + // See comments in `Handle::schedule_local` for more details. let MaintainLocalTimer { mut core, park_duration: duration, @@ -821,6 +825,10 @@ impl Context { // Remove `core` from context core = self.core.borrow_mut().take().expect("core missing"); + // Must happens before placing back the parker, as the `Handle::schedule_local` + // will delay the notify if the parker is still in `core`. + // + // See comments in `Handle::schedule_local` for more details. core = self.maintain_local_timers_after_parking(core, auto_advance_duration); // Place `park` back in `core` From 99a084f484e19677ea7adc0b2ddc7823fb250d53 Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 12 Oct 2025 19:00:31 +0800 Subject: [PATCH 053/100] fix `spellcheck.dic` Signed-off-by: ADD-SP --- spellcheck.dic | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spellcheck.dic b/spellcheck.dic index 8d4eb252ded..e377506bac6 100644 --- a/spellcheck.dic +++ b/spellcheck.dic @@ -1,4 +1,4 @@ -310 +311 & + < @@ -64,6 +64,7 @@ codec codecs combinator combinators +condvar config Config connectionless From 980c977bc6d098e9bf09565f501d9c828f226994 Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 12 Oct 2025 20:21:01 +0800 Subject: [PATCH 054/100] wake up the defer task correctly in the current thread runtime Signed-off-by: ADD-SP --- tokio/src/runtime/scheduler/current_thread/mod.rs | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git 
a/tokio/src/runtime/scheduler/current_thread/mod.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs index 15128768981..4fcdd57eb91 100644 --- a/tokio/src/runtime/scheduler/current_thread/mod.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -472,9 +472,9 @@ impl Context { } else { driver.park(&handle.driver); } - }); - self.defer.wake(); + self.defer.wake(); + }); self.maintain_local_timers_after_parking(core, handle, auto_advance_duration) } @@ -858,17 +858,10 @@ impl Schedule for Arc { Some(CurrentThread(cx)) if Arc::ptr_eq(self, &cx.handle) => { let mut core = cx.core.borrow_mut(); + // If `None`, the runtime is shutting down, so there is no need + // to schedule the task. if let Some(core) = core.as_mut() { core.push_task(self, task); - } else { - // runtime is shutting down - - // Track that a task was scheduled from **outside** of the runtime. - self.shared.scheduler_metrics.inc_remote_schedule_count(); - - // Schedule the task - self.shared.inject.push(task); - self.driver.unpark(); } } _ => { From ea37b238c7cb6f1434e406c0b6a7cb16b6efb01a Mon Sep 17 00:00:00 2001 From: Qi Date: Thu, 16 Oct 2025 21:54:14 +0800 Subject: [PATCH 055/100] cancel the timer locally if possible Signed-off-by: ADD-SP --- .../runtime/scheduler/current_thread/mod.rs | 1 - .../runtime/scheduler/multi_thread/worker.rs | 1 - tokio/src/runtime/scheduler/util.rs | 10 ++++-- tokio/src/runtime/time/timer.rs | 25 +++++++++---- tokio/src/runtime/time/wheel/entry.rs | 35 +++++++++++++------ tokio/src/runtime/time/wheel/mod.rs | 11 ++++-- 6 files changed, 60 insertions(+), 23 deletions(-) diff --git a/tokio/src/runtime/scheduler/current_thread/mod.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs index 4fcdd57eb91..6b8e70b378d 100644 --- a/tokio/src/runtime/scheduler/current_thread/mod.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -305,7 +305,6 @@ fn shutdown2(mut core: Box, handle: &Handle) -> Box { let mut time_context = core.time_context.take().unwrap(); 
util::time::shutdown_local_timers( &mut time_context.wheel, - &time_context.canc_tx, &mut time_context.canc_rx, handle.take_remote_timers(), &handle.driver, diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index da6f58077a2..5dbb73753f8 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -590,7 +590,6 @@ impl Context { let mut time_context = core.time_context.take().expect("time context missing"); util::time::shutdown_local_timers( &mut time_context.wheel, - &time_context.canc_tx, &mut time_context.canc_rx, self.worker.handle.take_remote_timers(), &self.worker.handle.driver, diff --git a/tokio/src/runtime/scheduler/util.rs b/tokio/src/runtime/scheduler/util.rs index 54c9de4a9f6..6b13180a745 100644 --- a/tokio/src/runtime/scheduler/util.rs +++ b/tokio/src/runtime/scheduler/util.rs @@ -11,9 +11,10 @@ cfg_rt_and_time! { ) -> bool { use crate::runtime::time::Insert; let mut fired = false; + let thread_id = crate::runtime::context::thread_id().expect("should not be called during the thread shutdown"); // process injected timers for hdl in inject { - match unsafe { wheel.insert(hdl.clone(), tx.clone()) } { + match unsafe { wheel.insert(hdl.clone(), tx.clone(), thread_id) } { Insert::Success => {} Insert::Elapsed => { hdl.wake_unregistered(); @@ -144,7 +145,6 @@ cfg_rt_and_time! { pub(crate) fn shutdown_local_timers( wheel: &mut Wheel, - tx: &Sender, rx: &mut Receiver, inject: Vec, drv_hdl: &driver::Handle, @@ -156,8 +156,12 @@ cfg_rt_and_time! 
{ }; remove_cancelled_timers(wheel, rx); - insert_inject_timers(wheel, tx, inject); time_hdl.shutdown(wheel); + + // simply wake all unregistered timers + for hdl in inject { + hdl.wake_unregistered(); + } }); } } diff --git a/tokio/src/runtime/time/timer.rs b/tokio/src/runtime/time/timer.rs index eb5ad4dfbd2..f69a6fdaa90 100644 --- a/tokio/src/runtime/time/timer.rs +++ b/tokio/src/runtime/time/timer.rs @@ -1,4 +1,5 @@ use super::wheel::EntryHandle; +use crate::runtime::context; use crate::runtime::scheduler::Handle as SchedulerHandle; use crate::runtime::time::wheel::Insert; use crate::runtime::time::Context as TimeContext; @@ -33,7 +34,18 @@ impl std::fmt::Debug for Timer { impl Drop for Timer { fn drop(&mut self) { if let Some(entry) = self.entry.take() { - entry.transition_to_cancelling(); + with_current_wheel(&self.sched_handle, |maybe_time_cx| { + if let Some(TimeContext::Running { wheel, canc_tx: _ }) = maybe_time_cx { + if let Ok(thread_id) = context::thread_id() { + if entry.can_be_cancelled_locally(thread_id) { + // Safety: we have verified that the entry is registered in this wheel. 
+ unsafe { wheel.remove(entry) }; + } + } + } else { + entry.transition_to_cancelling(); + } + }); } } } @@ -64,11 +76,12 @@ impl Timer { with_current_wheel(&this.sched_handle, |maybe_time_cx| { let deadline = deadline_to_tick(&this.sched_handle, this.deadline); let hdl = EntryHandle::new(deadline, cx.waker()); + let thread_id = context::thread_id().ok(); - match maybe_time_cx { - Some(TimeContext::Running { wheel, canc_tx }) => { + match (maybe_time_cx, thread_id) { + (Some(TimeContext::Running { wheel, canc_tx }), Some(thread_id)) => { // Safety: the entry is not registered yet - match unsafe { wheel.insert(hdl.clone(), canc_tx.clone()) } { + match unsafe { wheel.insert(hdl.clone(), canc_tx.clone(), thread_id) } { Insert::Success => { this.entry = Some(hdl); Poll::Pending @@ -78,8 +91,8 @@ impl Timer { } } #[cfg(feature = "rt-multi-thread")] - Some(TimeContext::Shutdown) => panic!("{RUNTIME_SHUTTING_DOWN_ERROR}"), - None => { + (Some(TimeContext::Shutdown), _) => panic!("{RUNTIME_SHUTTING_DOWN_ERROR}"), + _ => { this.entry = Some(hdl.clone()); push_from_remote(&this.sched_handle, hdl); Poll::Pending diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index e0f5d53b664..15babaee2a5 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -1,5 +1,6 @@ use super::cancellation_queue::Sender; use crate::loom::sync::{Arc, Mutex}; +use crate::runtime::ThreadId; use crate::util::linked_list; use std::marker::PhantomPinned; @@ -15,7 +16,7 @@ enum State { /// The entry is registered to the timer wheel, /// but not in the pending queue of the timer wheel. - Registered(Sender, Waker), + Registered(Sender, Waker, ThreadId), /// The entry is in the pending queue of the timer wheel, /// and not in any wheel level, which means that @@ -136,7 +137,7 @@ impl Handle { let mut lock = self.entry.state.lock(); match &*lock { // don't unlock — poisoning the `Mutex` stops others from using the bad state. 
- state @ (State::Unregistered(_) | State::Registered(_, _)) => { + state @ (State::Unregistered(..) | State::Registered(..)) => { panic!("corrupted state: {state:#?}") } State::Pending(_waker) => { @@ -173,7 +174,7 @@ impl Handle { } } // don't unlock — poisoning the `Mutex` stops others from using the bad state. - state @ (State::Registered(_, _) | State::WokenUp) => { + state @ (State::Registered(..) | State::WokenUp) => { panic!("corrupted state: {state:#?}") } // don't unlock — poisoning the `Mutex` stops others from using the bad state. @@ -191,7 +192,7 @@ impl Handle { *old_waker = waker.clone(); } } - State::Registered(_, old_waker) => { + State::Registered(_, old_waker, _) => { if !old_waker.will_wake(waker) { *old_waker = waker.clone(); } @@ -205,15 +206,19 @@ impl Handle { } } - pub(crate) fn transition_to_registered(&self, cancel_tx: Sender) -> TransitionToRegistered { + pub(crate) fn transition_to_registered( + &self, + cancel_tx: Sender, + thread_id: ThreadId, + ) -> TransitionToRegistered { let mut lock = self.entry.state.lock(); match &*lock { State::Unregistered(waker) => { - *lock = State::Registered(cancel_tx, waker.clone()); + *lock = State::Registered(cancel_tx, waker.clone(), thread_id); TransitionToRegistered::Success } // don't unlock — poisoning the `Mutex` stops others from using the bad state. - state @ (State::Registered(_, _) | State::Pending(_) | State::WokenUp) => { + state @ (State::Registered(..) | State::Pending(..) | State::WokenUp) => { panic!("corrupted state: {state:#?}") } State::Cancelling => TransitionToRegistered::Cancelling, @@ -229,7 +234,7 @@ impl Handle { match &*lock { // don't unlock — poisoning the `Mutex` stops others from using the bad state. 
State::Unregistered(_) => panic!("corrupted state: State::Unregistered"), - State::Registered(_, waker) => { + State::Registered(_, waker, _) => { *lock = State::Pending(waker.clone()); TransitionToPending::Success } @@ -244,7 +249,7 @@ impl Handle { match *lock { State::Unregistered(_) => *lock = State::Cancelling, - State::Registered(ref tx, _) => { + State::Registered(ref tx, _, _) => { // Safety: entry is not in any cancellation queue unsafe { tx.send(self.clone()); @@ -262,8 +267,18 @@ impl Handle { self.entry.deadline } + /// Equivalent to `is_registered() && thread_id == entry_thread_id`. + pub(crate) fn can_be_cancelled_locally(&self, thread_id: ThreadId) -> bool { + let lock = self.entry.state.lock(); + match &*lock { + State::Unregistered(_) => panic!("corrupted state: State::Unregistered"), + State::Registered(_, _, entry_thread_id) => *entry_thread_id == thread_id, + State::Pending(..) | State::WokenUp | State::Cancelling => false, + } + } + pub(crate) fn is_registered(&self) -> bool { - matches!(*self.entry.state.lock(), State::Registered(_, _)) + matches!(*self.entry.state.lock(), State::Registered(..)) } pub(crate) fn is_pending(&self) -> bool { diff --git a/tokio/src/runtime/time/wheel/mod.rs b/tokio/src/runtime/time/wheel/mod.rs index 3c6a98c61f7..19fda605516 100644 --- a/tokio/src/runtime/time/wheel/mod.rs +++ b/tokio/src/runtime/time/wheel/mod.rs @@ -1,4 +1,6 @@ mod level; +use crate::runtime::ThreadId; + pub(crate) use self::level::Expiration; use self::level::Level; @@ -86,7 +88,12 @@ impl Wheel { /// The caller must ensure: /// /// * The entry is not already registered in ANY wheel. 
- pub(crate) unsafe fn insert(&mut self, hdl: EntryHandle, cancel_tx: Sender) -> Insert { + pub(crate) unsafe fn insert( + &mut self, + hdl: EntryHandle, + cancel_tx: Sender, + thread_id: ThreadId, + ) -> Insert { let deadline = hdl.deadline(); if deadline <= self.elapsed { @@ -96,7 +103,7 @@ impl Wheel { // Get the level at which the entry should be stored let level = self.level_for(deadline); - match hdl.transition_to_registered(cancel_tx) { + match hdl.transition_to_registered(cancel_tx, thread_id) { TransitionToRegistered::Success => { unsafe { self.levels[level].add_entry(hdl); From df81bbd9367e3bd04ddd297ac72214abd56d111a Mon Sep 17 00:00:00 2001 From: Qi Date: Thu, 16 Oct 2025 22:22:56 +0800 Subject: [PATCH 056/100] fixup! cancel the timer locally if possible --- tokio/src/runtime/context.rs | 37 ++++++++++++++++++++-------------- tokio/src/runtime/mod.rs | 8 +++++--- tokio/src/runtime/thread_id.rs | 1 + 3 files changed, 28 insertions(+), 18 deletions(-) diff --git a/tokio/src/runtime/context.rs b/tokio/src/runtime/context.rs index d78935e7243..fa2aed6e786 100644 --- a/tokio/src/runtime/context.rs +++ b/tokio/src/runtime/context.rs @@ -136,21 +136,6 @@ pub(crate) fn budget(f: impl FnOnce(&Cell) -> R) -> Result Result { - CONTEXT.try_with(|ctx| { - match ctx.thread_id.get() { - Some(id) => id, - None => { - let id = ThreadId::next(); - ctx.thread_id.set(Some(id)); - id - } - } - }) - } - pub(crate) fn set_current_task_id(id: Option) -> Option { CONTEXT.try_with(|ctx| ctx.current_task_id.replace(id)).unwrap_or(None) } @@ -198,3 +183,25 @@ cfg_rt! { } } } + + +cfg_rt_or_time! 
{ + use crate::runtime::ThreadId; + + pub(crate) fn thread_id() -> Result { + #[cfg(not(feature = "rt"))] + panic!("thread_id() called without the 'rt' feature enabled"); + + #[cfg(feature = "rt")] + CONTEXT.try_with(|ctx| { + match ctx.thread_id.get() { + Some(id) => id, + None => { + let id = ThreadId::next(); + ctx.thread_id.set(Some(id)); + id + } + } + }) + } +} diff --git a/tokio/src/runtime/mod.rs b/tokio/src/runtime/mod.rs index ae58ce6da86..f15b8dcdea6 100644 --- a/tokio/src/runtime/mod.rs +++ b/tokio/src/runtime/mod.rs @@ -456,9 +456,6 @@ cfg_rt! { 16384 }; - mod thread_id; - pub(crate) use thread_id::ThreadId; - pub(crate) mod metrics; pub use metrics::RuntimeMetrics; @@ -475,3 +472,8 @@ cfg_rt! { /// After thread starts / before thread stops type Callback = std::sync::Arc; } + +cfg_rt_or_time! { + mod thread_id; + pub(crate) use thread_id::ThreadId; +} diff --git a/tokio/src/runtime/thread_id.rs b/tokio/src/runtime/thread_id.rs index ef392897963..c8c44c0b0e3 100644 --- a/tokio/src/runtime/thread_id.rs +++ b/tokio/src/runtime/thread_id.rs @@ -4,6 +4,7 @@ use std::num::NonZeroU64; pub(crate) struct ThreadId(NonZeroU64); impl ThreadId { + #[cfg_attr(not(feature = "rt"), expect(dead_code))] pub(crate) fn next() -> Self { use crate::loom::sync::atomic::{Ordering::Relaxed, StaticAtomicU64}; From 88d3596de44123c8dd59653454436a6f6a645569 Mon Sep 17 00:00:00 2001 From: Qi Date: Thu, 16 Oct 2025 22:26:51 +0800 Subject: [PATCH 057/100] fixup! cancel the timer locally if possible --- tokio/src/runtime/context.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/tokio/src/runtime/context.rs b/tokio/src/runtime/context.rs index fa2aed6e786..41bcb413f63 100644 --- a/tokio/src/runtime/context.rs +++ b/tokio/src/runtime/context.rs @@ -184,7 +184,6 @@ cfg_rt! { } } - cfg_rt_or_time! 
{ use crate::runtime::ThreadId; From d8b47b96508de0918ade5536374a797ca2aed81f Mon Sep 17 00:00:00 2001 From: Qi Date: Sat, 18 Oct 2025 18:48:34 +0800 Subject: [PATCH 058/100] small refactor of cancellation code path Signed-off-by: ADD-SP --- .../runtime/scheduler/multi_thread/park.rs | 7 +- tokio/src/runtime/scheduler/util.rs | 15 +-- tokio/src/runtime/time/mod.rs | 2 +- tokio/src/runtime/time/timer.rs | 21 +++- tokio/src/runtime/time/wheel/entry.rs | 100 ++++++++++-------- tokio/src/runtime/time/wheel/mod.rs | 1 + 6 files changed, 84 insertions(+), 62 deletions(-) diff --git a/tokio/src/runtime/scheduler/multi_thread/park.rs b/tokio/src/runtime/scheduler/multi_thread/park.rs index 22fdeaca36d..a9e4fc689ea 100644 --- a/tokio/src/runtime/scheduler/multi_thread/park.rs +++ b/tokio/src/runtime/scheduler/multi_thread/park.rs @@ -180,9 +180,10 @@ impl Inner { match self.state.swap(EMPTY, SeqCst) { PARKED_CONDVAR => return, // timed out, and no notification received NOTIFIED => return, // notification and timeout happened concurrently - _ => return, // spurious wakeup, since this function is called with a timeout, - // we cannot go back to sleep. - // Otherwise, we may miss the expired timers. + actual @ (PARKED_DRIVER | EMPTY) => { + panic!("inconsistent park_timeout state, actual = {actual}") + } + invalid => panic!("invalid park_timeout state, actual = {invalid}"), } } else if self .state diff --git a/tokio/src/runtime/scheduler/util.rs b/tokio/src/runtime/scheduler/util.rs index 6b13180a745..a5eddde410f 100644 --- a/tokio/src/runtime/scheduler/util.rs +++ b/tokio/src/runtime/scheduler/util.rs @@ -1,7 +1,8 @@ cfg_rt_and_time! 
{ pub(crate) mod time { use crate::runtime::{scheduler::driver}; - use crate::runtime::time::{EntryHandle, Wheel, cancellation_queue::{Sender, Receiver}}; + use crate::runtime::time::{EntryHandle, EntryState, Wheel}; + use crate::runtime::time::cancellation_queue::{Sender, Receiver}; use std::time::Duration; pub(crate) fn insert_inject_timers( @@ -32,12 +33,14 @@ cfg_rt_and_time! { rx: &mut Receiver, ) { for hdl in rx.recv_all() { - unsafe { - let is_registered = hdl.is_registered(); - let is_pending = hdl.is_pending(); - if is_registered && !is_pending { - wheel.remove(hdl); + match hdl.state() { + // INVARIANT: unregistered entry should not be in the wheel. + EntryState::Unregistered => unreachable!(), + EntryState::Registered(_thread_id) | EntryState::Pending(_thread_id) => { + // Safety: we have verified that the entry is registered in this wheel. + unsafe { wheel.remove(hdl) }; } + EntryState::Cancelling | EntryState::WokenUp => (), } } } diff --git a/tokio/src/runtime/time/mod.rs b/tokio/src/runtime/time/mod.rs index 81a7484711a..38f483eec3b 100644 --- a/tokio/src/runtime/time/mod.rs +++ b/tokio/src/runtime/time/mod.rs @@ -17,7 +17,7 @@ pub(crate) use source::TimeSource; mod wheel; cfg_rt_and_time! { - pub(crate) use wheel::{Insert, EntryHandle}; + pub(crate) use wheel::{Insert, EntryHandle, EntryState}; } cfg_rt_or_time! 
{ pub(crate) use wheel::cancellation_queue; diff --git a/tokio/src/runtime/time/timer.rs b/tokio/src/runtime/time/timer.rs index f69a6fdaa90..189c438e5cc 100644 --- a/tokio/src/runtime/time/timer.rs +++ b/tokio/src/runtime/time/timer.rs @@ -1,4 +1,4 @@ -use super::wheel::EntryHandle; +use super::wheel::{EntryHandle, EntryState}; use crate::runtime::context; use crate::runtime::scheduler::Handle as SchedulerHandle; use crate::runtime::time::wheel::Insert; @@ -36,10 +36,21 @@ impl Drop for Timer { if let Some(entry) = self.entry.take() { with_current_wheel(&self.sched_handle, |maybe_time_cx| { if let Some(TimeContext::Running { wheel, canc_tx: _ }) = maybe_time_cx { - if let Ok(thread_id) = context::thread_id() { - if entry.can_be_cancelled_locally(thread_id) { - // Safety: we have verified that the entry is registered in this wheel. - unsafe { wheel.remove(entry) }; + if let Ok(curr_id) = context::thread_id() { + match entry.state() { + // INVARIANT: `self.entry` is `Some` only after the timer is registered. + EntryState::Unregistered => unreachable!(), + EntryState::Registered(thread_id) | EntryState::Pending(thread_id) + if thread_id == curr_id => + { + // Safety: we have verified that the entry is registered in this wheel. + unsafe { wheel.remove(entry) }; + } + // thread_id doesn't match or entry is in WokenUp/Cancelling state + EntryState::Registered(..) + | EntryState::Pending(..) + | EntryState::Cancelling // entry is already in cancellation queue, nothing to do + | EntryState::WokenUp => (), // entry is already woken up, nothing to do } } } else { diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index 15babaee2a5..ba90deb255c 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -10,7 +10,7 @@ use std::task::Waker; pub(crate) type EntryList = linked_list::LinkedList; #[derive(Debug)] -enum State { +enum PrivState { /// A pure new entry, no any changes to the state. 
Unregistered(Waker), @@ -21,7 +21,7 @@ enum State { /// The entry is in the pending queue of the timer wheel, /// and not in any wheel level, which means that /// the entry is reached its deadline and waiting to be woken up. - Pending(Waker), + Pending(Sender, Waker, ThreadId), /// The waker has been called, and the entry is no longer in the timer wheel /// (both each wheel level and the pending queue), which means that @@ -43,7 +43,7 @@ pub(crate) struct Entry { /// The tick when this entry is scheduled to expire. deadline: u64, - state: Mutex, + state: Mutex, /// Make the type `!Unpin` to prevent LLVM from emitting /// the `noalias` attribute for mutable references. @@ -125,7 +125,7 @@ impl Handle { wheel_pointers: linked_list::Pointers::new(), cancel_pointers: linked_list::Pointers::new(), deadline, - state: Mutex::new(State::Unregistered(waker.clone())), + state: Mutex::new(PrivState::Unregistered(waker.clone())), _pin: PhantomPinned, }); @@ -137,24 +137,24 @@ impl Handle { let mut lock = self.entry.state.lock(); match &*lock { // don't unlock — poisoning the `Mutex` stops others from using the bad state. - state @ (State::Unregistered(..) | State::Registered(..)) => { + state @ (PrivState::Unregistered(..) | PrivState::Registered(..)) => { panic!("corrupted state: {state:#?}") } - State::Pending(_waker) => { - let old_state = std::mem::replace(&mut *lock, State::WokenUp); + PrivState::Pending(..) => { + let old_state = std::mem::replace(&mut *lock, PrivState::WokenUp); // Since state has been updated, no need to hold the lock. drop(lock); - if let State::Pending(old_waker) = old_state { + if let PrivState::Pending(_, waker, ..) = old_state { // Merge the wakers to ensure that the most recent waker is used. - old_waker.wake(); + waker.wake(); } else { unreachable!() } } // don't unlock — poisoning the `Mutex` stops others from using the bad state. 
- State::WokenUp => panic!("corrupted state: `State::WokenUp`"), + PrivState::WokenUp => panic!("corrupted state: `State::WokenUp`"), // no need to wake up cancelling entry - State::Cancelling => (), + PrivState::Cancelling => (), } } @@ -162,11 +162,11 @@ impl Handle { pub(crate) fn wake_unregistered(&self) { let mut lock = self.entry.state.lock(); match &*lock { - State::Unregistered(_waker) => { - let old_state = std::mem::replace(&mut *lock, State::WokenUp); + PrivState::Unregistered(_waker) => { + let old_state = std::mem::replace(&mut *lock, PrivState::WokenUp); // Since state has been updated, no need to hold the lock. drop(lock); - if let State::Unregistered(old_waker) = old_state { + if let PrivState::Unregistered(old_waker) = old_state { // Merge the wakers to ensure that the most recent waker is used. old_waker.wake(); } else { @@ -174,35 +174,35 @@ impl Handle { } } // don't unlock — poisoning the `Mutex` stops others from using the bad state. - state @ (State::Registered(..) | State::WokenUp) => { + state @ (PrivState::Registered(..) | PrivState::WokenUp) => { panic!("corrupted state: {state:#?}") } // don't unlock — poisoning the `Mutex` stops others from using the bad state. - State::Pending(_) => panic!("corrupted state: State::Pending"), + PrivState::Pending(..) => panic!("corrupted state: State::Pending"), // don't wake up cancelling entries - State::Cancelling => (), + PrivState::Cancelling => (), } } pub(crate) fn register_waker(&self, waker: &Waker) { let mut lock = self.entry.state.lock(); match &mut *lock { - State::Unregistered(old_waker) => { + PrivState::Unregistered(old_waker) => { if !old_waker.will_wake(waker) { *old_waker = waker.clone(); } } - State::Registered(_, old_waker, _) => { + PrivState::Registered(_, old_waker, _) => { if !old_waker.will_wake(waker) { *old_waker = waker.clone(); } } - State::Pending(old_waker) => { + PrivState::Pending(_, old_waker, ..) 
=> { if !old_waker.will_wake(waker) { *old_waker = waker.clone(); } } - State::WokenUp | State::Cancelling => (), // no need to update the waker + PrivState::WokenUp | PrivState::Cancelling => (), // no need to update the waker } } @@ -213,15 +213,15 @@ impl Handle { ) -> TransitionToRegistered { let mut lock = self.entry.state.lock(); match &*lock { - State::Unregistered(waker) => { - *lock = State::Registered(cancel_tx, waker.clone(), thread_id); + PrivState::Unregistered(waker) => { + *lock = PrivState::Registered(cancel_tx, waker.clone(), thread_id); TransitionToRegistered::Success } // don't unlock — poisoning the `Mutex` stops others from using the bad state. - state @ (State::Registered(..) | State::Pending(..) | State::WokenUp) => { + state @ (PrivState::Registered(..) | PrivState::Pending(..) | PrivState::WokenUp) => { panic!("corrupted state: {state:#?}") } - State::Cancelling => TransitionToRegistered::Cancelling, + PrivState::Cancelling => TransitionToRegistered::Cancelling, } } @@ -233,14 +233,16 @@ impl Handle { let mut lock = self.entry.state.lock(); match &*lock { // don't unlock — poisoning the `Mutex` stops others from using the bad state. - State::Unregistered(_) => panic!("corrupted state: State::Unregistered"), - State::Registered(_, waker, _) => { - *lock = State::Pending(waker.clone()); + PrivState::Unregistered(_) => panic!("corrupted state: State::Unregistered"), + PrivState::Registered(sender, waker, thread_id) => { + *lock = PrivState::Pending(sender.clone(), waker.clone(), *thread_id); TransitionToPending::Success } // don't unlock — poisoning the `Mutex` stops others from using the bad state. - state @ (State::Pending(_) | State::WokenUp) => panic!("corrupted state: {state:#?}"), - State::Cancelling => TransitionToPending::Cancelling, + state @ (PrivState::Pending(..) 
| PrivState::WokenUp) => { + panic!("corrupted state: {state:#?}") + } + PrivState::Cancelling => TransitionToPending::Cancelling, } } @@ -248,18 +250,17 @@ impl Handle { let mut lock = self.entry.state.lock(); match *lock { - State::Unregistered(_) => *lock = State::Cancelling, - State::Registered(ref tx, _, _) => { + PrivState::Unregistered(_) => *lock = PrivState::Cancelling, + PrivState::Registered(ref tx, _, _) | PrivState::Pending(ref tx, _, _) => { // Safety: entry is not in any cancellation queue unsafe { tx.send(self.clone()); } - *lock = State::Cancelling; + *lock = PrivState::Cancelling; } - // no need to cancel a pending or woken up entry - State::Pending(_) | State::WokenUp => *lock = State::Cancelling, + PrivState::WokenUp => *lock = PrivState::Cancelling, // don't unlock — poisoning the `Mutex` stops others from using the bad state. - State::Cancelling => panic!("should not be called twice"), + PrivState::Cancelling => panic!("should not be called twice"), } } @@ -267,26 +268,23 @@ impl Handle { self.entry.deadline } - /// Equivalent to `is_registered() && thread_id == entry_thread_id`. - pub(crate) fn can_be_cancelled_locally(&self, thread_id: ThreadId) -> bool { + pub(crate) fn state(&self) -> State { let lock = self.entry.state.lock(); match &*lock { - State::Unregistered(_) => panic!("corrupted state: State::Unregistered"), - State::Registered(_, _, entry_thread_id) => *entry_thread_id == thread_id, - State::Pending(..) 
| State::WokenUp | State::Cancelling => false, + PrivState::Unregistered(_) => State::Unregistered, + PrivState::Registered(_, _, thread_id) => State::Registered(*thread_id), + PrivState::Pending(_, _, thread_id) => State::Pending(*thread_id), + PrivState::WokenUp => State::WokenUp, + PrivState::Cancelling => State::Cancelling, } } - pub(crate) fn is_registered(&self) -> bool { - matches!(*self.entry.state.lock(), State::Registered(..)) - } - pub(crate) fn is_pending(&self) -> bool { - matches!(*self.entry.state.lock(), State::Pending(_)) + matches!(*self.entry.state.lock(), PrivState::Pending(..)) } pub(crate) fn is_woken_up(&self) -> bool { - matches!(*self.entry.state.lock(), State::WokenUp) + matches!(*self.entry.state.lock(), PrivState::WokenUp) } #[cfg(test)] @@ -321,3 +319,11 @@ pub(crate) enum TransitionToPending { /// no need to transition it to the pending state. Cancelling, } + +pub(crate) enum State { + Unregistered, + Registered(ThreadId), + Pending(ThreadId), + WokenUp, + Cancelling, +} diff --git a/tokio/src/runtime/time/wheel/mod.rs b/tokio/src/runtime/time/wheel/mod.rs index 19fda605516..51b448b835c 100644 --- a/tokio/src/runtime/time/wheel/mod.rs +++ b/tokio/src/runtime/time/wheel/mod.rs @@ -6,6 +6,7 @@ use self::level::Level; mod entry; pub(crate) use entry::Handle as EntryHandle; +pub(crate) use entry::State as EntryState; use entry::TransitionToPending; use entry::TransitionToRegistered; use entry::{CancellationQueueEntry, Entry, EntryList}; From 63a93c24a60fae380606bcae1cb1848dab3b59d1 Mon Sep 17 00:00:00 2001 From: Qi Date: Sat, 18 Oct 2025 20:46:28 +0800 Subject: [PATCH 059/100] bump the wasm memory limit for doc-tests Signed-off-by: ADD-SP --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c2f89cb6691..b76e23d4f8a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1058,6 +1058,7 @@ jobs: CARGO_TARGET_WASM32_WASIP1_RUNNER: "wasmtime 
run --" CARGO_TARGET_WASM32_WASIP1_THREADS_RUNNER: "wasmtime run -W bulk-memory=y -W threads=y -S threads=y --" RUSTFLAGS: --cfg tokio_unstable -Dwarnings -C target-feature=+atomics,+bulk-memory -C link-args=--max-memory=67108864 + RUSTDOCFLAGS: -C link-args=--max-memory=67108864 - name: WASI test tokio-stream run: cargo test -p tokio-stream --target ${{ matrix.target }} --features time,net,io-util,sync From aa413ddb4939054383244ff698d18cb3401d7ff6 Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 21 Oct 2025 08:15:01 +0800 Subject: [PATCH 060/100] fix potenial mem leak when dropping timer Signed-off-by: ADD-SP --- tokio/src/runtime/time/timer.rs | 40 +++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/tokio/src/runtime/time/timer.rs b/tokio/src/runtime/time/timer.rs index 189c438e5cc..dea39a4ad5a 100644 --- a/tokio/src/runtime/time/timer.rs +++ b/tokio/src/runtime/time/timer.rs @@ -16,7 +16,8 @@ pub(crate) struct Timer { /// The entry in the timing wheel. /// - /// This is `None` if the timer has been deregistered. + /// - `Some` if the timer is registered / pending / woken up / cancelling. + /// - `None` if the timer is unregistered. entry: Option, /// The deadline for the timer. @@ -35,24 +36,29 @@ impl Drop for Timer { fn drop(&mut self) { if let Some(entry) = self.entry.take() { with_current_wheel(&self.sched_handle, |maybe_time_cx| { + let Ok(cur_thread_id) = context::thread_id() else { + // current thread is shutting down, we cannot determine the thread id, + // so we need to fallback to the cancellation queue. + entry.transition_to_cancelling(); + return; + }; + if let Some(TimeContext::Running { wheel, canc_tx: _ }) = maybe_time_cx { - if let Ok(curr_id) = context::thread_id() { - match entry.state() { - // INVARIANT: `self.entry` is `Some` only after the timer is registered. 
- EntryState::Unregistered => unreachable!(), - EntryState::Registered(thread_id) | EntryState::Pending(thread_id) - if thread_id == curr_id => - { - // Safety: we have verified that the entry is registered in this wheel. - unsafe { wheel.remove(entry) }; - } - // thread_id doesn't match or entry is in WokenUp/Cancelling state - EntryState::Registered(..) - | EntryState::Pending(..) - | EntryState::Cancelling // entry is already in cancellation queue, nothing to do - | EntryState::WokenUp => (), // entry is already woken up, nothing to do + match entry.state() { + // INVARIANT: `self.entry` is `Some` only after the timer is registered. + EntryState::Unregistered => unreachable!(), + EntryState::Registered(thread_id) | EntryState::Pending(thread_id) + if thread_id == cur_thread_id => + { + // Safety: we have verified that the entry is registered in this wheel. + unsafe { wheel.remove(entry) }; + } + // thread_id doesn't match or entry is in WokenUp/Cancelling state + EntryState::Registered(..) // entry is registered in a different thread's wheel + | EntryState::Pending(..) 
// entry is registered in a different thread's wheel and is pending + | EntryState::Cancelling // entry is already in cancellation queue, nothing to do + | EntryState::WokenUp => (), // entry is already woken up, nothing to do } - } } else { entry.transition_to_cancelling(); } From 5e5312a2bd7d9f094fd131e4d1764aa7748d175f Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 21 Oct 2025 08:15:28 +0800 Subject: [PATCH 061/100] runtime: improve style by reducing indent level Signed-off-by: ADD-SP --- .../runtime/scheduler/current_thread/mod.rs | 61 ++++++++--------- .../runtime/scheduler/multi_thread/worker.rs | 65 ++++++++++--------- tokio/src/runtime/scheduler/util.rs | 9 +++ 3 files changed, 75 insertions(+), 60 deletions(-) diff --git a/tokio/src/runtime/scheduler/current_thread/mod.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs index 6b8e70b378d..749eb850bf9 100644 --- a/tokio/src/runtime/scheduler/current_thread/mod.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -514,38 +514,41 @@ impl Context { handle: &Handle, park_duration: Option ) -> MaintainLocalTimer { - let (core, park_duration, auto_advance_duration) = { - let (core, (should_yield, next_timer)) = - self.enter_with_time_context(core, |time_cx| { - util::time::remove_cancelled_timers(&mut time_cx.wheel, &mut time_cx.canc_rx); - let should_yield = util::time::insert_inject_timers( - &mut time_cx.wheel, - &time_cx.canc_tx, - handle.take_remote_timers(), - ); - let next_timer = - util::time::next_expiration_time(&time_cx.wheel, &handle.driver); - (should_yield, next_timer) - }); - - if should_yield { - (core, Some(Duration::from_millis(0)), None) + let (core, (should_yield, next_timer)) = + self.enter_with_time_context(core, |time_cx| { + util::time::remove_cancelled_timers(&mut time_cx.wheel, &mut time_cx.canc_rx); + let should_yield = util::time::insert_inject_timers( + &mut time_cx.wheel, + &time_cx.canc_tx, + handle.take_remote_timers(), + ); + let next_timer = + 
util::time::next_expiration_time(&time_cx.wheel, &handle.driver); + (should_yield, next_timer) + }); + + if should_yield { + MaintainLocalTimer { + core, + park_duration: Some(Duration::ZERO), + auto_advance_duration: None, + } + } else { + let dur = util::time::min_duration(park_duration, next_timer); + if util::time::pre_auto_advance(&handle.driver, dur) { + MaintainLocalTimer { + core, + park_duration: Some(Duration::ZERO), + auto_advance_duration: dur, + } } else { - let dur = match (next_timer, park_duration) { - (Some(next_timer), Some(park_duration)) => Some(next_timer.min(park_duration)), - (Some(next_timer), None) => Some(next_timer), - (None, Some(park_duration)) => Some(park_duration), - (None, None) => None, - }; - if util::time::pre_auto_advance(&handle.driver, dur) { - (core, Some(Duration::ZERO), dur) - } else { - (core, dur, None) + MaintainLocalTimer { + core, + park_duration: dur, + auto_advance_duration: None, } } - }; - - MaintainLocalTimer { core, park_duration, auto_advance_duration } + } } /// Maintain local timers after unparking the resource driver. 
diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index 5dbb73753f8..9ae51e35872 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -866,40 +866,43 @@ impl Context { &self,core: Box, park_duration: Option ) -> MaintainLocalTimer { - let (core, park_duration, auto_advance_duration) = { - let handle = &self.worker.handle; - - let (core, (should_yield, next_timer)) = - self.enter_with_time_context(core, |time_cx| { - util::time::remove_cancelled_timers(&mut time_cx.wheel, &mut time_cx.canc_rx); - let should_yield = util::time::insert_inject_timers( - &mut time_cx.wheel, - &time_cx.canc_tx, - handle.take_remote_timers(), - ); - let next_timer = - util::time::next_expiration_time(&time_cx.wheel, &handle.driver); - (should_yield, next_timer) - }); - - if should_yield { - (core, Some(Duration::from_millis(0)), None) + let handle = &self.worker.handle; + let (core, (should_yield, next_timer)) = + self.enter_with_time_context(core, |time_cx| { + util::time::remove_cancelled_timers(&mut time_cx.wheel, &mut time_cx.canc_rx); + let should_yield = util::time::insert_inject_timers( + &mut time_cx.wheel, + &time_cx.canc_tx, + handle.take_remote_timers(), + ); + let next_timer = + util::time::next_expiration_time(&time_cx.wheel, &handle.driver); + (should_yield, next_timer) + }); + + if should_yield { + MaintainLocalTimer { + core, + park_duration: Some(Duration::from_millis(0)), + auto_advance_duration: None, + } + } else { + // get the minimum duration + let dur = util::time::min_duration(park_duration, next_timer); + if util::time::pre_auto_advance(&handle.driver, dur) { + MaintainLocalTimer { + core, + park_duration: Some(Duration::ZERO), + auto_advance_duration: dur, + } } else { - let dur = match (next_timer, park_duration) { - (Some(next_timer), Some(park_duration)) => Some(next_timer.min(park_duration)), - (Some(next_timer), None) => 
Some(next_timer), - (None, Some(park_duration)) => Some(park_duration), - (None, None) => None, - }; - if util::time::pre_auto_advance(&handle.driver, dur) { - (core, Some(Duration::ZERO), dur) - } else { - (core, dur, None) + MaintainLocalTimer { + core, + park_duration: dur, + auto_advance_duration: None, } } - }; - - MaintainLocalTimer { core, park_duration, auto_advance_duration } + } } /// Maintain local timers after unparking the resource driver. diff --git a/tokio/src/runtime/scheduler/util.rs b/tokio/src/runtime/scheduler/util.rs index a5eddde410f..949ef5211f5 100644 --- a/tokio/src/runtime/scheduler/util.rs +++ b/tokio/src/runtime/scheduler/util.rs @@ -5,6 +5,15 @@ cfg_rt_and_time! { use crate::runtime::time::cancellation_queue::{Sender, Receiver}; use std::time::Duration; + pub(crate) fn min_duration(a: Option, b: Option) -> Option { + match (a, b) { + (Some(dur_a), Some(dur_b)) => Some(std::cmp::min(dur_a, dur_b)), + (Some(dur_a), None) => Some(dur_a), + (None, Some(dur_b)) => Some(dur_b), + (None, None) => None, + } + } + pub(crate) fn insert_inject_timers( wheel: &mut Wheel, tx: &Sender, From 94d40e39aa25f22db8de4a8ae60286b05b8d875d Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 21 Oct 2025 22:55:15 +0800 Subject: [PATCH 062/100] fix memory leak when draining the cancel queue Signed-off-by: ADD-SP --- tokio/src/runtime/scheduler/util.rs | 19 +++++--- tokio/src/runtime/time/mod.rs | 2 +- tokio/src/runtime/time/timer.rs | 35 ++++++++------ tokio/src/runtime/time/wheel/entry.rs | 70 +++++++++++++++++++++------ tokio/src/runtime/time/wheel/mod.rs | 18 +++++++ 5 files changed, 109 insertions(+), 35 deletions(-) diff --git a/tokio/src/runtime/scheduler/util.rs b/tokio/src/runtime/scheduler/util.rs index 949ef5211f5..7fbebb11a9d 100644 --- a/tokio/src/runtime/scheduler/util.rs +++ b/tokio/src/runtime/scheduler/util.rs @@ -1,7 +1,7 @@ cfg_rt_and_time! 
{ pub(crate) mod time { use crate::runtime::{scheduler::driver}; - use crate::runtime::time::{EntryHandle, EntryState, Wheel}; + use crate::runtime::time::{EntryHandle, EntryState, EntryCancelling, Wheel}; use crate::runtime::time::cancellation_queue::{Sender, Receiver}; use std::time::Duration; @@ -44,12 +44,19 @@ cfg_rt_and_time! { for hdl in rx.recv_all() { match hdl.state() { // INVARIANT: unregistered entry should not be in the wheel. - EntryState::Unregistered => unreachable!(), - EntryState::Registered(_thread_id) | EntryState::Pending(_thread_id) => { - // Safety: we have verified that the entry is registered in this wheel. - unsafe { wheel.remove(hdl) }; + EntryState::Unregistered | EntryState::Registered(..) | EntryState::Pending(..) => unreachable!(), + EntryState::Cancelling(cancelling) => match cancelling { + EntryCancelling::Unregistered => (), + EntryCancelling::Registered | EntryCancelling::Pending => { + // Safety: + // 1. entry is either in slot or pending list + // 2. `rx` ensures that the entry is registered in this thread. + unsafe { + wheel.remove(hdl); + } + } } - EntryState::Cancelling | EntryState::WokenUp => (), + EntryState::WokenUp => unreachable!(), } } } diff --git a/tokio/src/runtime/time/mod.rs b/tokio/src/runtime/time/mod.rs index 38f483eec3b..2de0132117d 100644 --- a/tokio/src/runtime/time/mod.rs +++ b/tokio/src/runtime/time/mod.rs @@ -17,7 +17,7 @@ pub(crate) use source::TimeSource; mod wheel; cfg_rt_and_time! { - pub(crate) use wheel::{Insert, EntryHandle, EntryState}; + pub(crate) use wheel::{Insert, EntryHandle, EntryState, EntryCancelling}; } cfg_rt_or_time! 
{ pub(crate) use wheel::cancellation_queue; diff --git a/tokio/src/runtime/time/timer.rs b/tokio/src/runtime/time/timer.rs index dea39a4ad5a..3cf523ba050 100644 --- a/tokio/src/runtime/time/timer.rs +++ b/tokio/src/runtime/time/timer.rs @@ -36,6 +36,18 @@ impl Drop for Timer { fn drop(&mut self) { if let Some(entry) = self.entry.take() { with_current_wheel(&self.sched_handle, |maybe_time_cx| { + let state = entry.state(); + + let thread_id = match state { + EntryState::Unregistered => { + entry.transition_to_cancelling(); + return; + } + EntryState::Registered(thread_id) | EntryState::Pending(thread_id) => thread_id, + EntryState::Cancelling(..) => unreachable!(), + EntryState::WokenUp => return, + }; + let Ok(cur_thread_id) = context::thread_id() else { // current thread is shutting down, we cannot determine the thread id, // so we need to fallback to the cancellation queue. @@ -44,21 +56,16 @@ impl Drop for Timer { }; if let Some(TimeContext::Running { wheel, canc_tx: _ }) = maybe_time_cx { - match entry.state() { - // INVARIANT: `self.entry` is `Some` only after the timer is registered. - EntryState::Unregistered => unreachable!(), - EntryState::Registered(thread_id) | EntryState::Pending(thread_id) - if thread_id == cur_thread_id => - { - // Safety: we have verified that the entry is registered in this wheel. - unsafe { wheel.remove(entry) }; - } - // thread_id doesn't match or entry is in WokenUp/Cancelling state - EntryState::Registered(..) // entry is registered in a different thread's wheel - | EntryState::Pending(..) // entry is registered in a different thread's wheel and is pending - | EntryState::Cancelling // entry is already in cancellation queue, nothing to do - | EntryState::WokenUp => (), // entry is already woken up, nothing to do + if thread_id == cur_thread_id { + // Safety: + // 1. entry is either in slots or pending list + // 2. 
entry is registered in this thread + unsafe { + wheel.remove(entry); } + } else { + entry.transition_to_cancelling(); + } } else { entry.transition_to_cancelling(); } diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index ba90deb255c..af2ee56a499 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -29,7 +29,7 @@ enum PrivState { WokenUp, /// The [`Handle`] has been sent to the [`Sender`]. - Cancelling, + Cancelling(Cancelling), } #[derive(Debug)] @@ -128,6 +128,7 @@ impl Handle { state: Mutex::new(PrivState::Unregistered(waker.clone())), _pin: PhantomPinned, }); + eprintln!("new: {:?}", Arc::as_ptr(&entry)); Handle { entry } } @@ -154,7 +155,7 @@ impl Handle { // don't unlock — poisoning the `Mutex` stops others from using the bad state. PrivState::WokenUp => panic!("corrupted state: `State::WokenUp`"), // no need to wake up cancelling entry - PrivState::Cancelling => (), + PrivState::Cancelling { .. } => (), } } @@ -180,7 +181,7 @@ impl Handle { // don't unlock — poisoning the `Mutex` stops others from using the bad state. PrivState::Pending(..) => panic!("corrupted state: State::Pending"), // don't wake up cancelling entries - PrivState::Cancelling => (), + PrivState::Cancelling { .. } => (), } } @@ -202,7 +203,7 @@ impl Handle { *old_waker = waker.clone(); } } - PrivState::WokenUp | PrivState::Cancelling => (), // no need to update the waker + PrivState::WokenUp | PrivState::Cancelling { .. } => (), // no need to update the waker } } @@ -212,6 +213,7 @@ impl Handle { thread_id: ThreadId, ) -> TransitionToRegistered { let mut lock = self.entry.state.lock(); + match &*lock { PrivState::Unregistered(waker) => { *lock = PrivState::Registered(cancel_tx, waker.clone(), thread_id); @@ -221,7 +223,10 @@ impl Handle { state @ (PrivState::Registered(..) | PrivState::Pending(..) 
| PrivState::WokenUp) => { panic!("corrupted state: {state:#?}") } - PrivState::Cancelling => TransitionToRegistered::Cancelling, + PrivState::Cancelling(cancelling) => match cancelling { + Cancelling::Unregistered => TransitionToRegistered::Cancelling, + Cancelling::Registered | Cancelling::Pending => unreachable!(), + }, } } @@ -242,7 +247,10 @@ impl Handle { state @ (PrivState::Pending(..) | PrivState::WokenUp) => { panic!("corrupted state: {state:#?}") } - PrivState::Cancelling => TransitionToPending::Cancelling, + PrivState::Cancelling { .. } => { + *lock = PrivState::Cancelling(Cancelling::Pending); + TransitionToPending::Cancelling + } } } @@ -250,17 +258,27 @@ impl Handle { let mut lock = self.entry.state.lock(); match *lock { - PrivState::Unregistered(_) => *lock = PrivState::Cancelling, - PrivState::Registered(ref tx, _, _) | PrivState::Pending(ref tx, _, _) => { + // don't unlock — poisoning the `Mutex` stops others from using the bad state. + PrivState::Unregistered(_) => { + *lock = PrivState::Cancelling(Cancelling::Unregistered); + } + PrivState::Registered(ref tx, _, _) => { // Safety: entry is not in any cancellation queue unsafe { tx.send(self.clone()); } - *lock = PrivState::Cancelling; + *lock = PrivState::Cancelling(Cancelling::Registered); } - PrivState::WokenUp => *lock = PrivState::Cancelling, + PrivState::Pending(ref tx, _, _) => { + // Safety: entry is not in any cancellation queue + unsafe { + tx.send(self.clone()); + } + *lock = PrivState::Cancelling(Cancelling::Pending); + } + PrivState::WokenUp => (), // dropping and waking up happen concurrently // don't unlock — poisoning the `Mutex` stops others from using the bad state. - PrivState::Cancelling => panic!("should not be called twice"), + PrivState::Cancelling(..) 
=> panic!("should not be called twice"), } } @@ -275,12 +293,20 @@ impl Handle { PrivState::Registered(_, _, thread_id) => State::Registered(*thread_id), PrivState::Pending(_, _, thread_id) => State::Pending(*thread_id), PrivState::WokenUp => State::WokenUp, - PrivState::Cancelling => State::Cancelling, + PrivState::Cancelling(cancelling) => State::Cancelling(*cancelling), } } pub(crate) fn is_pending(&self) -> bool { - matches!(*self.entry.state.lock(), PrivState::Pending(..)) + match self.state() { + State::Pending(_) => true, + State::Cancelling(cancelling) => match cancelling { + Cancelling::Unregistered => unreachable!(), + Cancelling::Registered => false, + Cancelling::Pending => true, + }, + _ => false, + } } pub(crate) fn is_woken_up(&self) -> bool { @@ -320,10 +346,26 @@ pub(crate) enum TransitionToPending { Cancelling, } +#[derive(Clone, Copy)] pub(crate) enum State { Unregistered, Registered(ThreadId), Pending(ThreadId), WokenUp, - Cancelling, + + /// The [`Handle`] has been sent to the [`Sender`]. + Cancelling(Cancelling), +} + +#[derive(Debug, Clone, Copy)] +/// Possible variants of the [`State::Cancelling`] +pub(crate) enum Cancelling { + /// [`Entry`] is being cancelled, and is not in the timer wheel. + Unregistered, + /// [`Entry`] is being cancelled, and is registered in the timer wheel, + /// but not in the pending list. + Registered, + /// [`Entry`] is being cancelled, and it registered in the timer wheel, + /// and also in the pending list. 
+ Pending, } diff --git a/tokio/src/runtime/time/wheel/mod.rs b/tokio/src/runtime/time/wheel/mod.rs index 51b448b835c..6ea823bb4cd 100644 --- a/tokio/src/runtime/time/wheel/mod.rs +++ b/tokio/src/runtime/time/wheel/mod.rs @@ -5,6 +5,7 @@ pub(crate) use self::level::Expiration; use self::level::Level; mod entry; +pub(crate) use entry::Cancelling as EntryCancelling; pub(crate) use entry::Handle as EntryHandle; pub(crate) use entry::State as EntryState; use entry::TransitionToPending; @@ -106,6 +107,12 @@ impl Wheel { match hdl.transition_to_registered(cancel_tx, thread_id) { TransitionToRegistered::Success => { + eprintln!( + "insert: {:?} => {:?}, level = {}", + crate::loom::sync::Arc::as_ptr(&hdl.entry), + std::thread::current().id(), + level + ); unsafe { self.levels[level].add_entry(hdl); } @@ -132,6 +139,11 @@ impl Wheel { /// * The entry is already registered in THIS wheel. pub(crate) unsafe fn remove(&mut self, hdl: EntryHandle) { if hdl.is_pending() { + eprintln!( + "rm_pending: {:?} => {:?}", + crate::loom::sync::Arc::as_ptr(&hdl.entry), + std::thread::current().id() + ); self.pending.remove(hdl.into()); } else { let deadline = hdl.deadline(); @@ -143,6 +155,12 @@ impl Wheel { ); let level = self.level_for(deadline); + eprintln!( + "rm_reg: {:?} => {:?}, level = {}", + crate::loom::sync::Arc::as_ptr(&hdl.entry), + std::thread::current().id(), + level + ); self.levels[level].remove_entry(hdl.clone()); } } From 17922c2d13749acd97c356b6d2677add82aba9ff Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 21 Oct 2025 23:00:00 +0800 Subject: [PATCH 063/100] remove debugging code Signed-off-by: ADD-SP --- tokio/src/runtime/time/wheel/mod.rs | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/tokio/src/runtime/time/wheel/mod.rs b/tokio/src/runtime/time/wheel/mod.rs index 6ea823bb4cd..37366fc206c 100644 --- a/tokio/src/runtime/time/wheel/mod.rs +++ b/tokio/src/runtime/time/wheel/mod.rs @@ -107,12 +107,6 @@ impl Wheel { match 
hdl.transition_to_registered(cancel_tx, thread_id) { TransitionToRegistered::Success => { - eprintln!( - "insert: {:?} => {:?}, level = {}", - crate::loom::sync::Arc::as_ptr(&hdl.entry), - std::thread::current().id(), - level - ); unsafe { self.levels[level].add_entry(hdl); } @@ -139,11 +133,6 @@ impl Wheel { /// * The entry is already registered in THIS wheel. pub(crate) unsafe fn remove(&mut self, hdl: EntryHandle) { if hdl.is_pending() { - eprintln!( - "rm_pending: {:?} => {:?}", - crate::loom::sync::Arc::as_ptr(&hdl.entry), - std::thread::current().id() - ); self.pending.remove(hdl.into()); } else { let deadline = hdl.deadline(); @@ -155,12 +144,6 @@ impl Wheel { ); let level = self.level_for(deadline); - eprintln!( - "rm_reg: {:?} => {:?}, level = {}", - crate::loom::sync::Arc::as_ptr(&hdl.entry), - std::thread::current().id(), - level - ); self.levels[level].remove_entry(hdl.clone()); } } From c47aa243740daf69675ecc9ee9728d612e331858 Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 21 Oct 2025 23:16:33 +0800 Subject: [PATCH 064/100] fix unused import error Signed-off-by: ADD-SP --- tokio/src/runtime/time/wheel/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tokio/src/runtime/time/wheel/mod.rs b/tokio/src/runtime/time/wheel/mod.rs index 37366fc206c..b9f8d432d8a 100644 --- a/tokio/src/runtime/time/wheel/mod.rs +++ b/tokio/src/runtime/time/wheel/mod.rs @@ -5,7 +5,9 @@ pub(crate) use self::level::Expiration; use self::level::Level; mod entry; -pub(crate) use entry::Cancelling as EntryCancelling; +cfg_rt_and_time! { + pub(crate) use entry::Cancelling as EntryCancelling; +} pub(crate) use entry::Handle as EntryHandle; pub(crate) use entry::State as EntryState; use entry::TransitionToPending; From 47f40368a20e83feccc7d3114bc349da56ba028e Mon Sep 17 00:00:00 2001 From: Qi Date: Wed, 22 Oct 2025 09:50:59 +0800 Subject: [PATCH 065/100] remove debug print Remove debug print statement for new entry creation. 
--- tokio/src/runtime/time/wheel/entry.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index af2ee56a499..41f82ea2bd6 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -128,7 +128,6 @@ impl Handle { state: Mutex::new(PrivState::Unregistered(waker.clone())), _pin: PhantomPinned, }); - eprintln!("new: {:?}", Arc::as_ptr(&entry)); Handle { entry } } From 5b53771c794422da47869806af05cbf23f6ee651 Mon Sep 17 00:00:00 2001 From: Qi Date: Wed, 22 Oct 2025 12:05:37 +0800 Subject: [PATCH 066/100] fix heap-use-after-free issue --- tokio/src/runtime/time/wheel/entry.rs | 8 ++++---- tokio/src/runtime/time/wheel/level.rs | 3 ++- tokio/src/runtime/time/wheel/mod.rs | 3 ++- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index 41f82ea2bd6..9acc570774a 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -112,10 +112,10 @@ pub(crate) struct Handle { pub(crate) entry: Arc, } -impl From for NonNull { - fn from(handle: Handle) -> NonNull { - let ptr = Arc::as_ptr(&handle.entry); - unsafe { NonNull::new_unchecked(ptr.cast_mut()) } +impl From<&Handle> for NonNull { + fn from(hdl: &Handle) -> Self { + // Safety: entry is in an `Arc`, so the pointer is valid. + unsafe { NonNull::new_unchecked(Arc::as_ptr(&hdl.entry) as *mut Entry) } } } diff --git a/tokio/src/runtime/time/wheel/level.rs b/tokio/src/runtime/time/wheel/level.rs index cea17a6fb8b..99309bfe0fb 100644 --- a/tokio/src/runtime/time/wheel/level.rs +++ b/tokio/src/runtime/time/wheel/level.rs @@ -1,4 +1,5 @@ use super::{EntryHandle, EntryList}; +use std::ptr::NonNull; use std::{array, fmt}; /// Wheel for a single level in the timer. This wheel contains 64 slots. 
@@ -131,7 +132,7 @@ impl Level { pub(crate) unsafe fn remove_entry(&mut self, hdl: EntryHandle) { let slot = slot_for(hdl.deadline(), self.level); - unsafe { self.slot[slot].remove(hdl.into()) }; + unsafe { self.slot[slot].remove(NonNull::from(&hdl)) }; if self.slot[slot].is_empty() { // The bit is currently set debug_assert!(self.occupied & occupied_bit(slot) != 0); diff --git a/tokio/src/runtime/time/wheel/mod.rs b/tokio/src/runtime/time/wheel/mod.rs index b9f8d432d8a..5d8854d4511 100644 --- a/tokio/src/runtime/time/wheel/mod.rs +++ b/tokio/src/runtime/time/wheel/mod.rs @@ -18,6 +18,7 @@ pub(crate) mod cancellation_queue; use cancellation_queue::Sender; use std::array; +use std::ptr::NonNull; /// Timing wheel implementation. /// @@ -135,7 +136,7 @@ impl Wheel { /// * The entry is already registered in THIS wheel. pub(crate) unsafe fn remove(&mut self, hdl: EntryHandle) { if hdl.is_pending() { - self.pending.remove(hdl.into()); + self.pending.remove(NonNull::from(&hdl)); } else { let deadline = hdl.deadline(); debug_assert!( From 47e04e8005383a37afbfc6d04e10ee87de87aa9e Mon Sep 17 00:00:00 2001 From: Qi Date: Fri, 24 Oct 2025 23:23:25 +0800 Subject: [PATCH 067/100] remove legacy comments of `Wheel::insert()` Signed-off-by: ADD-SP --- tokio/src/runtime/time/wheel/mod.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tokio/src/runtime/time/wheel/mod.rs b/tokio/src/runtime/time/wheel/mod.rs index 5d8854d4511..da4619fd7b3 100644 --- a/tokio/src/runtime/time/wheel/mod.rs +++ b/tokio/src/runtime/time/wheel/mod.rs @@ -82,12 +82,6 @@ impl Wheel { /// /// * `hdl`: The entry handle to insert into the wheel. /// - /// # Return - /// - /// * `true`: The entry was successfully inserted. - /// * `false`: the entry has already expired, in this case, - /// the entry is not inserted into the wheel. 
- /// /// # Safety /// /// The caller must ensure: @@ -300,6 +294,7 @@ fn level_for(elapsed: u64, when: u64) -> usize { significant / NUM_LEVELS } +/// The return type of the [`Wheel::insert`] method. pub(crate) enum Insert { /// The entry was successfully inserted. Success, From 15b0563c6f395bb9deb934554251bed81465ab6a Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 26 Oct 2025 23:05:01 +0800 Subject: [PATCH 068/100] handle the spurious wakeup of condvar in multi-thread runtime Signed-off-by: ADD-SP --- .../runtime/scheduler/multi_thread/park.rs | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/tokio/src/runtime/scheduler/multi_thread/park.rs b/tokio/src/runtime/scheduler/multi_thread/park.rs index a9e4fc689ea..79a3507f87c 100644 --- a/tokio/src/runtime/scheduler/multi_thread/park.rs +++ b/tokio/src/runtime/scheduler/multi_thread/park.rs @@ -8,7 +8,7 @@ use crate::runtime::driver::{self, Driver}; use crate::util::TryLock; use std::sync::atomic::Ordering::SeqCst; -use std::time::Duration; +use std::time::{Duration, Instant}; #[cfg(loom)] use crate::runtime::park::CURRENT_THREAD_PARK_COUNT; @@ -165,13 +165,26 @@ impl Inner { Err(actual) => panic!("inconsistent park state; actual = {actual}"), } + let timeout_at = duration.map(|d| { + Instant::now() + .checked_add(d) + // best effort to avoid overflow and still provide a usable timeout + .unwrap_or(Instant::now() + Duration::from_secs(1)) + }); + loop { let is_timeout; - (m, is_timeout) = match duration { - Some(dur) => { - assert_ne!(dur, Duration::ZERO); - let (m, res) = self.condvar.wait_timeout(m, dur).unwrap(); - (m, res.timed_out()) + (m, is_timeout) = match timeout_at { + Some(timeout_at) => { + let dur = timeout_at.saturating_duration_since(Instant::now()); + if !dur.is_zero() { + // Ideally, we would use `condvar.wait_timeout_until` here, but it is not available + // in `loom`. So we manually compute the timeout. 
+ let (m, res) = self.condvar.wait_timeout(m, dur).unwrap(); + (m, res.timed_out()) + } else { + (m, true) + } } None => (self.condvar.wait(m).unwrap(), false), }; From e3102f7cd98e68e6c6d0a827527a99250326f609 Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 9 Nov 2025 20:40:45 +0800 Subject: [PATCH 069/100] fix reports of `unsafe_op_in_unsafe_fn` Signed-off-by: ADD-SP --- tokio/src/runtime/time/wheel/cancellation_queue.rs | 4 +++- tokio/src/runtime/time/wheel/entry.rs | 12 ++++++------ tokio/src/runtime/time/wheel/mod.rs | 4 ++-- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/tokio/src/runtime/time/wheel/cancellation_queue.rs b/tokio/src/runtime/time/wheel/cancellation_queue.rs index 9fc08292c8c..a5338c6d844 100644 --- a/tokio/src/runtime/time/wheel/cancellation_queue.rs +++ b/tokio/src/runtime/time/wheel/cancellation_queue.rs @@ -84,7 +84,9 @@ impl Sender { /// /// - `hdl` must not in any cancellation queue. pub(crate) unsafe fn send(&self, hdl: EntryHandle) { - self.inner.lock().push_front(hdl); + unsafe { + self.inner.lock().push_front(hdl); + } } } diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index 9acc570774a..d57f11c9840 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -63,7 +63,7 @@ unsafe impl linked_list::Link for Entry { unsafe fn from_raw(ptr: NonNull) -> Self::Handle { Handle { - entry: Arc::from_raw(ptr.as_ptr()), + entry: unsafe { Arc::from_raw(ptr.as_ptr()) }, } } @@ -71,8 +71,8 @@ unsafe impl linked_list::Link for Entry { target: NonNull, ) -> NonNull> { let this = target.as_ptr(); - let field = std::ptr::addr_of_mut!((*this).wheel_pointers); - NonNull::new_unchecked(field) + let field = unsafe { std::ptr::addr_of_mut!((*this).wheel_pointers) }; + unsafe { NonNull::new_unchecked(field) } } } @@ -94,7 +94,7 @@ unsafe impl linked_list::Link for CancellationQueueEntry { unsafe fn from_raw(ptr: NonNull) -> Self::Handle { Handle { - entry: 
Arc::from_raw(ptr.as_ptr()), + entry: unsafe { Arc::from_raw(ptr.as_ptr()) }, } } @@ -102,8 +102,8 @@ unsafe impl linked_list::Link for CancellationQueueEntry { target: NonNull, ) -> NonNull> { let this = target.as_ptr(); - let field = std::ptr::addr_of_mut!((*this).cancel_pointers); - NonNull::new_unchecked(field) + let field = unsafe { std::ptr::addr_of_mut!((*this).cancel_pointers) }; + unsafe { NonNull::new_unchecked(field) } } } diff --git a/tokio/src/runtime/time/wheel/mod.rs b/tokio/src/runtime/time/wheel/mod.rs index da4619fd7b3..564a7681182 100644 --- a/tokio/src/runtime/time/wheel/mod.rs +++ b/tokio/src/runtime/time/wheel/mod.rs @@ -130,7 +130,7 @@ impl Wheel { /// * The entry is already registered in THIS wheel. pub(crate) unsafe fn remove(&mut self, hdl: EntryHandle) { if hdl.is_pending() { - self.pending.remove(NonNull::from(&hdl)); + unsafe { self.pending.remove(NonNull::from(&hdl)) }; } else { let deadline = hdl.deadline(); debug_assert!( @@ -141,7 +141,7 @@ impl Wheel { ); let level = self.level_for(deadline); - self.levels[level].remove_entry(hdl.clone()); + unsafe { self.levels[level].remove_entry(hdl.clone()) }; } } From cde17cdd03e3f7f79a1e0ada56a48dc10b9e9109 Mon Sep 17 00:00:00 2001 From: Qi Date: Thu, 13 Nov 2025 21:53:03 +0800 Subject: [PATCH 070/100] unlock the mutex before droping the waker Signed-off-by: ADD-SP --- tokio/src/runtime/time/wheel/entry.rs | 71 ++++++++++++++++++++------- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index d57f11c9840..627f6455658 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -145,7 +145,6 @@ impl Handle { // Since state has been updated, no need to hold the lock. drop(lock); if let PrivState::Pending(_, waker, ..) = old_state { - // Merge the wakers to ensure that the most recent waker is used. 
waker.wake(); } else { unreachable!() @@ -167,7 +166,6 @@ impl Handle { // Since state has been updated, no need to hold the lock. drop(lock); if let PrivState::Unregistered(old_waker) = old_state { - // Merge the wakers to ensure that the most recent waker is used. old_waker.wake(); } else { unreachable!() @@ -186,24 +184,34 @@ impl Handle { pub(crate) fn register_waker(&self, waker: &Waker) { let mut lock = self.entry.state.lock(); - match &mut *lock { + let old_waker = match &mut *lock { PrivState::Unregistered(old_waker) => { if !old_waker.will_wake(waker) { - *old_waker = waker.clone(); + Some(std::mem::replace(old_waker, waker.clone())) + } else { + None } } PrivState::Registered(_, old_waker, _) => { if !old_waker.will_wake(waker) { - *old_waker = waker.clone(); + Some(std::mem::replace(old_waker, waker.clone())) + } else { + None } } PrivState::Pending(_, old_waker, ..) => { if !old_waker.will_wake(waker) { - *old_waker = waker.clone(); + Some(std::mem::replace(old_waker, waker.clone())) + } else { + None } } - PrivState::WokenUp | PrivState::Cancelling { .. } => (), // no need to update the waker - } + PrivState::WokenUp | PrivState::Cancelling { .. } => None, // no need to update the waker + }; + + // unlock before dropping the old waker + drop(lock); + drop(old_waker); } pub(crate) fn transition_to_registered( @@ -212,21 +220,35 @@ impl Handle { thread_id: ThreadId, ) -> TransitionToRegistered { let mut lock = self.entry.state.lock(); + let state: &mut PrivState = &mut lock; - match &*lock { + let (new_state, ret) = match state { PrivState::Unregistered(waker) => { - *lock = PrivState::Registered(cancel_tx, waker.clone(), thread_id); - TransitionToRegistered::Success + let new_state = PrivState::Registered(cancel_tx, waker.clone(), thread_id); + (Some(new_state), TransitionToRegistered::Success) } // don't unlock — poisoning the `Mutex` stops others from using the bad state. state @ (PrivState::Registered(..) | PrivState::Pending(..) 
| PrivState::WokenUp) => { panic!("corrupted state: {state:#?}") } PrivState::Cancelling(cancelling) => match cancelling { - Cancelling::Unregistered => TransitionToRegistered::Cancelling, + Cancelling::Unregistered => (None, TransitionToRegistered::Cancelling), Cancelling::Registered | Cancelling::Pending => unreachable!(), }, + }; + + if let Some(new_state) = new_state { + // update the state and take back the old state + let old_state = std::mem::replace(state, new_state); + + if let PrivState::Unregistered(waker) = old_state { + // unlock before dropping the old waker + drop(lock); + drop(waker); + } } + + ret } pub(crate) fn transition_to_pending(&self, not_after: u64) -> TransitionToPending { @@ -235,22 +257,35 @@ impl Handle { } let mut lock = self.entry.state.lock(); - match &*lock { + let state: &mut PrivState = &mut lock; + + let (new_state, ret) = match state { // don't unlock — poisoning the `Mutex` stops others from using the bad state. PrivState::Unregistered(_) => panic!("corrupted state: State::Unregistered"), PrivState::Registered(sender, waker, thread_id) => { - *lock = PrivState::Pending(sender.clone(), waker.clone(), *thread_id); - TransitionToPending::Success + let new_state = PrivState::Pending(sender.clone(), waker.clone(), *thread_id); + (new_state, TransitionToPending::Success) } // don't unlock — poisoning the `Mutex` stops others from using the bad state. state @ (PrivState::Pending(..) | PrivState::WokenUp) => { panic!("corrupted state: {state:#?}") } PrivState::Cancelling { .. 
} => { - *lock = PrivState::Cancelling(Cancelling::Pending); - TransitionToPending::Cancelling + let new_state = PrivState::Cancelling(Cancelling::Pending); + (new_state, TransitionToPending::Cancelling) } + }; + + // update the state and take back the old state + let old_state = std::mem::replace(state, new_state); + + if let PrivState::Registered(_sender, waker, _thread_id) = old_state { + // unlock before dropping the old waker + drop(lock); + drop(waker); } + + ret } pub(crate) fn transition_to_cancelling(&self) { @@ -330,7 +365,7 @@ pub(crate) enum TransitionToRegistered { Cancelling, } -/// An result of the `transition_to_pending` method. +/// The result of the [`Handle::transition_to_pending`]` method. pub(crate) enum TransitionToPending { /// The entry was successfully transitioned /// to the pending state. From 9a4d4390460776452ea69dd38b3a6abdd37d144e Mon Sep 17 00:00:00 2001 From: Qi Date: Thu, 13 Nov 2025 23:52:08 +0800 Subject: [PATCH 071/100] don't cancel the timer locally due to `block_in_place` migrates the `Core` Signed-off-by: ADD-SP --- tokio/src/runtime/scheduler/util.rs | 5 ++- tokio/src/runtime/time/timer.rs | 48 ++++----------------------- tokio/src/runtime/time/wheel/entry.rs | 35 +++++++++---------- tokio/src/runtime/time/wheel/mod.rs | 10 ++---- 4 files changed, 25 insertions(+), 73 deletions(-) diff --git a/tokio/src/runtime/scheduler/util.rs b/tokio/src/runtime/scheduler/util.rs index 7fbebb11a9d..7ac8ba2702d 100644 --- a/tokio/src/runtime/scheduler/util.rs +++ b/tokio/src/runtime/scheduler/util.rs @@ -21,10 +21,9 @@ cfg_rt_and_time! 
{ ) -> bool { use crate::runtime::time::Insert; let mut fired = false; - let thread_id = crate::runtime::context::thread_id().expect("should not be called during the thread shutdown"); // process injected timers for hdl in inject { - match unsafe { wheel.insert(hdl.clone(), tx.clone(), thread_id) } { + match unsafe { wheel.insert(hdl.clone(), tx.clone()) } { Insert::Success => {} Insert::Elapsed => { hdl.wake_unregistered(); @@ -44,7 +43,7 @@ cfg_rt_and_time! { for hdl in rx.recv_all() { match hdl.state() { // INVARIANT: unregistered entry should not be in the wheel. - EntryState::Unregistered | EntryState::Registered(..) | EntryState::Pending(..) => unreachable!(), + EntryState::Unregistered | EntryState::Registered | EntryState::Pending => unreachable!(), EntryState::Cancelling(cancelling) => match cancelling { EntryCancelling::Unregistered => (), EntryCancelling::Registered | EntryCancelling::Pending => { diff --git a/tokio/src/runtime/time/timer.rs b/tokio/src/runtime/time/timer.rs index 3cf523ba050..01a50803b2d 100644 --- a/tokio/src/runtime/time/timer.rs +++ b/tokio/src/runtime/time/timer.rs @@ -1,5 +1,4 @@ -use super::wheel::{EntryHandle, EntryState}; -use crate::runtime::context; +use super::wheel::EntryHandle; use crate::runtime::scheduler::Handle as SchedulerHandle; use crate::runtime::time::wheel::Insert; use crate::runtime::time::Context as TimeContext; @@ -35,41 +34,7 @@ impl std::fmt::Debug for Timer { impl Drop for Timer { fn drop(&mut self) { if let Some(entry) = self.entry.take() { - with_current_wheel(&self.sched_handle, |maybe_time_cx| { - let state = entry.state(); - - let thread_id = match state { - EntryState::Unregistered => { - entry.transition_to_cancelling(); - return; - } - EntryState::Registered(thread_id) | EntryState::Pending(thread_id) => thread_id, - EntryState::Cancelling(..) 
=> unreachable!(), - EntryState::WokenUp => return, - }; - - let Ok(cur_thread_id) = context::thread_id() else { - // current thread is shutting down, we cannot determine the thread id, - // so we need to fallback to the cancellation queue. - entry.transition_to_cancelling(); - return; - }; - - if let Some(TimeContext::Running { wheel, canc_tx: _ }) = maybe_time_cx { - if thread_id == cur_thread_id { - // Safety: - // 1. entry is either in slots or pending list - // 2. entry is registered in this thread - unsafe { - wheel.remove(entry); - } - } else { - entry.transition_to_cancelling(); - } - } else { - entry.transition_to_cancelling(); - } - }); + entry.transition_to_cancelling(); } } } @@ -100,12 +65,11 @@ impl Timer { with_current_wheel(&this.sched_handle, |maybe_time_cx| { let deadline = deadline_to_tick(&this.sched_handle, this.deadline); let hdl = EntryHandle::new(deadline, cx.waker()); - let thread_id = context::thread_id().ok(); - match (maybe_time_cx, thread_id) { - (Some(TimeContext::Running { wheel, canc_tx }), Some(thread_id)) => { + match maybe_time_cx { + Some(TimeContext::Running { wheel, canc_tx }) => { // Safety: the entry is not registered yet - match unsafe { wheel.insert(hdl.clone(), canc_tx.clone(), thread_id) } { + match unsafe { wheel.insert(hdl.clone(), canc_tx.clone()) } { Insert::Success => { this.entry = Some(hdl); Poll::Pending @@ -115,7 +79,7 @@ impl Timer { } } #[cfg(feature = "rt-multi-thread")] - (Some(TimeContext::Shutdown), _) => panic!("{RUNTIME_SHUTTING_DOWN_ERROR}"), + Some(TimeContext::Shutdown) => panic!("{RUNTIME_SHUTTING_DOWN_ERROR}"), _ => { this.entry = Some(hdl.clone()); push_from_remote(&this.sched_handle, hdl); diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index 627f6455658..d60e5b2297c 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -1,6 +1,5 @@ use super::cancellation_queue::Sender; use crate::loom::sync::{Arc, Mutex}; -use 
crate::runtime::ThreadId; use crate::util::linked_list; use std::marker::PhantomPinned; @@ -16,12 +15,12 @@ enum PrivState { /// The entry is registered to the timer wheel, /// but not in the pending queue of the timer wheel. - Registered(Sender, Waker, ThreadId), + Registered(Sender, Waker), /// The entry is in the pending queue of the timer wheel, /// and not in any wheel level, which means that /// the entry is reached its deadline and waiting to be woken up. - Pending(Sender, Waker, ThreadId), + Pending(Sender, Waker), /// The waker has been called, and the entry is no longer in the timer wheel /// (both each wheel level and the pending queue), which means that @@ -192,7 +191,7 @@ impl Handle { None } } - PrivState::Registered(_, old_waker, _) => { + PrivState::Registered(_, old_waker) => { if !old_waker.will_wake(waker) { Some(std::mem::replace(old_waker, waker.clone())) } else { @@ -214,17 +213,13 @@ impl Handle { drop(old_waker); } - pub(crate) fn transition_to_registered( - &self, - cancel_tx: Sender, - thread_id: ThreadId, - ) -> TransitionToRegistered { + pub(crate) fn transition_to_registered(&self, cancel_tx: Sender) -> TransitionToRegistered { let mut lock = self.entry.state.lock(); let state: &mut PrivState = &mut lock; let (new_state, ret) = match state { PrivState::Unregistered(waker) => { - let new_state = PrivState::Registered(cancel_tx, waker.clone(), thread_id); + let new_state = PrivState::Registered(cancel_tx, waker.clone()); (Some(new_state), TransitionToRegistered::Success) } // don't unlock — poisoning the `Mutex` stops others from using the bad state. @@ -262,8 +257,8 @@ impl Handle { let (new_state, ret) = match state { // don't unlock — poisoning the `Mutex` stops others from using the bad state. 
PrivState::Unregistered(_) => panic!("corrupted state: State::Unregistered"), - PrivState::Registered(sender, waker, thread_id) => { - let new_state = PrivState::Pending(sender.clone(), waker.clone(), *thread_id); + PrivState::Registered(sender, waker) => { + let new_state = PrivState::Pending(sender.clone(), waker.clone()); (new_state, TransitionToPending::Success) } // don't unlock — poisoning the `Mutex` stops others from using the bad state. @@ -279,7 +274,7 @@ impl Handle { // update the state and take back the old state let old_state = std::mem::replace(state, new_state); - if let PrivState::Registered(_sender, waker, _thread_id) = old_state { + if let PrivState::Registered(_sender, waker) = old_state { // unlock before dropping the old waker drop(lock); drop(waker); @@ -296,14 +291,14 @@ impl Handle { PrivState::Unregistered(_) => { *lock = PrivState::Cancelling(Cancelling::Unregistered); } - PrivState::Registered(ref tx, _, _) => { + PrivState::Registered(ref tx, _) => { // Safety: entry is not in any cancellation queue unsafe { tx.send(self.clone()); } *lock = PrivState::Cancelling(Cancelling::Registered); } - PrivState::Pending(ref tx, _, _) => { + PrivState::Pending(ref tx, _) => { // Safety: entry is not in any cancellation queue unsafe { tx.send(self.clone()); @@ -324,8 +319,8 @@ impl Handle { let lock = self.entry.state.lock(); match &*lock { PrivState::Unregistered(_) => State::Unregistered, - PrivState::Registered(_, _, thread_id) => State::Registered(*thread_id), - PrivState::Pending(_, _, thread_id) => State::Pending(*thread_id), + PrivState::Registered(..) => State::Registered, + PrivState::Pending(..) 
=> State::Pending, PrivState::WokenUp => State::WokenUp, PrivState::Cancelling(cancelling) => State::Cancelling(*cancelling), } @@ -333,7 +328,7 @@ impl Handle { pub(crate) fn is_pending(&self) -> bool { match self.state() { - State::Pending(_) => true, + State::Pending => true, State::Cancelling(cancelling) => match cancelling { Cancelling::Unregistered => unreachable!(), Cancelling::Registered => false, @@ -383,8 +378,8 @@ pub(crate) enum TransitionToPending { #[derive(Clone, Copy)] pub(crate) enum State { Unregistered, - Registered(ThreadId), - Pending(ThreadId), + Registered, + Pending, WokenUp, /// The [`Handle`] has been sent to the [`Sender`]. diff --git a/tokio/src/runtime/time/wheel/mod.rs b/tokio/src/runtime/time/wheel/mod.rs index 564a7681182..6f0706fa433 100644 --- a/tokio/src/runtime/time/wheel/mod.rs +++ b/tokio/src/runtime/time/wheel/mod.rs @@ -1,5 +1,4 @@ mod level; -use crate::runtime::ThreadId; pub(crate) use self::level::Expiration; use self::level::Level; @@ -87,12 +86,7 @@ impl Wheel { /// The caller must ensure: /// /// * The entry is not already registered in ANY wheel. 
- pub(crate) unsafe fn insert( - &mut self, - hdl: EntryHandle, - cancel_tx: Sender, - thread_id: ThreadId, - ) -> Insert { + pub(crate) unsafe fn insert(&mut self, hdl: EntryHandle, cancel_tx: Sender) -> Insert { let deadline = hdl.deadline(); if deadline <= self.elapsed { @@ -102,7 +96,7 @@ impl Wheel { // Get the level at which the entry should be stored let level = self.level_for(deadline); - match hdl.transition_to_registered(cancel_tx, thread_id) { + match hdl.transition_to_registered(cancel_tx) { TransitionToRegistered::Success => { unsafe { self.levels[level].add_entry(hdl); From d4959a8fa17d5c49b79eebe36387d19cb6926ba4 Mon Sep 17 00:00:00 2001 From: Qi Date: Thu, 13 Nov 2025 23:54:35 +0800 Subject: [PATCH 072/100] fix incorrect comment of `TransitionToRegistered` Signed-off-by: ADD-SP --- tokio/src/runtime/time/wheel/entry.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index d60e5b2297c..1120d449853 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -349,8 +349,7 @@ impl Handle { } } -/// An error returned when trying to transition -/// an being cancelled entry to the registered state. +/// The result of the [`Handle::transition_to_registered`]` method. pub(crate) enum TransitionToRegistered { /// The entry is being cancelled, no need to register it. Success, @@ -391,9 +390,11 @@ pub(crate) enum State { pub(crate) enum Cancelling { /// [`Entry`] is being cancelled, and is not in the timer wheel. Unregistered, + /// [`Entry`] is being cancelled, and is registered in the timer wheel, /// but not in the pending list. Registered, + /// [`Entry`] is being cancelled, and it registered in the timer wheel, /// and also in the pending list. 
Pending, From bc243b818e2aa42b8bafab2d8edbec918fc250fb Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 16 Nov 2025 22:57:30 +0800 Subject: [PATCH 073/100] eliminate `Option` in `Core` Signed-off-by: ADD-SP --- .../runtime/scheduler/current_thread/mod.rs | 126 +++++----- .../runtime/scheduler/multi_thread/worker.rs | 130 +++++------ tokio/src/runtime/scheduler/util.rs | 57 ++++- tokio/src/runtime/time/handle.rs | 29 ++- tokio/src/runtime/time/mod.rs | 2 + tokio/src/runtime/time/tests/mod.rs | 13 +- .../runtime/time/wheel/cancellation_queue.rs | 2 +- tokio/src/runtime/time/wheel/entry.rs | 219 +++++++++++++++--- tokio/src/runtime/time/wheel/mod.rs | 7 +- tokio/src/runtime/time/wheel/wake_queue.rs | 70 ++++++ 10 files changed, 473 insertions(+), 182 deletions(-) create mode 100644 tokio/src/runtime/time/wheel/wake_queue.rs diff --git a/tokio/src/runtime/scheduler/current_thread/mod.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs index 8e4d1202ee4..d79c9410e03 100644 --- a/tokio/src/runtime/scheduler/current_thread/mod.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -25,6 +25,7 @@ use std::{fmt, thread}; cfg_time! 
{ use crate::runtime::scheduler::util; use crate::runtime::time::EntryHandle; + use crate::runtime::time::WakeQueue; use crate::loom::sync::Mutex; } @@ -69,7 +70,7 @@ struct Core { tick: u32, #[cfg(feature = "time")] - time_context: Option, + time_context: crate::runtime::time::Context2, /// Runtime driver /// @@ -189,7 +190,7 @@ impl CurrentThread { tasks: VecDeque::with_capacity(INITIAL_CAPACITY), tick: 0, #[cfg(feature = "time")] - time_context: Some(crate::runtime::time::Context2::new()), + time_context: crate::runtime::time::Context2::new(), driver: Some(driver), metrics: MetricsBatch::new(&handle.shared.worker_metrics), global_queue_interval, @@ -302,14 +303,12 @@ fn shutdown2(mut core: Box, handle: &Handle) -> Box { #[cfg(feature = "time")] { - let mut time_context = core.time_context.take().unwrap(); util::time::shutdown_local_timers( - &mut time_context.wheel, - &mut time_context.canc_rx, + &mut core.time_context.wheel, + &mut core.time_context.canc_rx, handle.take_remote_timers(), &handle.driver, ); - assert!(core.time_context.replace(time_context).is_none()); } // Drain the local queue // We already shut down every task, so we just need to drop the task. 
@@ -459,13 +458,12 @@ impl Context { ) -> Box { debug_assert!(core.driver.is_none()); - let MaintainLocalTimer { - core, - park_duration: duration, - auto_advance_duration, - } = self.maintain_local_timers_before_parking(core, handle, duration); - let (core, ()) = self.enter(core, || { + let MaintainLocalTimer { + park_duration: duration, + auto_advance_duration, + } = self.maintain_local_timers_before_parking(handle, duration); + if let Some(duration) = duration { driver.park_timeout(&handle.driver, duration); } else { @@ -473,9 +471,11 @@ impl Context { } self.defer.wake(); + + self.maintain_local_timers_after_parking(handle, auto_advance_duration) }); - self.maintain_local_timers_after_parking(core, handle, auto_advance_duration) + core } fn enter(&self, core: Box, f: impl FnOnce() -> R) -> (Box, R) { @@ -510,26 +510,42 @@ impl Context { /// `(Box, park_duration, auto_advance_duration)` fn maintain_local_timers_before_parking( &self, - core: Box, handle: &Handle, park_duration: Option ) -> MaintainLocalTimer { - let (core, (should_yield, next_timer)) = - self.enter_with_time_context(core, |time_cx| { - util::time::remove_cancelled_timers(&mut time_cx.wheel, &mut time_cx.canc_rx); - let should_yield = util::time::insert_inject_timers( - &mut time_cx.wheel, - &time_cx.canc_tx, - handle.take_remote_timers(), - ); - let next_timer = - util::time::next_expiration_time(&time_cx.wheel, &handle.driver); - (should_yield, next_timer) - }); + let mut wake_queue = WakeQueue::new(); + + let (should_yield, next_timer) = context::with_scheduler(|maybe_cx| { + use scheduler::Context::CurrentThread; + + match maybe_cx { + Some(CurrentThread(cx)) if std::ptr::eq(Arc::as_ptr(&cx.handle), handle) => { + let mut maybe_core = cx.core.borrow_mut(); + let core = maybe_core.as_mut().expect("core missing"); + let time_cx = &mut core.time_context; + + util::time::remove_cancelled_timers(&mut time_cx.wheel, &mut time_cx.canc_rx); + util::time::insert_inject_timers( + &mut time_cx.wheel, + 
&time_cx.canc_tx, + handle.take_remote_timers(), + &mut wake_queue, + ); + let should_yield = !wake_queue.is_empty(); + + let next_timer = + util::time::next_expiration_time(&time_cx.wheel, &handle.driver); + + (should_yield, next_timer) + } + _bad_cx => panic!("function is not called within the exact same runtime context"), + } + }); + + wake_queue.wake_all(); if should_yield { MaintainLocalTimer { - core, park_duration: Some(Duration::ZERO), auto_advance_duration: None, } @@ -537,13 +553,11 @@ impl Context { let dur = util::time::min_duration(park_duration, next_timer); if util::time::pre_auto_advance(&handle.driver, dur) { MaintainLocalTimer { - core, park_duration: Some(Duration::ZERO), auto_advance_duration: dur, } } else { MaintainLocalTimer { - core, park_duration: dur, auto_advance_duration: None, } @@ -557,30 +571,28 @@ impl Context { /// * Process expired timers. fn maintain_local_timers_after_parking( &self, - core: Box, handle: &Handle, auto_advance_duration: Option - ) -> Box { - let (core, ()) = self.enter_with_time_context(core, |time_cx| { - util::time::post_auto_advance(&handle.driver, auto_advance_duration); - util::time::process_expired_timers(&mut time_cx.wheel, &handle.driver); - }); - core - } + ) { + let mut wake_queue = WakeQueue::new(); - /// Take out the time context from the core, - /// and then setup the [`Core`] to the thread-local [`Context`], - /// finally, run the provided closure `f` with the time context. 
- fn enter_with_time_context(&self, mut core: Box, f: F) -> (Box, R) - where - F: FnOnce(&mut crate::runtime::time::Context2) -> R, - { - let mut time_cx = core.time_context.take().expect("time context missing"); - let (mut core, ret) = self.enter(core, || { - f(&mut time_cx) + context::with_scheduler(|maybe_cx| { + use scheduler::Context::CurrentThread; + + match maybe_cx { + Some(CurrentThread(cx)) if std::ptr::eq(Arc::as_ptr(&cx.handle), handle) => { + let mut maybe_core = cx.core.borrow_mut(); + let core = maybe_core.as_mut().expect("core missing"); + let time_cx = &mut core.time_context; + + util::time::post_auto_advance(&handle.driver, auto_advance_duration); + util::time::process_expired_timers(&mut time_cx.wheel, &handle.driver, &mut wake_queue); + } + _bad_cx => panic!("function is not called within the exact same runtime context"), + } }); - assert!(core.time_context.replace(time_cx).is_none()); - (core, ret) + + wake_queue.wake_all(); } fn with_core(&self, f: F) -> R @@ -597,17 +609,10 @@ impl Context { { self.with_core(|maybe_core| { match maybe_core { - Some(core) => { - match core.time_context { - Some(ref mut time_context) => { - f(Some(crate::runtime::time::Context::Running { - wheel: &mut time_context.wheel, - canc_tx: &time_context.canc_tx, - })) - } - None => f(None), - } - } + Some(core) => f(Some(crate::runtime::time::Context::Running { + wheel: &mut core.time_context.wheel, + canc_tx: &core.time_context.canc_tx, + })), None => f(None), } }) @@ -1068,7 +1073,6 @@ impl Drop for CoreGuard<'_> { /// Returned by [`Context::maintain_local_timers_before_parking`]. 
struct MaintainLocalTimer { - core: Box, park_duration: Option, auto_advance_duration: Option, } diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index 9ae51e35872..5353526886e 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -78,6 +78,7 @@ use std::time::Duration; cfg_time! { use crate::runtime::scheduler::util; use crate::runtime::time::EntryHandle; + use crate::runtime::time::WakeQueue; } mod metrics; @@ -122,7 +123,7 @@ struct Core { run_queue: queue::Local>, #[cfg(feature = "time")] - time_context: Option, + time_context: crate::runtime::time::Context2, /// True if the worker is currently searching for more work. Searching /// involves attempting to steal from other workers. @@ -276,7 +277,7 @@ pub(super) fn create( lifo_enabled: !config.disable_lifo_slot, run_queue, #[cfg(feature = "time")] - time_context: Some(crate::runtime::time::Context2::new()), + time_context: crate::runtime::time::Context2::new(), is_searching: false, is_shutdown: false, is_traced: false, @@ -421,11 +422,6 @@ where Some(core) => core, None => return Ok(()), }; - #[cfg(feature = "time")] - assert!( - core.time_context.is_some(), - "should always be `Some` unless processing local timers" - ); // If we heavily call `spawn_blocking`, there might be no available thread to // run this core. 
Except for the task in the lifo_slot, all tasks can be @@ -587,14 +583,12 @@ impl Context { #[cfg(feature = "time")] { - let mut time_context = core.time_context.take().expect("time context missing"); util::time::shutdown_local_timers( - &mut time_context.wheel, - &mut time_context.canc_rx, + &mut core.time_context.wheel, + &mut core.time_context.canc_rx, self.worker.handle.take_remote_timers(), &self.worker.handle.driver, ); - core.time_context = Some(time_context); } core.pre_shutdown(&self.worker); // Signal shutdown @@ -799,18 +793,17 @@ impl Context { // Take the parker out of core let mut park = core.park.take().expect("park missing"); + // Store `core` in context + *self.core.borrow_mut() = Some(core); + // Must happens after taking out the parker, as the `Handle::schedule_local` // will delay the notify if the parker taken out. // // See comments in `Handle::schedule_local` for more details. let MaintainLocalTimer { - mut core, park_duration: duration, auto_advance_duration, - } = self.maintain_local_timers_before_parking(core, duration); - - // Store `core` in context - *self.core.borrow_mut() = Some(core); + } = self.maintain_local_timers_before_parking(duration); // Park thread if let Some(timeout) = duration { @@ -821,14 +814,14 @@ impl Context { self.defer.wake(); - // Remove `core` from context - core = self.core.borrow_mut().take().expect("core missing"); - // Must happens before placing back the parker, as the `Handle::schedule_local` // will delay the notify if the parker is still in `core`. // // See comments in `Handle::schedule_local` for more details. 
- core = self.maintain_local_timers_after_parking(core, auto_advance_duration); + self.maintain_local_timers_after_parking(auto_advance_duration); + + // Remove `core` from context + core = self.core.borrow_mut().take().expect("core missing"); // Place `park` back in `core` core.park = Some(park); @@ -863,26 +856,43 @@ impl Context { /// /// `(Box, park_duration, auto_advance_duration)` fn maintain_local_timers_before_parking( - &self,core: Box, + &self, park_duration: Option ) -> MaintainLocalTimer { let handle = &self.worker.handle; - let (core, (should_yield, next_timer)) = - self.enter_with_time_context(core, |time_cx| { - util::time::remove_cancelled_timers(&mut time_cx.wheel, &mut time_cx.canc_rx); - let should_yield = util::time::insert_inject_timers( - &mut time_cx.wheel, - &time_cx.canc_tx, - handle.take_remote_timers(), - ); - let next_timer = - util::time::next_expiration_time(&time_cx.wheel, &handle.driver); - (should_yield, next_timer) - }); + let mut wake_queue = WakeQueue::new(); + + let (should_yield, next_timer) = with_current(|maybe_cx| { + let cx = maybe_cx.expect("function should be called when core is present"); + assert_eq!( + Arc::as_ptr(&cx.worker.handle), + Arc::as_ptr(&self.worker.handle), + "function should be called on the exact same worker" + ); + + let mut maybe_core = cx.core.borrow_mut(); + let core = maybe_core.as_mut().expect("core missing"); + let time_cx = &mut core.time_context; + + util::time::remove_cancelled_timers(&mut time_cx.wheel, &mut time_cx.canc_rx); + util::time::insert_inject_timers( + &mut time_cx.wheel, + &time_cx.canc_tx, + handle.take_remote_timers(), + &mut wake_queue, + ); + let should_yield = !wake_queue.is_empty(); + + let next_timer = + util::time::next_expiration_time(&time_cx.wheel, &handle.driver); + + (should_yield, next_timer) + }); + + wake_queue.wake_all(); if should_yield { MaintainLocalTimer { - core, park_duration: Some(Duration::from_millis(0)), auto_advance_duration: None, } @@ -891,13 +901,11 @@ 
impl Context { let dur = util::time::min_duration(park_duration, next_timer); if util::time::pre_auto_advance(&handle.driver, dur) { MaintainLocalTimer { - core, park_duration: Some(Duration::ZERO), auto_advance_duration: dur, } } else { MaintainLocalTimer { - core, park_duration: dur, auto_advance_duration: None, } @@ -911,30 +919,28 @@ impl Context { /// * Process expired timers. fn maintain_local_timers_after_parking( &self, - core: Box, auto_advance_duration: Option - ) -> Box { + ) { let handle = &self.worker.handle; - let (core, ()) = self.enter_with_time_context(core, |time_cx| { + let mut wake_queue = WakeQueue::new(); + + with_current(|maybe_cx| { + let cx = maybe_cx.expect("function should be called when core is present"); + assert_eq!( + Arc::as_ptr(&cx.worker.handle), + Arc::as_ptr(&self.worker.handle), + "function should be called on the exact same worker" + ); + + let mut maybe_core = cx.core.borrow_mut(); + let core = maybe_core.as_mut().expect("core missing"); + let time_cx = &mut core.time_context; + util::time::post_auto_advance(&handle.driver, auto_advance_duration); - util::time::process_expired_timers(&mut time_cx.wheel, &handle.driver); + util::time::process_expired_timers(&mut time_cx.wheel, &handle.driver, &mut wake_queue); }); - core - } - /// Take out the time context from the core, - /// and then setup the [`Core`] to the thread-local [`Context`], - /// finally, run the provided closure `f` with the time context. 
- fn enter_with_time_context(&self, mut core: Box, f: F) -> (Box, R) - where - F: FnOnce(&mut crate::runtime::time::Context2) -> R, - { - let mut time_cx = core.time_context.take().expect("time context missing"); - assert!(self.core.borrow_mut().replace(core).is_none()); - let ret = f(&mut time_cx); - let mut core = self.core.borrow_mut().take().expect("core missing"); - assert!(core.time_context.replace(time_cx).is_none()); - (core, ret) + wake_queue.wake_all(); } fn with_core(&self, f: F) -> R @@ -954,15 +960,10 @@ impl Context { self.with_core(|maybe_core| { match maybe_core { Some(core) if core.is_shutdown => f(Some(crate::runtime::time::Context::Shutdown)), - Some(core) => { - match core.time_context { - Some(ref mut time_context) => f(Some(crate::runtime::time::Context::Running { - wheel: &mut time_context.wheel, - canc_tx: &time_context.canc_tx, - })), - None => f(None), - } - } + Some(core) => f(Some(crate::runtime::time::Context::Running { + wheel: &mut core.time_context.wheel, + canc_tx: &core.time_context.canc_tx, + })), None => f(None), } }) @@ -1464,7 +1465,6 @@ impl<'a> Lock for &'a Handle { /// Returned by [`Context::maintain_local_timers_before_parking`]. struct MaintainLocalTimer { - core: Box, park_duration: Option, auto_advance_duration: Option, } diff --git a/tokio/src/runtime/scheduler/util.rs b/tokio/src/runtime/scheduler/util.rs index 7ac8ba2702d..7af68367034 100644 --- a/tokio/src/runtime/scheduler/util.rs +++ b/tokio/src/runtime/scheduler/util.rs @@ -1,8 +1,10 @@ cfg_rt_and_time! 
{ pub(crate) mod time { use crate::runtime::{scheduler::driver}; - use crate::runtime::time::{EntryHandle, EntryState, EntryCancelling, Wheel}; + use crate::runtime::time::{Wheel, WakeQueue}; + use crate::runtime::time::{EntryHandle, EntryState, EntryCancelling}; use crate::runtime::time::cancellation_queue::{Sender, Receiver}; + use crate::runtime::time::EntryTransitionToWakingUp; use std::time::Duration; pub(crate) fn min_duration(a: Option, b: Option) -> Option { @@ -18,22 +20,33 @@ cfg_rt_and_time! { wheel: &mut Wheel, tx: &Sender, inject: Vec, - ) -> bool { + wake_queue: &mut WakeQueue, + ) { use crate::runtime::time::Insert; - let mut fired = false; + // process injected timers for hdl in inject { match unsafe { wheel.insert(hdl.clone(), tx.clone()) } { Insert::Success => {} Insert::Elapsed => { - hdl.wake_unregistered(); - fired = true; + match hdl.transition_to_waking_up_unregistered() { + EntryTransitionToWakingUp::Success => { + // Safety: + // + // 1. this entry is not in the timer wheel + // 2. AND this entry is not in any cancellation queue + unsafe { + wake_queue.push_front(hdl); + } + } + EntryTransitionToWakingUp::Cancelling => { + // cancellation happens concurrently, no need to wake + } + } } Insert::Cancelling => {} } } - - fired } pub(crate) fn remove_cancelled_timers( @@ -42,8 +55,10 @@ cfg_rt_and_time! { ) { for hdl in rx.recv_all() { match hdl.state() { - // INVARIANT: unregistered entry should not be in the wheel. 
- EntryState::Unregistered | EntryState::Registered | EntryState::Pending => unreachable!(), + // INVARIANT: the state always be transitioned to Cancelling before being sent to cancellation queue + state @ (EntryState::Unregistered | EntryState::Registered | EntryState::Pending | EntryState::WakingUp) => { + panic!("corrupted state: {state:#?}"); + } EntryState::Cancelling(cancelling) => match cancelling { EntryCancelling::Unregistered => (), EntryCancelling::Registered | EntryCancelling::Pending => { @@ -55,7 +70,8 @@ cfg_rt_and_time! { } } } - EntryState::WokenUp => unreachable!(), + // INVARIANT: the state always be transitioned to Cancelling before being sent to cancellation queue + EntryState::WokenUp => panic!("corrupted state: `EntryState::WokenUp`"), } } } @@ -146,6 +162,7 @@ cfg_rt_and_time! { pub(crate) fn process_expired_timers( wheel: &mut Wheel, drv_hdl: &driver::Handle, + wake_queue: &mut WakeQueue, ) { drv_hdl.with_time(|maybe_time_hdl| { let Some(time_hdl) = maybe_time_hdl else { @@ -157,7 +174,7 @@ cfg_rt_and_time! { let time_source = time_hdl.time_source(); let now = time_source.now(clock); - time_hdl.process_at_time(wheel, now); + time_hdl.process_at_time(wheel, now, wake_queue); }); } @@ -176,10 +193,26 @@ cfg_rt_and_time! { remove_cancelled_timers(wheel, rx); time_hdl.shutdown(wheel); + let mut wake_queue = WakeQueue::new(); // simply wake all unregistered timers for hdl in inject { - hdl.wake_unregistered(); + match hdl.transition_to_waking_up_unregistered() { + EntryTransitionToWakingUp::Success => { + // Safety: + // + // 1. this entry is not in the timer wheel + // 2. 
AND this entry is not in any cancellation queue + unsafe { + wake_queue.push_front(hdl); + } + } + EntryTransitionToWakingUp::Cancelling => { + // cancellation happens concurrently, no need to wake + } + } } + + wake_queue.wake_all(); }); } } diff --git a/tokio/src/runtime/time/handle.rs b/tokio/src/runtime/time/handle.rs index ef7bc742530..164e5b6c209 100644 --- a/tokio/src/runtime/time/handle.rs +++ b/tokio/src/runtime/time/handle.rs @@ -1,4 +1,5 @@ -use crate::runtime::time::{TimeSource, Wheel}; +use crate::runtime::time::EntryTransitionToWakingUp; +use crate::runtime::time::{TimeSource, WakeQueue, Wheel}; use std::fmt; cfg_test_util! { @@ -21,7 +22,12 @@ pub(crate) struct Handle { } impl Handle { - pub(crate) fn process_at_time(&self, wheel: &mut Wheel, mut now: u64) { + pub(crate) fn process_at_time( + &self, + wheel: &mut Wheel, + mut now: u64, + wake_queue: &mut WakeQueue, + ) { if now < wheel.elapsed() { // Time went backwards! This normally shouldn't happen as the Rust language // guarantees that an Instant is monotonic, but can happen when running @@ -33,8 +39,19 @@ impl Handle { } while let Some(hdl) = wheel.poll(now) { - unsafe { - hdl.wake(); + match hdl.transition_to_waking_up() { + EntryTransitionToWakingUp::Success => { + // Safety: + // + // 1. this entry is not in the timer wheel + // 2. AND this entry is not in any cancellation queue + unsafe { + wake_queue.push_front(hdl); + } + } + EntryTransitionToWakingUp::Cancelling => { + // cancellation happens concurrently, no need to wake + } } } } @@ -44,7 +61,9 @@ impl Handle { // Advance time forward to the end of time. // This will ensure that all timers are fired. let max_tick = u64::MAX; - self.process_at_time(wheel, max_tick); + let mut wake_queue = WakeQueue::new(); + self.process_at_time(wheel, max_tick, &mut wake_queue); + wake_queue.wake_all(); } /// Returns the time source associated with this handle. 
diff --git a/tokio/src/runtime/time/mod.rs b/tokio/src/runtime/time/mod.rs index 2de0132117d..a2b8a1939b3 100644 --- a/tokio/src/runtime/time/mod.rs +++ b/tokio/src/runtime/time/mod.rs @@ -18,9 +18,11 @@ pub(crate) use source::TimeSource; mod wheel; cfg_rt_and_time! { pub(crate) use wheel::{Insert, EntryHandle, EntryState, EntryCancelling}; + pub(crate) use wheel::TransitionToWakingUp as EntryTransitionToWakingUp; } cfg_rt_or_time! { pub(crate) use wheel::cancellation_queue; + pub(crate) use wheel::WakeQueue; pub(crate) use wheel::Wheel; } diff --git a/tokio/src/runtime/time/tests/mod.rs b/tokio/src/runtime/time/tests/mod.rs index 9d851faca60..981002779f7 100644 --- a/tokio/src/runtime/time/tests/mod.rs +++ b/tokio/src/runtime/time/tests/mod.rs @@ -9,6 +9,7 @@ use futures::task::noop_waker_ref; use crate::loom::thread; use crate::runtime::time::timer::with_current_wheel; use crate::runtime::time::Context as TimeContext; +use crate::runtime::time::WakeQueue; use crate::runtime::Handle; use crate::sync::oneshot; @@ -46,18 +47,22 @@ async fn fire_all_timers(handle: &Handle, exit_rx: oneshot::Receiver<()>) { break; } + let mut wake_queue = WakeQueue::new(); + // In the `block_on` context, we can get the current wheel // fire all timers. with_current_wheel(&handle.inner, |maybe_wheel| match maybe_wheel { Some(TimeContext::Running { wheel, .. }) => { let time = handle.inner.driver().time(); - time.process_at_time(wheel, u64::MAX); + time.process_at_time(wheel, u64::MAX, &mut wake_queue); } #[cfg(feature = "rt-multi-thread")] Some(TimeContext::Shutdown) => panic!("runtime is shutting down"), None => panic!("no current wheel"), }); + wake_queue.wake_all(); + thread::yield_now(); } } @@ -65,15 +70,19 @@ async fn fire_all_timers(handle: &Handle, exit_rx: oneshot::Receiver<()>) { // This function must be called inside the `rt.block_on`. 
fn process_at_time(handle: &Handle, at: u64) { let handle = &handle.inner; + let mut wake_queue = WakeQueue::new(); + with_current_wheel(handle, |maybe_wheel| match maybe_wheel { Some(TimeContext::Running { wheel, .. }) => { let time = handle.driver().time(); - time.process_at_time(wheel, at); + time.process_at_time(wheel, at, &mut wake_queue); } #[cfg(feature = "rt-multi-thread")] Some(TimeContext::Shutdown) => panic!("runtime is shutting down"), None => panic!("no current wheel"), }); + + wake_queue.wake_all(); } fn rt(start_paused: bool) -> crate::runtime::Runtime { diff --git a/tokio/src/runtime/time/wheel/cancellation_queue.rs b/tokio/src/runtime/time/wheel/cancellation_queue.rs index a5338c6d844..eeb54f38a9b 100644 --- a/tokio/src/runtime/time/wheel/cancellation_queue.rs +++ b/tokio/src/runtime/time/wheel/cancellation_queue.rs @@ -34,7 +34,7 @@ impl Inner { /// /// Behavior is undefined if any of the following conditions are violated: /// - /// - `hdl` must not in any cancellation queue. + /// - `hdl` must not in any [`super::cancellation_queue`], and also mus not in any [`WakeQueue`]. unsafe fn push_front(&mut self, hdl: EntryHandle) { self.list.push_front(hdl); } diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index 1120d449853..a7c26284c1d 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -10,24 +10,39 @@ pub(crate) type EntryList = linked_list::LinkedList; #[derive(Debug)] enum PrivState { - /// A pure new entry, no any changes to the state. + /// A pure new entry, and it MIGHT be in the inject timer queue. Unregistered(Waker), - /// The entry is registered to the timer wheel, - /// but not in the pending queue of the timer wheel. + /// The [`Entry`] is registered in the timer wheel, + /// + /// - The [`Entry::wheel_pointers`] is currently in use by [`super::Level::slot`]. + /// - The [`Entry::extra_pointers`] is NOT currently in use. 
Registered(Sender, Waker), - /// The entry is in the pending queue of the timer wheel, - /// and not in any wheel level, which means that - /// the entry is reached its deadline and waiting to be woken up. + /// The [`Entry`] is expired AND in the [`super::Wheel::pending`], + /// + /// - The [`Entry::wheel_pointers`] is currently in use by [`super::Wheel::pending`]. + /// - The [`Entry::extra_pointers`] is NOT currently in use. Pending(Sender, Waker), - /// The waker has been called, and the entry is no longer in the timer wheel - /// (both each wheel level and the pending queue), which means that - /// the entry is reached its deadline and woken up. + /// The [`Entry`] is taken out from the [`super::Wheel::pending`] and in + /// the [`super::WakeQueue`], + /// + /// - The [`Entry::wheel_pointers`] is NOT currently in use. + /// - The [`Entry::extra_pointers`] is currently in use by [`super::WakeQueue`]. + WakingUp(Waker), + + /// The waker has been called, and the entry is not in timer wheel, and not in cancellation queue, + /// and also not in wake queue. + /// + /// - The [`Entry::wheel_pointers`] is NOT currently in use. + /// - The [`Entry::extra_pointers`] is NOT currently in use. WokenUp, - /// The [`Handle`] has been sent to the [`Sender`]. + /// The [`Entry`] is in the cancellation queue, + /// + /// - The [`Entry::wheel_pointers`] is MAYBE in use by [`super::Level::slot`] or [`super::Wheel::pending`]. + /// - The [`Entry::extra_pointers`] is currently in use by [`super::cancellation_queue`]. Cancelling(Cancelling), } @@ -36,8 +51,9 @@ pub(crate) struct Entry { /// The intrusive pointers used by timer wheel. wheel_pointers: linked_list::Pointers, - /// The intrusive pointer used by cancellation queue. - cancel_pointers: linked_list::Pointers, + /// The intrusive pointer used by either [`CancellationQueueEntry`]. + /// or [`WakeQueueEntry`]. + extra_pointers: linked_list::Pointers, /// The tick when this entry is scheduled to expire. 
deadline: u64, @@ -75,7 +91,7 @@ unsafe impl linked_list::Link for Entry { } } -/// An ZST to allow [`super::cancellation_queue`] to utilize the [`Entry::cancel_pointers`] +/// An ZST to allow [`super::cancellation_queue`] to utilize the [`Entry::extra_pointers`] /// by impl [`linked_list::Link`] as we cannot impl it on [`Entry`] /// directly due to the conflicting implementations used by [`Entry::wheel_pointers`]. /// @@ -101,7 +117,38 @@ unsafe impl linked_list::Link for CancellationQueueEntry { target: NonNull, ) -> NonNull> { let this = target.as_ptr(); - let field = unsafe { std::ptr::addr_of_mut!((*this).cancel_pointers) }; + let field = unsafe { std::ptr::addr_of_mut!((*this).extra_pointers) }; + unsafe { NonNull::new_unchecked(field) } + } +} + +/// An ZST to allow [`super::WakeQueue`] to utilize the [`Entry::extra_pointers`] +/// by impl [`linked_list::Link`] as we cannot impl it on [`Entry`] +/// directly due to the conflicting implementations used by [`Entry::wheel_pointers`]. +/// +/// This type should never be constructed. +pub(super) struct WakeQueueEntry; + +// Safety: `Entry` is always in an `Arc`. 
+unsafe impl linked_list::Link for WakeQueueEntry { + type Handle = Handle; + type Target = Entry; + + fn as_raw(hdl: &Self::Handle) -> NonNull { + unsafe { NonNull::new_unchecked(Arc::as_ptr(&hdl.entry).cast_mut()) } + } + + unsafe fn from_raw(ptr: NonNull) -> Self::Handle { + Handle { + entry: unsafe { Arc::from_raw(ptr.as_ptr()) }, + } + } + + unsafe fn pointers( + target: NonNull, + ) -> NonNull> { + let this = target.as_ptr(); + let field = unsafe { std::ptr::addr_of_mut!((*this).extra_pointers) }; unsafe { NonNull::new_unchecked(field) } } } @@ -122,7 +169,7 @@ impl Handle { pub(crate) fn new(deadline: u64, waker: &Waker) -> Self { let entry = Arc::new(Entry { wheel_pointers: linked_list::Pointers::new(), - cancel_pointers: linked_list::Pointers::new(), + extra_pointers: linked_list::Pointers::new(), deadline, state: Mutex::new(PrivState::Unregistered(waker.clone())), _pin: PhantomPinned, @@ -136,14 +183,16 @@ impl Handle { let mut lock = self.entry.state.lock(); match &*lock { // don't unlock — poisoning the `Mutex` stops others from using the bad state. - state @ (PrivState::Unregistered(..) | PrivState::Registered(..)) => { + state @ (PrivState::Unregistered(..) + | PrivState::Registered(..) + | PrivState::Pending(..)) => { panic!("corrupted state: {state:#?}") } - PrivState::Pending(..) => { + PrivState::WakingUp(..) => { let old_state = std::mem::replace(&mut *lock, PrivState::WokenUp); // Since state has been updated, no need to hold the lock. drop(lock); - if let PrivState::Pending(_, waker, ..) = old_state { + if let PrivState::WakingUp(waker) = old_state { waker.wake(); } else { unreachable!() @@ -171,11 +220,12 @@ impl Handle { } } // don't unlock — poisoning the `Mutex` stops others from using the bad state. - state @ (PrivState::Registered(..) | PrivState::WokenUp) => { + state @ (PrivState::Registered(..) + | PrivState::WokenUp + | PrivState::Pending(..) 
+ | PrivState::WakingUp(..)) => { panic!("corrupted state: {state:#?}") } - // don't unlock — poisoning the `Mutex` stops others from using the bad state. - PrivState::Pending(..) => panic!("corrupted state: State::Pending"), // don't wake up cancelling entries PrivState::Cancelling { .. } => (), } @@ -205,6 +255,13 @@ impl Handle { None } } + PrivState::WakingUp(old_waker) => { + if !old_waker.will_wake(waker) { + Some(std::mem::replace(old_waker, waker.clone())) + } else { + None + } + } PrivState::WokenUp | PrivState::Cancelling { .. } => None, // no need to update the waker }; @@ -223,7 +280,10 @@ impl Handle { (Some(new_state), TransitionToRegistered::Success) } // don't unlock — poisoning the `Mutex` stops others from using the bad state. - state @ (PrivState::Registered(..) | PrivState::Pending(..) | PrivState::WokenUp) => { + state @ (PrivState::Registered(..) + | PrivState::Pending(..) + | PrivState::WakingUp(..) + | PrivState::WokenUp) => { panic!("corrupted state: {state:#?}") } PrivState::Cancelling(cancelling) => match cancelling { @@ -236,11 +296,11 @@ impl Handle { // update the state and take back the old state let old_state = std::mem::replace(state, new_state); - if let PrivState::Unregistered(waker) = old_state { - // unlock before dropping the old waker - drop(lock); - drop(waker); - } + // unlock before dropping the old waker + drop(lock); + + // this also drops the old waker if the variant contains it. + drop(old_state); } ret @@ -262,7 +322,7 @@ impl Handle { (new_state, TransitionToPending::Success) } // don't unlock — poisoning the `Mutex` stops others from using the bad state. - state @ (PrivState::Pending(..) | PrivState::WokenUp) => { + state @ (PrivState::Pending(..) | PrivState::WakingUp(..) | PrivState::WokenUp) => { panic!("corrupted state: {state:#?}") } PrivState::Cancelling { .. 
} => { @@ -274,15 +334,76 @@ impl Handle { // update the state and take back the old state let old_state = std::mem::replace(state, new_state); - if let PrivState::Registered(_sender, waker) = old_state { - // unlock before dropping the old waker - drop(lock); - drop(waker); - } + // unlock before dropping the old waker + drop(lock); + + // this also drops the old waker if the variant contains it. + drop(old_state); ret } + pub(crate) fn transition_to_waking_up(&self) -> TransitionToWakingUp { + let mut lock = self.entry.state.lock(); + + let old_state = match &*lock { + // don't unlock — poisoning the `Mutex` stops others from using the bad state. + state @ (PrivState::Unregistered(..) | PrivState::Registered(..)) => { + panic!("corrupted state: {state:#?}") + } + PrivState::Pending(_cancel_tx, waker) => { + let new_state = PrivState::WakingUp(waker.clone()); + std::mem::replace(&mut *lock, new_state) + } + // don't unlock — poisoning the `Mutex` stops others from using the bad state. + state @ (PrivState::WakingUp(..) | PrivState::WokenUp) => { + panic!("corrupted state: {state:#?}") + } + PrivState::Cancelling { .. } => { + // no need to transition cancelling entry + return TransitionToWakingUp::Cancelling; + } + }; + + // unlock before dropping the old waker + drop(lock); + + // this also drops the old waker if the variant contains it. + drop(old_state); + + TransitionToWakingUp::Success + } + + pub(crate) fn transition_to_waking_up_unregistered(&self) -> TransitionToWakingUp { + let mut lock = self.entry.state.lock(); + + let old_state = match &*lock { + PrivState::Unregistered(waker) => { + let waker = waker.clone(); + std::mem::replace(&mut *lock, PrivState::WakingUp(waker)) + } + // don't unlock — poisoning the `Mutex` stops others from using the bad state. + state @ (PrivState::Registered(..) + | PrivState::WokenUp + | PrivState::Pending(..) + | PrivState::WakingUp(..)) => { + panic!("corrupted state: {state:#?}") + } + PrivState::Cancelling { .. 
} => { + // no need to transition cancelling entry + return TransitionToWakingUp::Cancelling; + } + }; + + // unlock before dropping the old waker + drop(lock); + + // this also drops the old waker if the variant contains it. + drop(old_state); + + TransitionToWakingUp::Success + } + pub(crate) fn transition_to_cancelling(&self) { let mut lock = self.entry.state.lock(); @@ -305,6 +426,11 @@ impl Handle { } *lock = PrivState::Cancelling(Cancelling::Pending); } + PrivState::WakingUp(..) => { + // Do nothing, this is because both `WakeQueue` and `CancellationQueue` + // use the same `extra_pointers` field in `Entry`. We cannot put the entry + // into both queues at the same time due to the nature of intrusive linked list. + } PrivState::WokenUp => (), // dropping and waking up happen concurrently // don't unlock — poisoning the `Mutex` stops others from using the bad state. PrivState::Cancelling(..) => panic!("should not be called twice"), @@ -321,6 +447,7 @@ impl Handle { PrivState::Unregistered(_) => State::Unregistered, PrivState::Registered(..) => State::Registered, PrivState::Pending(..) => State::Pending, + PrivState::WakingUp(..) => State::WakingUp, PrivState::WokenUp => State::WokenUp, PrivState::Cancelling(cancelling) => State::Cancelling(*cancelling), } @@ -374,14 +501,36 @@ pub(crate) enum TransitionToPending { Cancelling, } -#[derive(Clone, Copy)] +/// The result of the [`Handle::transition_to_waking_up`]` method. +pub(crate) enum TransitionToWakingUp { + /// The entry was successfully transitioned + /// to the waking up state. + Success, + + /// The entry is being cancelled, + /// no need to transition it to the waking up state. 
+ Cancelling, +} + +/// Public representation of the [`PrivState`] +#[derive(Debug, Clone, Copy)] pub(crate) enum State { + /// Same as [`PrivState::Unregistered`] Unregistered, + + /// Same as [`PrivState::Registered`] Registered, + + /// Same as [`PrivState::Pending`] Pending, + + /// Same as [`PrivState::WakingUp`] + WakingUp, + + /// Same as [`PrivState::WokenUp`] WokenUp, - /// The [`Handle`] has been sent to the [`Sender`]. + /// Same as [`PrivState::Cancelling`] Cancelling(Cancelling), } diff --git a/tokio/src/runtime/time/wheel/mod.rs b/tokio/src/runtime/time/wheel/mod.rs index 6f0706fa433..88405373dcb 100644 --- a/tokio/src/runtime/time/wheel/mod.rs +++ b/tokio/src/runtime/time/wheel/mod.rs @@ -11,11 +11,16 @@ pub(crate) use entry::Handle as EntryHandle; pub(crate) use entry::State as EntryState; use entry::TransitionToPending; use entry::TransitionToRegistered; -use entry::{CancellationQueueEntry, Entry, EntryList}; +pub(crate) use entry::TransitionToWakingUp; +use entry::{CancellationQueueEntry, WakeQueueEntry}; +use entry::{Entry, EntryList}; pub(crate) mod cancellation_queue; use cancellation_queue::Sender; +mod wake_queue; +pub(crate) use wake_queue::WakeQueue; + use std::array; use std::ptr::NonNull; diff --git a/tokio/src/runtime/time/wheel/wake_queue.rs b/tokio/src/runtime/time/wheel/wake_queue.rs new file mode 100644 index 00000000000..006876a3c53 --- /dev/null +++ b/tokio/src/runtime/time/wheel/wake_queue.rs @@ -0,0 +1,70 @@ +use super::{Entry, EntryHandle, EntryState}; +use crate::runtime::time::wheel::WakeQueueEntry; +use crate::util::linked_list; + +type EntryList = linked_list::LinkedList; + +/// A queue of entries that need to be woken up. +#[derive(Debug)] +pub(crate) struct WakeQueue { + list: EntryList, +} + +/// Safety: [`WakeQueue`] is protected by [`Mutex`]. +unsafe impl Send for WakeQueue {} + +/// Safety: [`WakeQueue`] is protected by [`Mutex`]. 
+unsafe impl Sync for WakeQueue {} + +impl Drop for WakeQueue { + fn drop(&mut self) { + // drain all entries without waking them up + while let Some(hdl) = self.list.pop_front() { + drop(hdl); + } + } +} + +impl WakeQueue { + pub(crate) fn new() -> Self { + Self { + list: EntryList::new(), + } + } + + pub(crate) fn is_empty(&self) -> bool { + self.list.is_empty() + } + + /// # Safety + /// + /// Behavior is undefined if any of the following conditions are violated: + /// + /// - `hdl` must not in any [`super::cancellation_queue`], and also mus not in any [`WakeQueue`]. + pub(crate) unsafe fn push_front(&mut self, hdl: EntryHandle) { + self.list.push_front(hdl); + } + + /// Wakes all entries in the wake queue. + /// + /// # Panics + /// + /// This function panics on any of the following conditions: + /// + /// - The entry state is in-consistent (i.e., `WokenUp` state in the wake queue). + /// - The waker panics while waking the entry. + pub(crate) fn wake_all(mut self) { + while let Some(hdl) = self.list.pop_front() { + match hdl.state() { + EntryState::Unregistered => hdl.wake_unregistered(), + state @ (EntryState::Registered | EntryState::Pending) => { + panic!("corrupted state: {state:#?}"); + } + EntryState::WakingUp => hdl.wake(), + // cancellation happens concurrently, no need to wake + EntryState::Cancelling(_) => (), + EntryState::WokenUp => panic!("corrupted state: woken up entry in wake queue"), + } + } + } +} From 0bd9db2dd1c95be04a0d6f9df707aeb8f5a080fb Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 16 Nov 2025 23:00:47 +0800 Subject: [PATCH 074/100] remove useless `Send` and `Sync` impl Signed-off-by: ADD-SP --- tokio/src/runtime/time/wheel/cancellation_queue.rs | 14 +------------- tokio/src/runtime/time/wheel/wake_queue.rs | 6 ------ 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/tokio/src/runtime/time/wheel/cancellation_queue.rs b/tokio/src/runtime/time/wheel/cancellation_queue.rs index eeb54f38a9b..73313b88d3a 100644 --- 
a/tokio/src/runtime/time/wheel/cancellation_queue.rs +++ b/tokio/src/runtime/time/wheel/cancellation_queue.rs @@ -10,12 +10,6 @@ struct Inner { list: EntryList, } -/// Safety: [`Inner`] is protected by [`Mutex`]. -unsafe impl Send for Inner {} - -/// Safety: [`Inner`] is protected by [`Mutex`]. -unsafe impl Sync for Inner {} - impl Drop for Inner { fn drop(&mut self) { // consume all entries @@ -34,7 +28,7 @@ impl Inner { /// /// Behavior is undefined if any of the following conditions are violated: /// - /// - `hdl` must not in any [`super::cancellation_queue`], and also mus not in any [`WakeQueue`]. + /// - `hdl` must not in any [`super::cancellation_queue`], and also mus not in any [`super::WakeQueue`]. unsafe fn push_front(&mut self, hdl: EntryHandle) { self.list.push_front(hdl); } @@ -71,12 +65,6 @@ pub(crate) struct Sender { inner: Arc>, } -/// Safety: [`Inner`] is protected by [`Mutex`]. -unsafe impl Send for Sender {} - -/// Safety: [`Inner`] is protected by [`Mutex`]. -unsafe impl Sync for Sender {} - impl Sender { /// # Safety /// diff --git a/tokio/src/runtime/time/wheel/wake_queue.rs b/tokio/src/runtime/time/wheel/wake_queue.rs index 006876a3c53..f2e04f009c8 100644 --- a/tokio/src/runtime/time/wheel/wake_queue.rs +++ b/tokio/src/runtime/time/wheel/wake_queue.rs @@ -10,12 +10,6 @@ pub(crate) struct WakeQueue { list: EntryList, } -/// Safety: [`WakeQueue`] is protected by [`Mutex`]. -unsafe impl Send for WakeQueue {} - -/// Safety: [`WakeQueue`] is protected by [`Mutex`]. -unsafe impl Sync for WakeQueue {} - impl Drop for WakeQueue { fn drop(&mut self) { // drain all entries without waking them up From 7e824dcb550d8d436d836ca5caeaeff142aed169 Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 16 Nov 2025 23:04:27 +0800 Subject: [PATCH 075/100] fixup! 
eliminate `Option` in `Core` --- tokio/src/runtime/scheduler/current_thread/mod.rs | 5 ++--- tokio/src/runtime/scheduler/multi_thread/worker.rs | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tokio/src/runtime/scheduler/current_thread/mod.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs index d79c9410e03..0392567c6c2 100644 --- a/tokio/src/runtime/scheduler/current_thread/mod.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -472,7 +472,7 @@ impl Context { self.defer.wake(); - self.maintain_local_timers_after_parking(handle, auto_advance_duration) + self.maintain_local_timers_after_parking(handle, auto_advance_duration); }); core @@ -631,10 +631,9 @@ impl Context { fn maintain_local_timers_after_parking( &self, - core: Box, _handle: &Handle, _auto_advance_duration: Option - ) -> Box { + ) { core } } // cfg_not_time! diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index 5353526886e..2f9f6c5f301 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -983,7 +983,7 @@ impl Context { &self, core: Box, _auto_advance_duration: Option - ) -> Box { + ) { core } } // cfg_not_time! 
From 38b49baeaaaa4c59834bfa272f34c851e023194e Mon Sep 17 00:00:00 2001 From: Qi Date: Sun, 16 Nov 2025 23:15:46 +0800 Subject: [PATCH 076/100] fix error reports of `cargo hack` Signed-off-by: ADD-SP --- tokio/src/runtime/context.rs | 36 ++++++++----------- tokio/src/runtime/mod.rs | 2 -- .../runtime/scheduler/current_thread/mod.rs | 4 +-- .../runtime/scheduler/multi_thread/worker.rs | 5 +-- tokio/src/runtime/time/mod.rs | 2 +- 5 files changed, 18 insertions(+), 31 deletions(-) diff --git a/tokio/src/runtime/context.rs b/tokio/src/runtime/context.rs index 41bcb413f63..d78935e7243 100644 --- a/tokio/src/runtime/context.rs +++ b/tokio/src/runtime/context.rs @@ -136,6 +136,21 @@ pub(crate) fn budget(f: impl FnOnce(&Cell) -> R) -> Result Result { + CONTEXT.try_with(|ctx| { + match ctx.thread_id.get() { + Some(id) => id, + None => { + let id = ThreadId::next(); + ctx.thread_id.set(Some(id)); + id + } + } + }) + } + pub(crate) fn set_current_task_id(id: Option) -> Option { CONTEXT.try_with(|ctx| ctx.current_task_id.replace(id)).unwrap_or(None) } @@ -183,24 +198,3 @@ cfg_rt! { } } } - -cfg_rt_or_time! { - use crate::runtime::ThreadId; - - pub(crate) fn thread_id() -> Result { - #[cfg(not(feature = "rt"))] - panic!("thread_id() called without the 'rt' feature enabled"); - - #[cfg(feature = "rt")] - CONTEXT.try_with(|ctx| { - match ctx.thread_id.get() { - Some(id) => id, - None => { - let id = ThreadId::next(); - ctx.thread_id.set(Some(id)); - id - } - } - }) - } -} diff --git a/tokio/src/runtime/mod.rs b/tokio/src/runtime/mod.rs index f15b8dcdea6..4b808b604f9 100644 --- a/tokio/src/runtime/mod.rs +++ b/tokio/src/runtime/mod.rs @@ -471,9 +471,7 @@ cfg_rt! { /// After thread starts / before thread stops type Callback = std::sync::Arc; -} -cfg_rt_or_time! 
{ mod thread_id; pub(crate) use thread_id::ThreadId; } diff --git a/tokio/src/runtime/scheduler/current_thread/mod.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs index 0392567c6c2..ef6ac718c4e 100644 --- a/tokio/src/runtime/scheduler/current_thread/mod.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -622,11 +622,10 @@ impl Context { cfg_not_time! { fn maintain_local_timers_before_parking( &self, - core: Box, _handle: &Handle, park_duration: Option ) -> MaintainLocalTimer { - MaintainLocalTimer { core, park_duration, auto_advance_duration: None } + MaintainLocalTimer { park_duration, auto_advance_duration: None } } fn maintain_local_timers_after_parking( @@ -634,7 +633,6 @@ impl Context { _handle: &Handle, _auto_advance_duration: Option ) { - core } } // cfg_not_time! } diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index 2f9f6c5f301..85b17de81de 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -973,18 +973,15 @@ impl Context { cfg_not_time! { fn maintain_local_timers_before_parking( &self, - core: Box, park_duration: Option ) -> MaintainLocalTimer { - MaintainLocalTimer { core, park_duration, auto_advance_duration: None } + MaintainLocalTimer { park_duration, auto_advance_duration: None } } fn maintain_local_timers_after_parking( &self, - core: Box, _auto_advance_duration: Option ) { - core } } // cfg_not_time! } diff --git a/tokio/src/runtime/time/mod.rs b/tokio/src/runtime/time/mod.rs index a2b8a1939b3..d8ca0f8c262 100644 --- a/tokio/src/runtime/time/mod.rs +++ b/tokio/src/runtime/time/mod.rs @@ -18,12 +18,12 @@ pub(crate) use source::TimeSource; mod wheel; cfg_rt_and_time! { pub(crate) use wheel::{Insert, EntryHandle, EntryState, EntryCancelling}; - pub(crate) use wheel::TransitionToWakingUp as EntryTransitionToWakingUp; } cfg_rt_or_time! 
{ pub(crate) use wheel::cancellation_queue; pub(crate) use wheel::WakeQueue; pub(crate) use wheel::Wheel; + pub(crate) use wheel::TransitionToWakingUp as EntryTransitionToWakingUp; } cfg_test_util! { From e45109f46426c06bcb0cdf76de3c1410b5d27f2f Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 18 Nov 2025 22:52:23 +0800 Subject: [PATCH 077/100] simplify the `PrivState` Signed-off-by: ADD-SP --- .../runtime/scheduler/current_thread/mod.rs | 27 +- tokio/src/runtime/scheduler/mod.rs | 15 +- .../runtime/scheduler/multi_thread/worker.rs | 27 +- tokio/src/runtime/scheduler/util.rs | 91 ++-- tokio/src/runtime/time/handle.rs | 18 +- tokio/src/runtime/time/mod.rs | 12 +- tokio/src/runtime/time/tests/mod.rs | 49 +- tokio/src/runtime/time/timer.rs | 68 ++- .../time/wheel/cancellation_queue/tests.rs | 2 +- tokio/src/runtime/time/wheel/entry.rs | 449 +++--------------- tokio/src/runtime/time/wheel/mod.rs | 131 ++--- tokio/src/runtime/time/wheel/wake_queue.rs | 13 +- 12 files changed, 309 insertions(+), 593 deletions(-) diff --git a/tokio/src/runtime/scheduler/current_thread/mod.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs index ef6ac718c4e..12c680bdee0 100644 --- a/tokio/src/runtime/scheduler/current_thread/mod.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -524,13 +524,19 @@ impl Context { let core = maybe_core.as_mut().expect("core missing"); let time_cx = &mut core.time_context; - util::time::remove_cancelled_timers(&mut time_cx.wheel, &mut time_cx.canc_rx); + util::time::process_registration_queue( + &mut time_cx.registration_queue, + &mut time_cx.wheel, + &time_cx.canc_tx, + &mut wake_queue, + ); util::time::insert_inject_timers( &mut time_cx.wheel, &time_cx.canc_tx, handle.take_remote_timers(), &mut wake_queue, ); + util::time::remove_cancelled_timers(&mut time_cx.wheel, &mut time_cx.canc_rx); let should_yield = !wake_queue.is_empty(); let next_timer = @@ -603,15 +609,28 @@ impl Context { f(core.as_mut().map(|c| c.as_mut())) } - pub(crate) fn 
with_wheel(&self, f: F) -> R + #[cfg(test)] + pub(crate) fn with_time_context2(&self, f: F) -> R + where + F: FnOnce(Option<&mut crate::runtime::time::Context2>) -> R, + { + self.with_core(|maybe_core| { + match maybe_core { + Some(core) => f(Some(&mut core.time_context)), + None => f(None), + } + }) + } + + pub(crate) fn with_registration_queue(&self, f: F) -> R where F: FnOnce(Option>) -> R, { self.with_core(|maybe_core| { match maybe_core { Some(core) => f(Some(crate::runtime::time::Context::Running { - wheel: &mut core.time_context.wheel, - canc_tx: &core.time_context.canc_tx, + registration_queue: &mut core.time_context.registration_queue, + elapsed: core.time_context.wheel.elapsed(), })), None => f(None), } diff --git a/tokio/src/runtime/scheduler/mod.rs b/tokio/src/runtime/scheduler/mod.rs index c65e7582152..16673b32ddb 100644 --- a/tokio/src/runtime/scheduler/mod.rs +++ b/tokio/src/runtime/scheduler/mod.rs @@ -24,7 +24,7 @@ cfg_rt_multi_thread! { pub(crate) use multi_thread::MultiThread; } -mod util; +pub(crate) mod util; use crate::runtime::driver; @@ -277,11 +277,20 @@ cfg_rt! { } cfg_time! 
{ - pub(crate) fn with_wheel(&self, f: F) -> R + pub(crate) fn with_registration_queue(&self, f: F) -> R where F: FnOnce(Option>) -> R, { - match_flavor!(self, Context(context) => context.with_wheel(f)) + match_flavor!(self, Context(context) => context.with_registration_queue(f)) + } + + + #[cfg(test)] + pub(crate) fn with_time_context2(&self, f: F) -> R + where + F: FnOnce(Option<&mut crate::runtime::time::Context2>) -> R, + { + match_flavor!(self, Context(context) => context.with_time_context2(f)) } } diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index 85b17de81de..af3bdee78b3 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -874,13 +874,19 @@ impl Context { let core = maybe_core.as_mut().expect("core missing"); let time_cx = &mut core.time_context; - util::time::remove_cancelled_timers(&mut time_cx.wheel, &mut time_cx.canc_rx); + util::time::process_registration_queue( + &mut time_cx.registration_queue, + &mut time_cx.wheel, + &time_cx.canc_tx, + &mut wake_queue, + ); util::time::insert_inject_timers( &mut time_cx.wheel, &time_cx.canc_tx, handle.take_remote_timers(), &mut wake_queue, ); + util::time::remove_cancelled_timers(&mut time_cx.wheel, &mut time_cx.canc_rx); let should_yield = !wake_queue.is_empty(); let next_timer = @@ -953,7 +959,20 @@ impl Context { } } - pub(crate) fn with_wheel(&self, f: F) -> R + #[cfg(test)] + pub(crate) fn with_time_context2(&self, f: F) -> R + where + F: FnOnce(Option<&mut crate::runtime::time::Context2>) -> R, + { + self.with_core(|maybe_core| { + match maybe_core { + Some(core) => f(Some(&mut core.time_context)), + None => f(None), + } + }) + } + + pub(crate) fn with_registration_queue(&self, f: F) -> R where F: FnOnce(Option>) -> R, { @@ -961,8 +980,8 @@ impl Context { match maybe_core { Some(core) if core.is_shutdown => f(Some(crate::runtime::time::Context::Shutdown)), Some(core) 
=> f(Some(crate::runtime::time::Context::Running { - wheel: &mut core.time_context.wheel, - canc_tx: &core.time_context.canc_tx, + registration_queue: &mut core.time_context.registration_queue, + elapsed: core.time_context.wheel.elapsed(), })), None => f(None), } diff --git a/tokio/src/runtime/scheduler/util.rs b/tokio/src/runtime/scheduler/util.rs index 7af68367034..d7bdba76279 100644 --- a/tokio/src/runtime/scheduler/util.rs +++ b/tokio/src/runtime/scheduler/util.rs @@ -2,9 +2,9 @@ cfg_rt_and_time! { pub(crate) mod time { use crate::runtime::{scheduler::driver}; use crate::runtime::time::{Wheel, WakeQueue}; - use crate::runtime::time::{EntryHandle, EntryState, EntryCancelling}; + use crate::runtime::time::EntryHandle; + use crate::runtime::time::RegistrationQueue; use crate::runtime::time::cancellation_queue::{Sender, Receiver}; - use crate::runtime::time::EntryTransitionToWakingUp; use std::time::Duration; pub(crate) fn min_duration(a: Option, b: Option) -> Option { @@ -16,35 +16,42 @@ cfg_rt_and_time! { } } + pub(crate) fn process_registration_queue( + registration_queue: &mut RegistrationQueue, + wheel: &mut Wheel, + tx: &Sender, + wake_queue: &mut WakeQueue, + ) { + while let Some(hdl) = registration_queue.pop_front() { + if hdl.deadline() <= wheel.elapsed() { + unsafe { + wake_queue.push_front(hdl); + } + } else { + // Safety: the entry is not registered yet + unsafe { + wheel.insert(hdl, tx.clone()); + } + } + } + } + pub(crate) fn insert_inject_timers( wheel: &mut Wheel, tx: &Sender, inject: Vec, wake_queue: &mut WakeQueue, ) { - use crate::runtime::time::Insert; - - // process injected timers for hdl in inject { - match unsafe { wheel.insert(hdl.clone(), tx.clone()) } { - Insert::Success => {} - Insert::Elapsed => { - match hdl.transition_to_waking_up_unregistered() { - EntryTransitionToWakingUp::Success => { - // Safety: - // - // 1. this entry is not in the timer wheel - // 2. 
AND this entry is not in any cancellation queue - unsafe { - wake_queue.push_front(hdl); - } - } - EntryTransitionToWakingUp::Cancelling => { - // cancellation happens concurrently, no need to wake - } - } + if hdl.deadline() <= wheel.elapsed() { + unsafe { + wake_queue.push_front(hdl); + } + } else { + // Safety: the entry is not registered yet + unsafe { + wheel.insert(hdl, tx.clone()); } - Insert::Cancelling => {} } } } @@ -54,24 +61,13 @@ cfg_rt_and_time! { rx: &mut Receiver, ) { for hdl in rx.recv_all() { - match hdl.state() { - // INVARIANT: the state always be transitioned to Cancelling before being sent to cancellation queue - state @ (EntryState::Unregistered | EntryState::Registered | EntryState::Pending | EntryState::WakingUp) => { - panic!("corrupted state: {state:#?}"); - } - EntryState::Cancelling(cancelling) => match cancelling { - EntryCancelling::Unregistered => (), - EntryCancelling::Registered | EntryCancelling::Pending => { - // Safety: - // 1. entry is either in slot or pending list - // 2. `rx` ensures that the entry is registered in this thread. - unsafe { - wheel.remove(hdl); - } - } + debug_assert!(hdl.is_cancelled()); + + if hdl.deadline() > wheel.elapsed() { + // Safety: the entry is registered in THIS wheel + unsafe { + wheel.remove(hdl); } - // INVARIANT: the state always be transitioned to Cancelling before being sent to cancellation queue - EntryState::WokenUp => panic!("corrupted state: `EntryState::WokenUp`"), } } } @@ -196,18 +192,9 @@ cfg_rt_and_time! { let mut wake_queue = WakeQueue::new(); // simply wake all unregistered timers for hdl in inject { - match hdl.transition_to_waking_up_unregistered() { - EntryTransitionToWakingUp::Success => { - // Safety: - // - // 1. this entry is not in the timer wheel - // 2. 
AND this entry is not in any cancellation queue - unsafe { - wake_queue.push_front(hdl); - } - } - EntryTransitionToWakingUp::Cancelling => { - // cancellation happens concurrently, no need to wake + if !hdl.is_cancelled() { + unsafe { + wake_queue.push_front(hdl); } } } diff --git a/tokio/src/runtime/time/handle.rs b/tokio/src/runtime/time/handle.rs index 164e5b6c209..111d46089d0 100644 --- a/tokio/src/runtime/time/handle.rs +++ b/tokio/src/runtime/time/handle.rs @@ -1,4 +1,3 @@ -use crate::runtime::time::EntryTransitionToWakingUp; use crate::runtime::time::{TimeSource, WakeQueue, Wheel}; use std::fmt; @@ -38,22 +37,7 @@ impl Handle { now = wheel.elapsed(); } - while let Some(hdl) = wheel.poll(now) { - match hdl.transition_to_waking_up() { - EntryTransitionToWakingUp::Success => { - // Safety: - // - // 1. this entry is not in the timer wheel - // 2. AND this entry is not in any cancellation queue - unsafe { - wake_queue.push_front(hdl); - } - } - EntryTransitionToWakingUp::Cancelling => { - // cancellation happens concurrently, no need to wake - } - } - } + wheel.take_expired(now, wake_queue); } pub(crate) fn shutdown(&self, wheel: &mut Wheel) { diff --git a/tokio/src/runtime/time/mod.rs b/tokio/src/runtime/time/mod.rs index d8ca0f8c262..5ebca09393e 100644 --- a/tokio/src/runtime/time/mod.rs +++ b/tokio/src/runtime/time/mod.rs @@ -17,13 +17,13 @@ pub(crate) use source::TimeSource; mod wheel; cfg_rt_and_time! { - pub(crate) use wheel::{Insert, EntryHandle, EntryState, EntryCancelling}; + pub(crate) use wheel::EntryHandle; } cfg_rt_or_time! { pub(crate) use wheel::cancellation_queue; + pub(crate) use wheel::RegistrationQueue; pub(crate) use wheel::WakeQueue; pub(crate) use wheel::Wheel; - pub(crate) use wheel::TransitionToWakingUp as EntryTransitionToWakingUp; } cfg_test_util! { @@ -144,10 +144,8 @@ cfg_rt_or_time! { pub(crate) enum Context<'a> { /// The runtime is running, we can access it. 
Running { - /// the local time wheel - wheel: &'a mut Wheel, - /// channel to push timers that are pending cancellation - canc_tx: &'a cancellation_queue::Sender, + registration_queue: &'a mut RegistrationQueue, + elapsed: u64, }, #[cfg(feature = "rt-multi-thread")] /// The runtime is shutting down, no timers can be registered. @@ -158,6 +156,7 @@ cfg_rt_or_time! { /// fire/cancel timers. pub(crate) struct Context2 { pub(crate) wheel: Wheel, + pub(crate) registration_queue: RegistrationQueue, pub(crate) canc_tx: cancellation_queue::Sender, pub(crate) canc_rx: cancellation_queue::Receiver, } @@ -167,6 +166,7 @@ cfg_rt_or_time! { let (canc_tx, canc_rx) = cancellation_queue::new(); Self { wheel: Wheel::new(), + registration_queue: RegistrationQueue::new(), canc_tx, canc_rx, } diff --git a/tokio/src/runtime/time/tests/mod.rs b/tokio/src/runtime/time/tests/mod.rs index 981002779f7..9259de3ce4b 100644 --- a/tokio/src/runtime/time/tests/mod.rs +++ b/tokio/src/runtime/time/tests/mod.rs @@ -7,8 +7,8 @@ use std::{task::Context, time::Duration}; use futures::task::noop_waker_ref; use crate::loom::thread; -use crate::runtime::time::timer::with_current_wheel; -use crate::runtime::time::Context as TimeContext; +use crate::runtime::scheduler::util::time::process_registration_queue; +use crate::runtime::time::timer::with_current_time_context2; use crate::runtime::time::WakeQueue; use crate::runtime::Handle; use crate::sync::oneshot; @@ -51,14 +51,18 @@ async fn fire_all_timers(handle: &Handle, exit_rx: oneshot::Receiver<()>) { // In the `block_on` context, we can get the current wheel // fire all timers. - with_current_wheel(&handle.inner, |maybe_wheel| match maybe_wheel { - Some(TimeContext::Running { wheel, .. 
}) => { - let time = handle.inner.driver().time(); - time.process_at_time(wheel, u64::MAX, &mut wake_queue); - } - #[cfg(feature = "rt-multi-thread")] - Some(TimeContext::Shutdown) => panic!("runtime is shutting down"), - None => panic!("no current wheel"), + with_current_time_context2(&handle.inner, |maybe_time_cx2| { + let time_cx2 = maybe_time_cx2.unwrap(); + + process_registration_queue( + &mut time_cx2.registration_queue, + &mut time_cx2.wheel, + &time_cx2.canc_tx, + &mut wake_queue, + ); + + let time = handle.inner.driver().time(); + time.process_at_time(&mut time_cx2.wheel, u64::MAX, &mut wake_queue); }); wake_queue.wake_all(); @@ -70,19 +74,22 @@ async fn fire_all_timers(handle: &Handle, exit_rx: oneshot::Receiver<()>) { // This function must be called inside the `rt.block_on`. fn process_at_time(handle: &Handle, at: u64) { let handle = &handle.inner; - let mut wake_queue = WakeQueue::new(); - with_current_wheel(handle, |maybe_wheel| match maybe_wheel { - Some(TimeContext::Running { wheel, .. 
}) => { - let time = handle.driver().time(); - time.process_at_time(wheel, at, &mut wake_queue); - } - #[cfg(feature = "rt-multi-thread")] - Some(TimeContext::Shutdown) => panic!("runtime is shutting down"), - None => panic!("no current wheel"), - }); + with_current_time_context2(handle, |maybe_time_cx2| { + let time_cx2 = maybe_time_cx2.unwrap(); - wake_queue.wake_all(); + let mut wake_queue = WakeQueue::new(); + process_registration_queue( + &mut time_cx2.registration_queue, + &mut time_cx2.wheel, + &time_cx2.canc_tx, + &mut wake_queue, + ); + + let time = handle.driver().time(); + time.process_at_time(&mut time_cx2.wheel, at, &mut wake_queue); + wake_queue.wake_all(); + }); } fn rt(start_paused: bool) -> crate::runtime::Runtime { diff --git a/tokio/src/runtime/time/timer.rs b/tokio/src/runtime/time/timer.rs index 01a50803b2d..2d5bde725a3 100644 --- a/tokio/src/runtime/time/timer.rs +++ b/tokio/src/runtime/time/timer.rs @@ -1,6 +1,5 @@ use super::wheel::EntryHandle; use crate::runtime::scheduler::Handle as SchedulerHandle; -use crate::runtime::time::wheel::Insert; use crate::runtime::time::Context as TimeContext; use crate::time::Instant; @@ -34,7 +33,7 @@ impl std::fmt::Debug for Timer { impl Drop for Timer { fn drop(&mut self) { if let Some(entry) = self.entry.take() { - entry.transition_to_cancelling(); + entry.cancel(); } } } @@ -62,25 +61,31 @@ impl Timer { fn register(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> { let this = self.get_mut(); - with_current_wheel(&this.sched_handle, |maybe_time_cx| { + with_current_registration_queue(&this.sched_handle, |maybe_time_cx| { let deadline = deadline_to_tick(&this.sched_handle, this.deadline); - let hdl = EntryHandle::new(deadline, cx.waker()); match maybe_time_cx { - Some(TimeContext::Running { wheel, canc_tx }) => { - // Safety: the entry is not registered yet - match unsafe { wheel.insert(hdl.clone(), canc_tx.clone()) } { - Insert::Success => { - this.entry = Some(hdl); - Poll::Pending - } - 
Insert::Elapsed => Poll::Ready(()), - Insert::Cancelling => Poll::Pending, + Some(TimeContext::Running { + registration_queue: _, + elapsed, + }) if deadline <= elapsed => Poll::Ready(()), + + Some(TimeContext::Running { + registration_queue, + elapsed: _, + }) => { + let hdl = EntryHandle::new(deadline, cx.waker().clone()); + this.entry = Some(hdl.clone()); + unsafe { + registration_queue.push_front(hdl); } + Poll::Pending } #[cfg(feature = "rt-multi-thread")] Some(TimeContext::Shutdown) => panic!("{RUNTIME_SHUTTING_DOWN_ERROR}"), + _ => { + let hdl = EntryHandle::new(deadline, cx.waker().clone()); this.entry = Some(hdl.clone()); push_from_remote(&this.sched_handle, hdl); Poll::Pending @@ -93,7 +98,7 @@ impl Timer { match self.entry.as_ref() { Some(entry) if entry.is_woken_up() => Poll::Ready(()), Some(entry) => { - entry.register_waker(cx.waker()); + entry.register_waker(cx.waker().clone()); Poll::Pending } None => self.register(cx), @@ -101,7 +106,7 @@ impl Timer { } } -pub(super) fn with_current_wheel(hdl: &SchedulerHandle, f: F) -> R +fn with_current_registration_queue(hdl: &SchedulerHandle, f: F) -> R where F: FnOnce(Option>) -> R, { @@ -124,7 +129,38 @@ where f(None) } else { context::with_scheduler(|maybe_cx| match maybe_cx { - Some(cx) => cx.with_wheel(f), + Some(cx) => cx.with_registration_queue(f), + None => f(None), + }) + } + } +} + +#[cfg(test)] +pub(super) fn with_current_time_context2(hdl: &SchedulerHandle, f: F) -> R +where + F: FnOnce(Option<&mut crate::runtime::time::Context2>) -> R, +{ + #[cfg(not(feature = "rt"))] + { + let (_, _) = (hdl, f); + panic!("Tokio runtime is not enabled, cannot access the current wheel"); + } + + #[cfg(feature = "rt")] + { + use crate::runtime::context; + + let is_same_rt = + context::with_current(|cur_hdl| cur_hdl.is_same_runtime(hdl)).unwrap_or_default(); + + if !is_same_rt { + // We don't want to create the timer in one runtime, + // but register it in a different runtime's timer wheel. 
+ f(None) + } else { + context::with_scheduler(|maybe_cx| match maybe_cx { + Some(cx) => cx.with_time_context2(f), None => f(None), }) } diff --git a/tokio/src/runtime/time/wheel/cancellation_queue/tests.rs b/tokio/src/runtime/time/wheel/cancellation_queue/tests.rs index 17b426e23de..79eb05a2955 100644 --- a/tokio/src/runtime/time/wheel/cancellation_queue/tests.rs +++ b/tokio/src/runtime/time/wheel/cancellation_queue/tests.rs @@ -9,7 +9,7 @@ const NUM_ITEMS: usize = 16; const NUM_ITEMS: usize = 64; fn new_handle() -> EntryHandle { - EntryHandle::new(0, &noop_waker()) + EntryHandle::new(0, noop_waker()) } fn model(f: F) { diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index a7c26284c1d..204aae73728 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -9,41 +9,11 @@ use std::task::Waker; pub(crate) type EntryList = linked_list::LinkedList; #[derive(Debug)] -enum PrivState { - /// A pure new entry, and it MIGHT be in the inject timer queue. - Unregistered(Waker), - - /// The [`Entry`] is registered in the timer wheel, - /// - /// - The [`Entry::wheel_pointers`] is currently in use by [`super::Level::slot`]. - /// - The [`Entry::extra_pointers`] is NOT currently in use. - Registered(Sender, Waker), - - /// The [`Entry`] is expired AND in the [`super::Wheel::pending`], - /// - /// - The [`Entry::wheel_pointers`] is currently in use by [`super::Wheel::pending`]. - /// - The [`Entry::extra_pointers`] is NOT currently in use. - Pending(Sender, Waker), - - /// The [`Entry`] is taken out from the [`super::Wheel::pending`] and in - /// the [`super::WakeQueue`], - /// - /// - The [`Entry::wheel_pointers`] is NOT currently in use. - /// - The [`Entry::extra_pointers`] is currently in use by [`super::WakeQueue`]. - WakingUp(Waker), - - /// The waker has been called, and the entry is not in timer wheel, and not in cancellation queue, - /// and also not in wake queue. 
- /// - /// - The [`Entry::wheel_pointers`] is NOT currently in use. - /// - The [`Entry::extra_pointers`] is NOT currently in use. - WokenUp, - - /// The [`Entry`] is in the cancellation queue, - /// - /// - The [`Entry::wheel_pointers`] is MAYBE in use by [`super::Level::slot`] or [`super::Wheel::pending`]. - /// - The [`Entry::extra_pointers`] is currently in use by [`super::cancellation_queue`]. - Cancelling(Cancelling), +struct State { + cancelled: bool, + woken_up: bool, + waker: Option, + cancel_tx: Option, } #[derive(Debug)] @@ -58,7 +28,7 @@ pub(crate) struct Entry { /// The tick when this entry is scheduled to expire. deadline: u64, - state: Mutex, + state: Mutex, /// Make the type `!Unpin` to prevent LLVM from emitting /// the `noalias` attribute for mutable references. @@ -91,6 +61,37 @@ unsafe impl linked_list::Link for Entry { } } +/// An ZST to allow [`super::registration_queue`] to utilize the [`Entry::extra_pointers`] +/// by impl [`linked_list::Link`] as we cannot impl it on [`Entry`] +/// directly due to the conflicting implementations used by [`Entry::wheel_pointers`]. +/// +/// This type should never be constructed. +pub(super) struct RegistrationQueueEntry; + +// Safety: `Entry` is always in an `Arc`. 
+unsafe impl linked_list::Link for RegistrationQueueEntry { + type Handle = Handle; + type Target = Entry; + + fn as_raw(hdl: &Self::Handle) -> NonNull { + unsafe { NonNull::new_unchecked(Arc::as_ptr(&hdl.entry).cast_mut()) } + } + + unsafe fn from_raw(ptr: NonNull) -> Self::Handle { + Handle { + entry: unsafe { Arc::from_raw(ptr.as_ptr()) }, + } + } + + unsafe fn pointers( + target: NonNull, + ) -> NonNull> { + let this = target.as_ptr(); + let field = unsafe { std::ptr::addr_of_mut!((*this).extra_pointers) }; + unsafe { NonNull::new_unchecked(field) } + } +} + /// An ZST to allow [`super::cancellation_queue`] to utilize the [`Entry::extra_pointers`] /// by impl [`linked_list::Link`] as we cannot impl it on [`Entry`] /// directly due to the conflicting implementations used by [`Entry::wheel_pointers`]. @@ -166,12 +167,19 @@ impl From<&Handle> for NonNull { } impl Handle { - pub(crate) fn new(deadline: u64, waker: &Waker) -> Self { + pub(crate) fn new(deadline: u64, waker: Waker) -> Self { + let state = State { + cancelled: false, + woken_up: false, + waker: Some(waker), + cancel_tx: None, + }; + let entry = Arc::new(Entry { wheel_pointers: linked_list::Pointers::new(), extra_pointers: linked_list::Pointers::new(), deadline, - state: Mutex::new(PrivState::Unregistered(waker.clone())), + state: Mutex::new(state), _pin: PhantomPinned, }); @@ -181,259 +189,49 @@ impl Handle { /// Wake the entry if it is already in the pending queue of the timer wheel. pub(crate) fn wake(&self) { let mut lock = self.entry.state.lock(); - match &*lock { - // don't unlock — poisoning the `Mutex` stops others from using the bad state. - state @ (PrivState::Unregistered(..) - | PrivState::Registered(..) - | PrivState::Pending(..)) => { - panic!("corrupted state: {state:#?}") - } - PrivState::WakingUp(..) => { - let old_state = std::mem::replace(&mut *lock, PrivState::WokenUp); - // Since state has been updated, no need to hold the lock. 
- drop(lock); - if let PrivState::WakingUp(waker) = old_state { - waker.wake(); - } else { - unreachable!() - } - } + + if !lock.cancelled { + lock.woken_up = true; // don't unlock — poisoning the `Mutex` stops others from using the bad state. - PrivState::WokenUp => panic!("corrupted state: `State::WokenUp`"), - // no need to wake up cancelling entry - PrivState::Cancelling { .. } => (), + let waker = lock + .waker + .take() + .expect("waker must be present when waking up"); + // unlock before calling waker + drop(lock); + waker.wake(); } } - /// Wake the entry if it has already elapsed before registering to the timer wheel. - pub(crate) fn wake_unregistered(&self) { + pub(crate) fn register_cancel_tx(&self, cancel_tx: Sender) { let mut lock = self.entry.state.lock(); - match &*lock { - PrivState::Unregistered(_waker) => { - let old_state = std::mem::replace(&mut *lock, PrivState::WokenUp); - // Since state has been updated, no need to hold the lock. - drop(lock); - if let PrivState::Unregistered(old_waker) = old_state { - old_waker.wake(); - } else { - unreachable!() - } - } + if !lock.cancelled && !lock.woken_up { // don't unlock — poisoning the `Mutex` stops others from using the bad state. - state @ (PrivState::Registered(..) - | PrivState::WokenUp - | PrivState::Pending(..) - | PrivState::WakingUp(..)) => { - panic!("corrupted state: {state:#?}") - } - // don't wake up cancelling entries - PrivState::Cancelling { .. 
} => (), + assert!( + lock.cancel_tx.replace(cancel_tx).is_none(), + "cancel_tx is already registered" + ); } } - pub(crate) fn register_waker(&self, waker: &Waker) { + pub(crate) fn register_waker(&self, waker: Waker) { let mut lock = self.entry.state.lock(); - let old_waker = match &mut *lock { - PrivState::Unregistered(old_waker) => { - if !old_waker.will_wake(waker) { - Some(std::mem::replace(old_waker, waker.clone())) - } else { - None - } - } - PrivState::Registered(_, old_waker) => { - if !old_waker.will_wake(waker) { - Some(std::mem::replace(old_waker, waker.clone())) - } else { - None - } - } - PrivState::Pending(_, old_waker, ..) => { - if !old_waker.will_wake(waker) { - Some(std::mem::replace(old_waker, waker.clone())) - } else { - None - } - } - PrivState::WakingUp(old_waker) => { - if !old_waker.will_wake(waker) { - Some(std::mem::replace(old_waker, waker.clone())) - } else { - None - } - } - PrivState::WokenUp | PrivState::Cancelling { .. } => None, // no need to update the waker - }; - - // unlock before dropping the old waker - drop(lock); - drop(old_waker); - } - - pub(crate) fn transition_to_registered(&self, cancel_tx: Sender) -> TransitionToRegistered { - let mut lock = self.entry.state.lock(); - let state: &mut PrivState = &mut lock; - - let (new_state, ret) = match state { - PrivState::Unregistered(waker) => { - let new_state = PrivState::Registered(cancel_tx, waker.clone()); - (Some(new_state), TransitionToRegistered::Success) - } - // don't unlock — poisoning the `Mutex` stops others from using the bad state. - state @ (PrivState::Registered(..) - | PrivState::Pending(..) - | PrivState::WakingUp(..) 
- | PrivState::WokenUp) => { - panic!("corrupted state: {state:#?}") - } - PrivState::Cancelling(cancelling) => match cancelling { - Cancelling::Unregistered => (None, TransitionToRegistered::Cancelling), - Cancelling::Registered | Cancelling::Pending => unreachable!(), - }, - }; - - if let Some(new_state) = new_state { - // update the state and take back the old state - let old_state = std::mem::replace(state, new_state); - - // unlock before dropping the old waker + if !lock.cancelled && !lock.woken_up { + let maybe_old_waker = lock.waker.replace(waker); + // unlock before calling waker drop(lock); - - // this also drops the old waker if the variant contains it. - drop(old_state); - } - - ret - } - - pub(crate) fn transition_to_pending(&self, not_after: u64) -> TransitionToPending { - if self.entry.deadline > not_after { - return TransitionToPending::NotElapsed(self.entry.deadline); + drop(maybe_old_waker); } - - let mut lock = self.entry.state.lock(); - let state: &mut PrivState = &mut lock; - - let (new_state, ret) = match state { - // don't unlock — poisoning the `Mutex` stops others from using the bad state. - PrivState::Unregistered(_) => panic!("corrupted state: State::Unregistered"), - PrivState::Registered(sender, waker) => { - let new_state = PrivState::Pending(sender.clone(), waker.clone()); - (new_state, TransitionToPending::Success) - } - // don't unlock — poisoning the `Mutex` stops others from using the bad state. - state @ (PrivState::Pending(..) | PrivState::WakingUp(..) | PrivState::WokenUp) => { - panic!("corrupted state: {state:#?}") - } - PrivState::Cancelling { .. } => { - let new_state = PrivState::Cancelling(Cancelling::Pending); - (new_state, TransitionToPending::Cancelling) - } - }; - - // update the state and take back the old state - let old_state = std::mem::replace(state, new_state); - - // unlock before dropping the old waker - drop(lock); - - // this also drops the old waker if the variant contains it. 
- drop(old_state); - - ret - } - - pub(crate) fn transition_to_waking_up(&self) -> TransitionToWakingUp { - let mut lock = self.entry.state.lock(); - - let old_state = match &*lock { - // don't unlock — poisoning the `Mutex` stops others from using the bad state. - state @ (PrivState::Unregistered(..) | PrivState::Registered(..)) => { - panic!("corrupted state: {state:#?}") - } - PrivState::Pending(_cancel_tx, waker) => { - let new_state = PrivState::WakingUp(waker.clone()); - std::mem::replace(&mut *lock, new_state) - } - // don't unlock — poisoning the `Mutex` stops others from using the bad state. - state @ (PrivState::WakingUp(..) | PrivState::WokenUp) => { - panic!("corrupted state: {state:#?}") - } - PrivState::Cancelling { .. } => { - // no need to transition cancelling entry - return TransitionToWakingUp::Cancelling; - } - }; - - // unlock before dropping the old waker - drop(lock); - - // this also drops the old waker if the variant contains it. - drop(old_state); - - TransitionToWakingUp::Success - } - - pub(crate) fn transition_to_waking_up_unregistered(&self) -> TransitionToWakingUp { - let mut lock = self.entry.state.lock(); - - let old_state = match &*lock { - PrivState::Unregistered(waker) => { - let waker = waker.clone(); - std::mem::replace(&mut *lock, PrivState::WakingUp(waker)) - } - // don't unlock — poisoning the `Mutex` stops others from using the bad state. - state @ (PrivState::Registered(..) - | PrivState::WokenUp - | PrivState::Pending(..) - | PrivState::WakingUp(..)) => { - panic!("corrupted state: {state:#?}") - } - PrivState::Cancelling { .. } => { - // no need to transition cancelling entry - return TransitionToWakingUp::Cancelling; - } - }; - - // unlock before dropping the old waker - drop(lock); - - // this also drops the old waker if the variant contains it. 
- drop(old_state); - - TransitionToWakingUp::Success } - pub(crate) fn transition_to_cancelling(&self) { + pub(crate) fn cancel(&self) { let mut lock = self.entry.state.lock(); - - match *lock { - // don't unlock — poisoning the `Mutex` stops others from using the bad state. - PrivState::Unregistered(_) => { - *lock = PrivState::Cancelling(Cancelling::Unregistered); - } - PrivState::Registered(ref tx, _) => { - // Safety: entry is not in any cancellation queue - unsafe { - tx.send(self.clone()); - } - *lock = PrivState::Cancelling(Cancelling::Registered); - } - PrivState::Pending(ref tx, _) => { - // Safety: entry is not in any cancellation queue - unsafe { - tx.send(self.clone()); - } - *lock = PrivState::Cancelling(Cancelling::Pending); - } - PrivState::WakingUp(..) => { - // Do nothing, this is because both `WakeQueue` and `CancellationQueue` - // use the same `extra_pointers` field in `Entry`. We cannot put the entry - // into both queues at the same time due to the nature of intrusive linked list. + lock.cancelled = true; + if let Some(cancel_tx) = lock.cancel_tx.take() { + drop(lock); + unsafe { + cancel_tx.send(self.clone()); } - PrivState::WokenUp => (), // dropping and waking up happen concurrently - // don't unlock — poisoning the `Mutex` stops others from using the bad state. - PrivState::Cancelling(..) => panic!("should not be called twice"), } } @@ -441,32 +239,14 @@ impl Handle { self.entry.deadline } - pub(crate) fn state(&self) -> State { + pub(crate) fn is_woken_up(&self) -> bool { let lock = self.entry.state.lock(); - match &*lock { - PrivState::Unregistered(_) => State::Unregistered, - PrivState::Registered(..) => State::Registered, - PrivState::Pending(..) => State::Pending, - PrivState::WakingUp(..) 
=> State::WakingUp, - PrivState::WokenUp => State::WokenUp, - PrivState::Cancelling(cancelling) => State::Cancelling(*cancelling), - } + lock.woken_up } - pub(crate) fn is_pending(&self) -> bool { - match self.state() { - State::Pending => true, - State::Cancelling(cancelling) => match cancelling { - Cancelling::Unregistered => unreachable!(), - Cancelling::Registered => false, - Cancelling::Pending => true, - }, - _ => false, - } - } - - pub(crate) fn is_woken_up(&self) -> bool { - matches!(*self.entry.state.lock(), PrivState::WokenUp) + pub(crate) fn is_cancelled(&self) -> bool { + let lock = self.entry.state.lock(); + lock.cancelled } #[cfg(test)] @@ -475,76 +255,3 @@ impl Handle { Arc::strong_count(&self.entry) } } - -/// The result of the [`Handle::transition_to_registered`]` method. -pub(crate) enum TransitionToRegistered { - /// The entry is being cancelled, no need to register it. - Success, - - /// The entry is being cancelled, - /// no need to transition it to the registered state. - Cancelling, -} - -/// The result of the [`Handle::transition_to_pending`]` method. -pub(crate) enum TransitionToPending { - /// The entry was successfully transitioned - /// to the pending state. - Success, - - /// The entry doesn't reached its deadline yet, - /// and the tick when it should be woken up is returned. - NotElapsed(u64), - - /// The entry is being cancelled, - /// no need to transition it to the pending state. - Cancelling, -} - -/// The result of the [`Handle::transition_to_waking_up`]` method. -pub(crate) enum TransitionToWakingUp { - /// The entry was successfully transitioned - /// to the waking up state. - Success, - - /// The entry is being cancelled, - /// no need to transition it to the waking up state. 
- Cancelling, -} - -/// Public representation of the [`PrivState`] -#[derive(Debug, Clone, Copy)] -pub(crate) enum State { - /// Same as [`PrivState::Unregistered`] - Unregistered, - - /// Same as [`PrivState::Registered`] - Registered, - - /// Same as [`PrivState::Pending`] - Pending, - - /// Same as [`PrivState::WakingUp`] - WakingUp, - - /// Same as [`PrivState::WokenUp`] - WokenUp, - - /// Same as [`PrivState::Cancelling`] - Cancelling(Cancelling), -} - -#[derive(Debug, Clone, Copy)] -/// Possible variants of the [`State::Cancelling`] -pub(crate) enum Cancelling { - /// [`Entry`] is being cancelled, and is not in the timer wheel. - Unregistered, - - /// [`Entry`] is being cancelled, and is registered in the timer wheel, - /// but not in the pending list. - Registered, - - /// [`Entry`] is being cancelled, and it registered in the timer wheel, - /// and also in the pending list. - Pending, -} diff --git a/tokio/src/runtime/time/wheel/mod.rs b/tokio/src/runtime/time/wheel/mod.rs index 88405373dcb..aaee46324ac 100644 --- a/tokio/src/runtime/time/wheel/mod.rs +++ b/tokio/src/runtime/time/wheel/mod.rs @@ -4,17 +4,13 @@ pub(crate) use self::level::Expiration; use self::level::Level; mod entry; -cfg_rt_and_time! { - pub(crate) use entry::Cancelling as EntryCancelling; -} pub(crate) use entry::Handle as EntryHandle; -pub(crate) use entry::State as EntryState; -use entry::TransitionToPending; -use entry::TransitionToRegistered; -pub(crate) use entry::TransitionToWakingUp; -use entry::{CancellationQueueEntry, WakeQueueEntry}; +use entry::{CancellationQueueEntry, RegistrationQueueEntry, WakeQueueEntry}; use entry::{Entry, EntryList}; +mod registration_queue; +pub(crate) use registration_queue::RegistrationQueue; + pub(crate) mod cancellation_queue; use cancellation_queue::Sender; @@ -22,7 +18,6 @@ mod wake_queue; pub(crate) use wake_queue::WakeQueue; use std::array; -use std::ptr::NonNull; /// Timing wheel implementation. 
/// @@ -51,9 +46,6 @@ pub(crate) struct Wheel { /// * ~ 4 hr slots / ~ 12 day range /// * ~ 12 day slots / ~ 2 yr range levels: Box<[Level; NUM_LEVELS]>, - - /// Entries queued for firing - pending: EntryList, } /// Number of levels. Each level has 64 slots. By using 6 levels with 64 slots @@ -70,7 +62,6 @@ impl Wheel { Wheel { elapsed: 0, levels: Box::new(array::from_fn(Level::new)), - pending: EntryList::new(), } } @@ -91,33 +82,25 @@ impl Wheel { /// The caller must ensure: /// /// * The entry is not already registered in ANY wheel. - pub(crate) unsafe fn insert(&mut self, hdl: EntryHandle, cancel_tx: Sender) -> Insert { + pub(crate) unsafe fn insert(&mut self, hdl: EntryHandle, cancel_tx: Sender) { let deadline = hdl.deadline(); - if deadline <= self.elapsed { - return Insert::Elapsed; - } + assert!(deadline > self.elapsed); + + hdl.register_cancel_tx(cancel_tx); // Get the level at which the entry should be stored let level = self.level_for(deadline); - - match hdl.transition_to_registered(cancel_tx) { - TransitionToRegistered::Success => { - unsafe { - self.levels[level].add_entry(hdl); - } - - debug_assert!({ - self.levels[level] - .next_expiration(self.elapsed) - .map(|e| e.deadline >= self.elapsed) - .unwrap_or(true) - }); - - Insert::Success - } - TransitionToRegistered::Cancelling => Insert::Cancelling, + unsafe { + self.levels[level].add_entry(hdl); } + + debug_assert!({ + self.levels[level] + .next_expiration(self.elapsed) + .map(|e| e.deadline >= self.elapsed) + .unwrap_or(true) + }); } /// Removes `item` from the timing wheel. @@ -128,32 +111,24 @@ impl Wheel { /// /// * The entry is already registered in THIS wheel. 
pub(crate) unsafe fn remove(&mut self, hdl: EntryHandle) { - if hdl.is_pending() { - unsafe { self.pending.remove(NonNull::from(&hdl)) }; - } else { - let deadline = hdl.deadline(); - debug_assert!( - self.elapsed <= deadline, - "elapsed={}; deadline={}", - self.elapsed, - deadline - ); - - let level = self.level_for(deadline); - unsafe { self.levels[level].remove_entry(hdl.clone()) }; - } + let deadline = hdl.deadline(); + debug_assert!( + self.elapsed <= deadline, + "elapsed={}; deadline={}", + self.elapsed, + deadline + ); + + let level = self.level_for(deadline); + unsafe { self.levels[level].remove_entry(hdl.clone()) }; } /// Advances the timer up to the instant represented by `now`. - pub(crate) fn poll(&mut self, now: u64) -> Option { + pub(crate) fn take_expired(&mut self, now: u64, wake_queue: &mut WakeQueue) { loop { - if let Some(hdl) = self.pending.pop_back() { - return Some(hdl); - } - match self.next_expiration() { Some(ref expiration) if expiration.deadline <= now => { - self.process_expiration(expiration); + self.process_expiration(expiration, wake_queue); self.set_elapsed(expiration.deadline); } @@ -167,21 +142,10 @@ impl Wheel { } } } - - self.pending.pop_back() } /// Returns the instant at which the next timeout expires. fn next_expiration(&self) -> Option { - if !self.pending.is_empty() { - // Expire immediately as we have things pending firing - return Some(Expiration { - level: 0, - slot: 0, - deadline: self.elapsed, - }); - } - // Check all levels for (level_num, level) in self.levels.iter().enumerate() { if let Some(expiration) = level.next_expiration(self.elapsed) { @@ -221,7 +185,11 @@ impl Wheel { /// time and the expiration time. for each in that population either /// queue it for notification (in the case of the last level) or tier /// it down to the next level (in all other cases). 
- pub(crate) fn process_expiration(&mut self, expiration: &Expiration) { + pub(crate) fn process_expiration( + &mut self, + expiration: &Expiration, + wake_queue: &mut WakeQueue, + ) { // Note that we need to take _all_ of the entries off the list before // processing any of them. This is important because it's possible that // those entries might need to be reinserted into the same slot. @@ -239,15 +207,17 @@ impl Wheel { debug_assert_eq!(hdl.deadline(), expiration.deadline); } - match hdl.transition_to_pending(expiration.deadline) { - TransitionToPending::Success => self.pending.push_front(hdl), - TransitionToPending::NotElapsed(when) => { - let level = level_for(expiration.deadline, when); - unsafe { - self.levels[level].add_entry(hdl); - } + let deadline = hdl.deadline(); + + if deadline > expiration.deadline { + let level = level_for(expiration.deadline, deadline); + unsafe { + self.levels[level].add_entry(hdl); + } + } else { + unsafe { + wake_queue.push_front(hdl); } - TransitionToPending::Cancelling => {} } } } @@ -293,19 +263,6 @@ fn level_for(elapsed: u64, when: u64) -> usize { significant / NUM_LEVELS } -/// The return type of the [`Wheel::insert`] method. -pub(crate) enum Insert { - /// The entry was successfully inserted. - Success, - - /// The entry has already expired, in this case, - /// the entry is not inserted into the wheel. - Elapsed, - - /// The entry is being cancelled, no need to register it. 
- Cancelling, -} - #[cfg(all(test, not(loom)))] mod test { use super::*; diff --git a/tokio/src/runtime/time/wheel/wake_queue.rs b/tokio/src/runtime/time/wheel/wake_queue.rs index f2e04f009c8..05790417bac 100644 --- a/tokio/src/runtime/time/wheel/wake_queue.rs +++ b/tokio/src/runtime/time/wheel/wake_queue.rs @@ -1,4 +1,4 @@ -use super::{Entry, EntryHandle, EntryState}; +use super::{Entry, EntryHandle}; use crate::runtime::time::wheel::WakeQueueEntry; use crate::util::linked_list; @@ -49,16 +49,7 @@ impl WakeQueue { /// - The waker panics while waking the entry. pub(crate) fn wake_all(mut self) { while let Some(hdl) = self.list.pop_front() { - match hdl.state() { - EntryState::Unregistered => hdl.wake_unregistered(), - state @ (EntryState::Registered | EntryState::Pending) => { - panic!("corrupted state: {state:#?}"); - } - EntryState::WakingUp => hdl.wake(), - // cancellation happens concurrently, no need to wake - EntryState::Cancelling(_) => (), - EntryState::WokenUp => panic!("corrupted state: woken up entry in wake queue"), - } + hdl.wake(); } } } From bedf05877a859b580331055db070d045e983e44c Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 18 Nov 2025 22:54:35 +0800 Subject: [PATCH 078/100] fixup! simplify the `PrivState` --- .../runtime/time/wheel/registration_queue.rs | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 tokio/src/runtime/time/wheel/registration_queue.rs diff --git a/tokio/src/runtime/time/wheel/registration_queue.rs b/tokio/src/runtime/time/wheel/registration_queue.rs new file mode 100644 index 00000000000..417ec4804d7 --- /dev/null +++ b/tokio/src/runtime/time/wheel/registration_queue.rs @@ -0,0 +1,41 @@ +use super::{Entry, EntryHandle}; +use crate::runtime::time::wheel::RegistrationQueueEntry; +use crate::util::linked_list; + +type EntryList = linked_list::LinkedList; + +/// A queue of entries that need to be registered in the timer wheel. 
+#[derive(Debug)] +pub(crate) struct RegistrationQueue { + list: EntryList, +} + +impl Drop for RegistrationQueue { + fn drop(&mut self) { + // drain all entries without waking them up + while let Some(hdl) = self.list.pop_front() { + drop(hdl); + } + } +} + +impl RegistrationQueue { + pub(crate) fn new() -> Self { + Self { + list: EntryList::new(), + } + } + + /// # Safety + /// + /// Behavior is undefined if any of the following conditions are violated: + /// + /// - `hdl` must not in any [`super::cancellation_queue`], and also mus not in any [`WakeQueue`]. + pub(crate) unsafe fn push_front(&mut self, hdl: EntryHandle) { + self.list.push_front(hdl); + } + + pub(crate) fn pop_front(&mut self) -> Option { + self.list.pop_front() + } +} From 1a82da102b61b863203387ef9c37f8c92a215fd6 Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 18 Nov 2025 22:56:40 +0800 Subject: [PATCH 079/100] fixup! simplify the `PrivState` --- tokio/src/runtime/time/wheel/registration_queue.rs | 2 +- tokio/src/runtime/time/wheel/wake_queue.rs | 9 +-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/tokio/src/runtime/time/wheel/registration_queue.rs b/tokio/src/runtime/time/wheel/registration_queue.rs index 417ec4804d7..ad5a5abab06 100644 --- a/tokio/src/runtime/time/wheel/registration_queue.rs +++ b/tokio/src/runtime/time/wheel/registration_queue.rs @@ -30,7 +30,7 @@ impl RegistrationQueue { /// /// Behavior is undefined if any of the following conditions are violated: /// - /// - `hdl` must not in any [`super::cancellation_queue`], and also mus not in any [`WakeQueue`]. + /// - [`Entry::extra_pointers`] of `hdl` must not being used. 
pub(crate) unsafe fn push_front(&mut self, hdl: EntryHandle) { self.list.push_front(hdl); } diff --git a/tokio/src/runtime/time/wheel/wake_queue.rs b/tokio/src/runtime/time/wheel/wake_queue.rs index 05790417bac..af034f4acd9 100644 --- a/tokio/src/runtime/time/wheel/wake_queue.rs +++ b/tokio/src/runtime/time/wheel/wake_queue.rs @@ -34,19 +34,12 @@ impl WakeQueue { /// /// Behavior is undefined if any of the following conditions are violated: /// - /// - `hdl` must not in any [`super::cancellation_queue`], and also mus not in any [`WakeQueue`]. + /// - [`Entry::extra_pointers`] of `hdl` must not being used. pub(crate) unsafe fn push_front(&mut self, hdl: EntryHandle) { self.list.push_front(hdl); } /// Wakes all entries in the wake queue. - /// - /// # Panics - /// - /// This function panics on any of the following conditions: - /// - /// - The entry state is in-consistent (i.e., `WokenUp` state in the wake queue). - /// - The waker panics while waking the entry. pub(crate) fn wake_all(mut self) { while let Some(hdl) = self.list.pop_front() { hdl.wake(); From 4b31afa76158e00ab40d4d85dcfdf0acb94c161b Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 18 Nov 2025 22:58:00 +0800 Subject: [PATCH 080/100] fixup! simplify the `PrivState` --- tokio/src/runtime/time/wheel/entry.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index 204aae73728..339b81d3040 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -18,11 +18,14 @@ struct State { #[derive(Debug)] pub(crate) struct Entry { - /// The intrusive pointers used by timer wheel. + /// The intrusive pointers used by [`super::Wheel::levels`]. wheel_pointers: linked_list::Pointers, - /// The intrusive pointer used by either [`CancellationQueueEntry`]. - /// or [`WakeQueueEntry`]. 
+ /// The intrusive pointer used by any of the following queues: + /// + /// - [`super::RegistrationQueue`] + /// - [`super::cancellation_queue`] + /// - [`super::WakeQueue`] extra_pointers: linked_list::Pointers, /// The tick when this entry is scheduled to expire. From 058e4f36f61ce5e905e3842714ff10f4ef14f5b0 Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 18 Nov 2025 23:04:17 +0800 Subject: [PATCH 081/100] fixup! simplify the `PrivState` --- tokio/src/runtime/scheduler/current_thread/mod.rs | 3 ++- tokio/src/runtime/scheduler/mod.rs | 2 +- tokio/src/runtime/scheduler/multi_thread/worker.rs | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tokio/src/runtime/scheduler/current_thread/mod.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs index 12c680bdee0..3e29d606661 100644 --- a/tokio/src/runtime/scheduler/current_thread/mod.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -608,7 +608,8 @@ impl Context { let mut core = self.core.borrow_mut(); f(core.as_mut().map(|c| c.as_mut())) } - + + #[cfg_attr(target_family = "wasm", allow(dead_code))] #[cfg(test)] pub(crate) fn with_time_context2(&self, f: F) -> R where diff --git a/tokio/src/runtime/scheduler/mod.rs b/tokio/src/runtime/scheduler/mod.rs index 16673b32ddb..221a46e8920 100644 --- a/tokio/src/runtime/scheduler/mod.rs +++ b/tokio/src/runtime/scheduler/mod.rs @@ -285,7 +285,7 @@ cfg_rt! 
{ } - #[cfg(test)] + #[cfg(all(not(target_family = "wasm"), test))] pub(crate) fn with_time_context2(&self, f: F) -> R where F: FnOnce(Option<&mut crate::runtime::time::Context2>) -> R, diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index af3bdee78b3..d6049bb6988 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -959,7 +959,7 @@ impl Context { } } - #[cfg(test)] + #[cfg(all(not(target_family = "wasm"), test))] pub(crate) fn with_time_context2(&self, f: F) -> R where F: FnOnce(Option<&mut crate::runtime::time::Context2>) -> R, From 133b962ea9f26e523a0561bdf0936df0349dec96 Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 18 Nov 2025 23:05:51 +0800 Subject: [PATCH 082/100] fixup! simplify the `PrivState` --- tokio/src/runtime/scheduler/current_thread/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tokio/src/runtime/scheduler/current_thread/mod.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs index 3e29d606661..cd9e3deca65 100644 --- a/tokio/src/runtime/scheduler/current_thread/mod.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -608,7 +608,7 @@ impl Context { let mut core = self.core.borrow_mut(); f(core.as_mut().map(|c| c.as_mut())) } - + #[cfg_attr(target_family = "wasm", allow(dead_code))] #[cfg(test)] pub(crate) fn with_time_context2(&self, f: F) -> R From bcf92c45c8255b747da6650f787fe6a457b96f94 Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 18 Nov 2025 23:09:55 +0800 Subject: [PATCH 083/100] fixup! 
simplify the `PrivState` --- tokio/src/runtime/scheduler/multi_thread/worker.rs | 2 +- tokio/src/runtime/time/timer.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index d6049bb6988..2078fb40bec 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -959,7 +959,7 @@ impl Context { } } - #[cfg(all(not(target_family = "wasm"), test))] + #[cfg(all(not(target_os = "wasi"), test))] pub(crate) fn with_time_context2(&self, f: F) -> R where F: FnOnce(Option<&mut crate::runtime::time::Context2>) -> R, diff --git a/tokio/src/runtime/time/timer.rs b/tokio/src/runtime/time/timer.rs index 2d5bde725a3..be7778532a8 100644 --- a/tokio/src/runtime/time/timer.rs +++ b/tokio/src/runtime/time/timer.rs @@ -136,7 +136,7 @@ where } } -#[cfg(test)] +#[cfg(all(not(target_os = "wasi"), test))] pub(super) fn with_current_time_context2(hdl: &SchedulerHandle, f: F) -> R where F: FnOnce(Option<&mut crate::runtime::time::Context2>) -> R, From ed80712f21983bc2611328a5df8132085c039bbd Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 18 Nov 2025 23:11:54 +0800 Subject: [PATCH 084/100] fixup! 
simplify the `PrivState` --- tokio/src/runtime/scheduler/current_thread/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tokio/src/runtime/scheduler/current_thread/mod.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs index cd9e3deca65..a67ada588b2 100644 --- a/tokio/src/runtime/scheduler/current_thread/mod.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -609,8 +609,7 @@ impl Context { f(core.as_mut().map(|c| c.as_mut())) } - #[cfg_attr(target_family = "wasm", allow(dead_code))] - #[cfg(test)] + #[cfg(all(not(target_os = "wasi"), test))] pub(crate) fn with_time_context2(&self, f: F) -> R where F: FnOnce(Option<&mut crate::runtime::time::Context2>) -> R, From 91d759fa9bb7d0ac3ae857a9d4351d1404656cf4 Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 18 Nov 2025 23:17:39 +0800 Subject: [PATCH 085/100] fixup! simplify the `PrivState` --- tokio/src/runtime/scheduler/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tokio/src/runtime/scheduler/mod.rs b/tokio/src/runtime/scheduler/mod.rs index 221a46e8920..eec1574a20c 100644 --- a/tokio/src/runtime/scheduler/mod.rs +++ b/tokio/src/runtime/scheduler/mod.rs @@ -285,7 +285,7 @@ cfg_rt! { } - #[cfg(all(not(target_family = "wasm"), test))] + #[cfg(all(not(target_os = "wasi"), test))] pub(crate) fn with_time_context2(&self, f: F) -> R where F: FnOnce(Option<&mut crate::runtime::time::Context2>) -> R, From 59a0d91968d6a46d5af62442f41b39eeb91323e8 Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 18 Nov 2025 23:17:46 +0800 Subject: [PATCH 086/100] fixup! simplify the `PrivState` --- tokio/src/runtime/scheduler/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/tokio/src/runtime/scheduler/mod.rs b/tokio/src/runtime/scheduler/mod.rs index eec1574a20c..14ac6dacce2 100644 --- a/tokio/src/runtime/scheduler/mod.rs +++ b/tokio/src/runtime/scheduler/mod.rs @@ -284,7 +284,6 @@ cfg_rt! 
{ match_flavor!(self, Context(context) => context.with_registration_queue(f)) } - #[cfg(all(not(target_os = "wasi"), test))] pub(crate) fn with_time_context2(&self, f: F) -> R where From f2d586807d35e9ddf6cccf9a87e33bcd89734c0d Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 18 Nov 2025 23:53:51 +0800 Subject: [PATCH 087/100] fixup! simplify the `PrivState` --- tokio/src/runtime/time/wheel/entry.rs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index 339b81d3040..97320dcc8c5 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -21,10 +21,12 @@ pub(crate) struct Entry { /// The intrusive pointers used by [`super::Wheel::levels`]. wheel_pointers: linked_list::Pointers, + /// The intrusive pointer used by [`super::cancellation_queue`]. + cancel_pointers: linked_list::Pointers, + /// The intrusive pointer used by any of the following queues: /// /// - [`super::RegistrationQueue`] - /// - [`super::cancellation_queue`] /// - [`super::WakeQueue`] extra_pointers: linked_list::Pointers, @@ -95,7 +97,7 @@ unsafe impl linked_list::Link for RegistrationQueueEntry { } } -/// An ZST to allow [`super::cancellation_queue`] to utilize the [`Entry::extra_pointers`] +/// An ZST to allow [`super::cancellation_queue`] to utilize the [`Entry::cancel_pointers`] /// by impl [`linked_list::Link`] as we cannot impl it on [`Entry`] /// directly due to the conflicting implementations used by [`Entry::wheel_pointers`]. 
/// @@ -121,7 +123,7 @@ unsafe impl linked_list::Link for CancellationQueueEntry { target: NonNull, ) -> NonNull> { let this = target.as_ptr(); - let field = unsafe { std::ptr::addr_of_mut!((*this).extra_pointers) }; + let field = unsafe { std::ptr::addr_of_mut!((*this).cancel_pointers) }; unsafe { NonNull::new_unchecked(field) } } } @@ -180,6 +182,7 @@ impl Handle { let entry = Arc::new(Entry { wheel_pointers: linked_list::Pointers::new(), + cancel_pointers: linked_list::Pointers::new(), extra_pointers: linked_list::Pointers::new(), deadline, state: Mutex::new(state), @@ -229,11 +232,13 @@ impl Handle { pub(crate) fn cancel(&self) { let mut lock = self.entry.state.lock(); - lock.cancelled = true; - if let Some(cancel_tx) = lock.cancel_tx.take() { - drop(lock); - unsafe { - cancel_tx.send(self.clone()); + if !lock.cancelled { + lock.cancelled = true; + if let Some(cancel_tx) = lock.cancel_tx.take() { + drop(lock); + unsafe { + cancel_tx.send(self.clone()); + } } } } From f17b43f3d1be41dcfa6066538f7344f4288bb988 Mon Sep 17 00:00:00 2001 From: Qi Date: Wed, 19 Nov 2025 00:07:37 +0800 Subject: [PATCH 088/100] fixup! simplify the `PrivState` --- tokio/src/runtime/time/wheel/entry.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index 97320dcc8c5..4abfe9514ed 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -18,14 +18,12 @@ struct State { #[derive(Debug)] pub(crate) struct Entry { - /// The intrusive pointers used by [`super::Wheel::levels`]. - wheel_pointers: linked_list::Pointers, - /// The intrusive pointer used by [`super::cancellation_queue`]. 
cancel_pointers: linked_list::Pointers, /// The intrusive pointer used by any of the following queues: /// + /// - [`super::Level::slot`] /// - [`super::RegistrationQueue`] /// - [`super::WakeQueue`] extra_pointers: linked_list::Pointers, @@ -61,14 +59,14 @@ unsafe impl linked_list::Link for Entry { target: NonNull, ) -> NonNull> { let this = target.as_ptr(); - let field = unsafe { std::ptr::addr_of_mut!((*this).wheel_pointers) }; + let field = unsafe { std::ptr::addr_of_mut!((*this).extra_pointers) }; unsafe { NonNull::new_unchecked(field) } } } /// An ZST to allow [`super::registration_queue`] to utilize the [`Entry::extra_pointers`] /// by impl [`linked_list::Link`] as we cannot impl it on [`Entry`] -/// directly due to the conflicting implementations used by [`Entry::wheel_pointers`]. +/// directly due to the conflicting implementations. /// /// This type should never be constructed. pub(super) struct RegistrationQueueEntry; @@ -99,7 +97,7 @@ unsafe impl linked_list::Link for RegistrationQueueEntry { /// An ZST to allow [`super::cancellation_queue`] to utilize the [`Entry::cancel_pointers`] /// by impl [`linked_list::Link`] as we cannot impl it on [`Entry`] -/// directly due to the conflicting implementations used by [`Entry::wheel_pointers`]. +/// directly due to the conflicting implementations. /// /// This type should never be constructed. pub(super) struct CancellationQueueEntry; @@ -130,7 +128,7 @@ unsafe impl linked_list::Link for CancellationQueueEntry { /// An ZST to allow [`super::WakeQueue`] to utilize the [`Entry::extra_pointers`] /// by impl [`linked_list::Link`] as we cannot impl it on [`Entry`] -/// directly due to the conflicting implementations used by [`Entry::wheel_pointers`]. +/// directly due to the conflicting implementations. /// /// This type should never be constructed. 
pub(super) struct WakeQueueEntry; @@ -181,7 +179,6 @@ impl Handle { }; let entry = Arc::new(Entry { - wheel_pointers: linked_list::Pointers::new(), cancel_pointers: linked_list::Pointers::new(), extra_pointers: linked_list::Pointers::new(), deadline, From aebf2b3b447ebd26c873e7c5aa38c787a3e43853 Mon Sep 17 00:00:00 2001 From: Qi Date: Wed, 19 Nov 2025 20:28:17 +0800 Subject: [PATCH 089/100] add safety comments in `entry.rs` Signed-off-by: ADD-SP --- tokio/src/runtime/time/wheel/entry.rs | 31 +++++++++++++++++++++------ 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index 4abfe9514ed..a2217c2e50f 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -23,9 +23,25 @@ pub(crate) struct Entry { /// The intrusive pointer used by any of the following queues: /// - /// - [`super::Level::slot`] - /// - [`super::RegistrationQueue`] - /// - [`super::WakeQueue`] + /// - [`Wheel`] + /// - [`RegistrationQueue`] + /// - [`WakeQueue`] + /// + /// We can guarantee that this pointer is only used by one of the above + /// at any given time. See below for the journey of this pointer. + /// + /// Initially, this pointer is used by the [`RegistrationQueue`]. + /// + /// And then, before parking the resource driver, + /// the scheduler removes the entry from the [`RegistrationQueue`] + /// [`RegistrationQueue`] and insert it into the [`Wheel`]. + /// + /// Finally, after parking the resource driver, the scheduler removes + /// the entry from the [`Wheel`] and insert it into the [`WakeQueue`]. + /// + /// [`RegistrationQueue`]: super::RegistrationQueue + /// [`Wheel`]: super::Wheel + /// [`WakeQueue`]: super::WakeQueue extra_pointers: linked_list::Pointers, /// The tick when this entry is scheduled to expire. 
@@ -209,11 +225,9 @@ impl Handle { pub(crate) fn register_cancel_tx(&self, cancel_tx: Sender) { let mut lock = self.entry.state.lock(); if !lock.cancelled && !lock.woken_up { + let old_tx = lock.cancel_tx.replace(cancel_tx); // don't unlock — poisoning the `Mutex` stops others from using the bad state. - assert!( - lock.cancel_tx.replace(cancel_tx).is_none(), - "cancel_tx is already registered" - ); + assert!(old_tx.is_none(), "cancel_tx is already registered"); } } @@ -233,6 +247,9 @@ impl Handle { lock.cancelled = true; if let Some(cancel_tx) = lock.cancel_tx.take() { drop(lock); + + // Safety: we can guarantee that `self` is not in any cancellation queue + // because the `self.cancelled` was just set to `true`. unsafe { cancel_tx.send(self.clone()); } From 6f5735ebd6a50324cfefc5e578e61079673fb80e Mon Sep 17 00:00:00 2001 From: Qi Date: Wed, 19 Nov 2025 20:29:18 +0800 Subject: [PATCH 090/100] tolerate spurious wakeup Signed-off-by: ADD-SP --- tokio/src/runtime/time/wheel/entry.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time/wheel/entry.rs index a2217c2e50f..a437b8a2353 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time/wheel/entry.rs @@ -211,14 +211,11 @@ impl Handle { if !lock.cancelled { lock.woken_up = true; - // don't unlock — poisoning the `Mutex` stops others from using the bad state. 
- let waker = lock - .waker - .take() - .expect("waker must be present when waking up"); - // unlock before calling waker - drop(lock); - waker.wake(); + if let Some(waker) = lock.waker.take() { + // unlock before calling waker + drop(lock); + waker.wake(); + } } } From 0215b54053808e579a0b687291a982621c1132d0 Mon Sep 17 00:00:00 2001 From: Qi Date: Wed, 19 Nov 2025 20:39:30 +0800 Subject: [PATCH 091/100] move the `crate::runtime::scheduler::util` out of the `cfg_rt_and_time` macros this enables rustfmt to fix the format Signed-off-by: ADD-SP --- tokio/src/runtime/scheduler/util.rs | 206 ----------------------- tokio/src/runtime/scheduler/util/mod.rs | 3 + tokio/src/runtime/scheduler/util/time.rs | 183 ++++++++++++++++++++ 3 files changed, 186 insertions(+), 206 deletions(-) delete mode 100644 tokio/src/runtime/scheduler/util.rs create mode 100644 tokio/src/runtime/scheduler/util/mod.rs create mode 100644 tokio/src/runtime/scheduler/util/time.rs diff --git a/tokio/src/runtime/scheduler/util.rs b/tokio/src/runtime/scheduler/util.rs deleted file mode 100644 index d7bdba76279..00000000000 --- a/tokio/src/runtime/scheduler/util.rs +++ /dev/null @@ -1,206 +0,0 @@ -cfg_rt_and_time! 
{ - pub(crate) mod time { - use crate::runtime::{scheduler::driver}; - use crate::runtime::time::{Wheel, WakeQueue}; - use crate::runtime::time::EntryHandle; - use crate::runtime::time::RegistrationQueue; - use crate::runtime::time::cancellation_queue::{Sender, Receiver}; - use std::time::Duration; - - pub(crate) fn min_duration(a: Option, b: Option) -> Option { - match (a, b) { - (Some(dur_a), Some(dur_b)) => Some(std::cmp::min(dur_a, dur_b)), - (Some(dur_a), None) => Some(dur_a), - (None, Some(dur_b)) => Some(dur_b), - (None, None) => None, - } - } - - pub(crate) fn process_registration_queue( - registration_queue: &mut RegistrationQueue, - wheel: &mut Wheel, - tx: &Sender, - wake_queue: &mut WakeQueue, - ) { - while let Some(hdl) = registration_queue.pop_front() { - if hdl.deadline() <= wheel.elapsed() { - unsafe { - wake_queue.push_front(hdl); - } - } else { - // Safety: the entry is not registered yet - unsafe { - wheel.insert(hdl, tx.clone()); - } - } - } - } - - pub(crate) fn insert_inject_timers( - wheel: &mut Wheel, - tx: &Sender, - inject: Vec, - wake_queue: &mut WakeQueue, - ) { - for hdl in inject { - if hdl.deadline() <= wheel.elapsed() { - unsafe { - wake_queue.push_front(hdl); - } - } else { - // Safety: the entry is not registered yet - unsafe { - wheel.insert(hdl, tx.clone()); - } - } - } - } - - pub(crate) fn remove_cancelled_timers( - wheel: &mut Wheel, - rx: &mut Receiver, - ) { - for hdl in rx.recv_all() { - debug_assert!(hdl.is_cancelled()); - - if hdl.deadline() > wheel.elapsed() { - // Safety: the entry is registered in THIS wheel - unsafe { - wheel.remove(hdl); - } - } - } - } - - pub(crate) fn next_expiration_time( - wheel: &Wheel, - drv_hdl: &driver::Handle, - ) -> Option { - drv_hdl.with_time(|maybe_time_hdl| { - let Some(time_hdl) = maybe_time_hdl else { - // time driver is not enabled, nothing to do. 
- return None; - }; - - let clock = drv_hdl.clock(); - let time_source = time_hdl.time_source(); - - wheel.next_expiration_time().map(|tick| { - let now = time_source.now(clock); - time_source.tick_to_duration(tick.saturating_sub(now)) - }) - }) - } - - cfg_test_util! { - pub(crate) fn pre_auto_advance( - drv_hdl: &driver::Handle, - duration: Option, - ) -> bool { - drv_hdl.with_time(|maybe_time_hdl| { - if maybe_time_hdl.is_none() { - // time driver is not enabled, nothing to do. - return false; - } - - if duration.is_some() { - let clock = drv_hdl.clock(); - if clock.can_auto_advance() { - return true; - } - - false - } else { - false - } - }) - } - - pub(crate) fn post_auto_advance( - drv_hdl: &driver::Handle, - duration: Option, - ) { - drv_hdl.with_time(|maybe_time_hdl| { - let Some(time_hdl) = maybe_time_hdl else { - // time driver is not enabled, nothing to do. - return; - }; - - if let Some(park_duration) = duration { - let clock = drv_hdl.clock(); - if clock.can_auto_advance() - && !time_hdl.did_wake() { - if let Err(msg) = clock.advance(park_duration) { - panic!("{msg}"); - } - } - } - }) - } - } - - cfg_not_test_util! { - pub(crate) fn pre_auto_advance( - _drv_hdl: &driver::Handle, - _duration: Option, - ) -> bool { - false - } - - pub(crate) fn post_auto_advance( - _drv_hdl: &driver::Handle, - _duration: Option, - ) { - // No-op in non-test util builds - } - } - - pub(crate) fn process_expired_timers( - wheel: &mut Wheel, - drv_hdl: &driver::Handle, - wake_queue: &mut WakeQueue, - ) { - drv_hdl.with_time(|maybe_time_hdl| { - let Some(time_hdl) = maybe_time_hdl else { - // time driver is not enabled, nothing to do. 
- return; - }; - - let clock = drv_hdl.clock(); - let time_source = time_hdl.time_source(); - - let now = time_source.now(clock); - time_hdl.process_at_time(wheel, now, wake_queue); - }); - } - - pub(crate) fn shutdown_local_timers( - wheel: &mut Wheel, - rx: &mut Receiver, - inject: Vec, - drv_hdl: &driver::Handle, - ) { - drv_hdl.with_time(|maybe_time_hdl| { - let Some(time_hdl) = maybe_time_hdl else { - // time driver is not enabled, nothing to do. - return; - }; - - remove_cancelled_timers(wheel, rx); - time_hdl.shutdown(wheel); - - let mut wake_queue = WakeQueue::new(); - // simply wake all unregistered timers - for hdl in inject { - if !hdl.is_cancelled() { - unsafe { - wake_queue.push_front(hdl); - } - } - } - - wake_queue.wake_all(); - }); - } - } -} diff --git a/tokio/src/runtime/scheduler/util/mod.rs b/tokio/src/runtime/scheduler/util/mod.rs new file mode 100644 index 00000000000..28de2070a41 --- /dev/null +++ b/tokio/src/runtime/scheduler/util/mod.rs @@ -0,0 +1,3 @@ +cfg_rt_and_time! 
{ + pub(crate) mod time; +} diff --git a/tokio/src/runtime/scheduler/util/time.rs b/tokio/src/runtime/scheduler/util/time.rs new file mode 100644 index 00000000000..dc833c09b17 --- /dev/null +++ b/tokio/src/runtime/scheduler/util/time.rs @@ -0,0 +1,183 @@ +use crate::runtime::scheduler::driver; +use crate::runtime::time::cancellation_queue::{Receiver, Sender}; +use crate::runtime::time::EntryHandle; +use crate::runtime::time::RegistrationQueue; +use crate::runtime::time::{WakeQueue, Wheel}; +use std::time::Duration; + +pub(crate) fn min_duration(a: Option, b: Option) -> Option { + match (a, b) { + (Some(dur_a), Some(dur_b)) => Some(std::cmp::min(dur_a, dur_b)), + (Some(dur_a), None) => Some(dur_a), + (None, Some(dur_b)) => Some(dur_b), + (None, None) => None, + } +} + +pub(crate) fn process_registration_queue( + registration_queue: &mut RegistrationQueue, + wheel: &mut Wheel, + tx: &Sender, + wake_queue: &mut WakeQueue, +) { + while let Some(hdl) = registration_queue.pop_front() { + if hdl.deadline() <= wheel.elapsed() { + unsafe { + wake_queue.push_front(hdl); + } + } else { + // Safety: the entry is not registered yet + unsafe { + wheel.insert(hdl, tx.clone()); + } + } + } +} + +pub(crate) fn insert_inject_timers( + wheel: &mut Wheel, + tx: &Sender, + inject: Vec, + wake_queue: &mut WakeQueue, +) { + for hdl in inject { + if hdl.deadline() <= wheel.elapsed() { + unsafe { + wake_queue.push_front(hdl); + } + } else { + // Safety: the entry is not registered yet + unsafe { + wheel.insert(hdl, tx.clone()); + } + } + } +} + +pub(crate) fn remove_cancelled_timers(wheel: &mut Wheel, rx: &mut Receiver) { + for hdl in rx.recv_all() { + debug_assert!(hdl.is_cancelled()); + + if hdl.deadline() > wheel.elapsed() { + // Safety: the entry is registered in THIS wheel + unsafe { + wheel.remove(hdl); + } + } + } +} + +pub(crate) fn next_expiration_time(wheel: &Wheel, drv_hdl: &driver::Handle) -> Option { + drv_hdl.with_time(|maybe_time_hdl| { + let Some(time_hdl) = maybe_time_hdl 
else { + // time driver is not enabled, nothing to do. + return None; + }; + + let clock = drv_hdl.clock(); + let time_source = time_hdl.time_source(); + + wheel.next_expiration_time().map(|tick| { + let now = time_source.now(clock); + time_source.tick_to_duration(tick.saturating_sub(now)) + }) + }) +} + +#[cfg(feature = "test-util")] +pub(crate) fn pre_auto_advance(drv_hdl: &driver::Handle, duration: Option) -> bool { + drv_hdl.with_time(|maybe_time_hdl| { + if maybe_time_hdl.is_none() { + // time driver is not enabled, nothing to do. + return false; + } + + if duration.is_some() { + let clock = drv_hdl.clock(); + if clock.can_auto_advance() { + return true; + } + + false + } else { + false + } + }) +} + +pub(crate) fn process_expired_timers( + wheel: &mut Wheel, + drv_hdl: &driver::Handle, + wake_queue: &mut WakeQueue, +) { + drv_hdl.with_time(|maybe_time_hdl| { + let Some(time_hdl) = maybe_time_hdl else { + // time driver is not enabled, nothing to do. + return; + }; + + let clock = drv_hdl.clock(); + let time_source = time_hdl.time_source(); + + let now = time_source.now(clock); + time_hdl.process_at_time(wheel, now, wake_queue); + }); +} + +pub(crate) fn shutdown_local_timers( + wheel: &mut Wheel, + rx: &mut Receiver, + inject: Vec, + drv_hdl: &driver::Handle, +) { + drv_hdl.with_time(|maybe_time_hdl| { + let Some(time_hdl) = maybe_time_hdl else { + // time driver is not enabled, nothing to do. + return; + }; + + remove_cancelled_timers(wheel, rx); + time_hdl.shutdown(wheel); + + let mut wake_queue = WakeQueue::new(); + // simply wake all unregistered timers + for hdl in inject { + if !hdl.is_cancelled() { + unsafe { + wake_queue.push_front(hdl); + } + } + } + + wake_queue.wake_all(); + }); +} + +#[cfg(feature = "test-util")] +pub(crate) fn post_auto_advance(drv_hdl: &driver::Handle, duration: Option) { + drv_hdl.with_time(|maybe_time_hdl| { + let Some(time_hdl) = maybe_time_hdl else { + // time driver is not enabled, nothing to do. 
+ return; + }; + + if let Some(park_duration) = duration { + let clock = drv_hdl.clock(); + if clock.can_auto_advance() && !time_hdl.did_wake() { + if let Err(msg) = clock.advance(park_duration) { + panic!("{msg}"); + } + } + } + }) +} + +#[cfg(not(feature = "test-util"))] +pub(crate) fn pre_auto_advance(_drv_hdl: &driver::Handle, _duration: Option) -> bool { + false +} + +#[cfg(not(feature = "test-util"))] +pub(crate) fn post_auto_advance(_drv_hdl: &driver::Handle, _duration: Option) { + // No-op in non-test util builds +} From ef4c37f9ded2136666ff60182ec2c85777472f17 Mon Sep 17 00:00:00 2001 From: Qi Date: Wed, 19 Nov 2025 21:44:28 +0800 Subject: [PATCH 092/100] improve the test coverage of timer cancellation Signed-off-by: ADD-SP --- tokio/src/runtime/time/tests/mod.rs | 141 ++++++++++++++++++++++++++-- 1 file changed, 132 insertions(+), 9 deletions(-) diff --git a/tokio/src/runtime/time/tests/mod.rs b/tokio/src/runtime/time/tests/mod.rs index 9259de3ce4b..b0b3b279204 100644 --- a/tokio/src/runtime/time/tests/mod.rs +++ b/tokio/src/runtime/time/tests/mod.rs @@ -2,8 +2,6 @@ use std::future::poll_fn; use std::{task::Context, time::Duration}; - -#[cfg(not(loom))] use futures::task::noop_waker_ref; use crate::loom::thread; @@ -15,6 +13,8 @@ use crate::sync::oneshot; use super::Timer; +const EVENT_INTERVAL: u32 = 1; + fn block_on(f: impl std::future::Future) -> T { #[cfg(loom)] return loom::future::block_on(f); @@ -41,6 +41,7 @@ async fn fire_all_timers(handle: &Handle, exit_rx: oneshot::Receiver<()>) { loop { // Keep the worker thread busy, so that it can process injected // timers. 
+ assert_eq!(EVENT_INTERVAL, 1); crate::task::yield_now().await; if !exit_rx.is_empty() { // break the loop if the thread is exiting @@ -95,12 +96,16 @@ fn process_at_time(handle: &Handle, at: u64) { fn rt(start_paused: bool) -> crate::runtime::Runtime { crate::runtime::Builder::new_current_thread() .enable_time() - .event_interval(1) + .event_interval(EVENT_INTERVAL) .start_paused(start_paused) .build() .unwrap() } +fn noop_cx() -> Context<'static> { + Context::from_waker(noop_waker_ref()) +} + #[test] fn single_timer() { model(|| { @@ -145,10 +150,10 @@ fn drop_timer() { let _ = entry .as_mut() - .poll_elapsed(&mut Context::from_waker(futures::task::noop_waker_ref())); + .poll_elapsed(&mut noop_cx()); let _ = entry .as_mut() - .poll_elapsed(&mut Context::from_waker(futures::task::noop_waker_ref())); + .poll_elapsed(&mut noop_cx()); exit_tx.send(()).unwrap(); }); @@ -178,7 +183,7 @@ fn change_waker() { let _ = entry .as_mut() - .poll_elapsed(&mut Context::from_waker(futures::task::noop_waker_ref())); + .poll_elapsed(&mut noop_cx()); // At this point, we cannot let worker thread to wake up // the timer because the waker is a noop. 
@@ -245,11 +250,10 @@ fn poll_process_levels() { process_at_time(handle, t); for (deadline, future) in entries.iter_mut().enumerate() { - let mut context = Context::from_waker(noop_waker_ref()); if deadline <= t as usize { - assert!(future.as_mut().poll_elapsed(&mut context).is_ready()); + assert!(future.as_mut().poll_elapsed(&mut noop_cx()).is_ready()); } else { - assert!(future.as_mut().poll_elapsed(&mut context).is_pending()); + assert!(future.as_mut().poll_elapsed(&mut noop_cx()).is_pending()); } } } @@ -277,3 +281,122 @@ fn poll_process_levels_targeted() { process_at_time(handle, 192); }) } + +#[test] +fn cancel_in_the_same_rt() { + model(|| { + let rt = rt(false); + + rt.block_on(async { + let handle = rt.handle(); + let mut timer = Box::pin(Timer::new( + handle.inner.clone(), + handle.inner.driver().clock().now() + Duration::from_secs(1), + )); + let poll = timer.as_mut().poll_elapsed(&mut noop_cx()); + assert!(poll.is_pending()); + drop(timer); + + // Since the event interval is 1, yield 3 times to ensure + // the registration queue and cancellation queue are processed. + assert_eq!(EVENT_INTERVAL, 1); + crate::task::yield_now().await; + crate::task::yield_now().await; + crate::task::yield_now().await; + }); + }) +} + +#[test] +fn cancel_in_the_different_rt() { + model(|| { + let rt1 = rt(false); + let rt2 = rt(false); + + let timer = rt1.block_on(async { + let handle = rt1.handle(); + let mut timer = Box::pin(Timer::new( + handle.inner.clone(), + handle.inner.driver().clock().now() + Duration::from_secs(1), + )); + let poll = timer.as_mut().poll_elapsed(&mut noop_cx()); + assert!(poll.is_pending()); + timer + }); + + rt2.block_on(async { + drop(timer); + }); + + rt1.block_on(async { + // Since the event interval is 1, yield 3 times to ensure + // the registration queue and cancellation queue are processed. 
+ assert_eq!(EVENT_INTERVAL, 1); + crate::task::yield_now().await; + crate::task::yield_now().await; + crate::task::yield_now().await; + }); + }) +} + +#[test] +fn cancel_outside_of_rt() { + model(|| { + let rt = rt(false); + + let timer = rt.block_on(async { + let handle = rt.handle(); + let mut timer = Box::pin(Timer::new( + handle.inner.clone(), + handle.inner.driver().clock().now() + Duration::from_secs(1), + )); + let poll = timer.as_mut().poll_elapsed(&mut noop_cx()); + assert!(poll.is_pending()); + timer + }); + + drop(timer); + + rt.block_on(async { + // Since the event interval is 1, yield 3 times to ensure + // the registration queue and cancellation queue are processed. + assert_eq!(EVENT_INTERVAL, 1); + crate::task::yield_now().await; + crate::task::yield_now().await; + crate::task::yield_now().await; + }); + }) +} + +#[test] +fn cancel_in_different_thread() { + model(|| { + let rt = rt(false); + + let timer = rt.block_on(async { + let handle = rt.handle(); + let mut timer = Box::pin(Timer::new( + handle.inner.clone(), + handle.inner.driver().clock().now() + Duration::from_secs(1), + )); + let poll = timer.as_mut().poll_elapsed(&mut noop_cx()); + assert!(poll.is_pending()); + timer + }); + + let jh = thread::spawn(move || { + drop(timer); + }); + + rt.block_on(async { + // Since the event interval is 1, yield 3 times to ensure + // the registration queue and cancellation queue are processed. + assert_eq!(EVENT_INTERVAL, 1); + crate::task::yield_now().await; + crate::task::yield_now().await; + crate::task::yield_now().await; + }); + + jh.join().unwrap(); + }) +} From b72f92413c535b58210b41b7a76433035bdc72d8 Mon Sep 17 00:00:00 2001 From: Qi Date: Wed, 19 Nov 2025 21:46:44 +0800 Subject: [PATCH 093/100] fixup! 
improve the test coverage of timer cancellation --- tokio/src/runtime/time/tests/mod.rs | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/tokio/src/runtime/time/tests/mod.rs b/tokio/src/runtime/time/tests/mod.rs index b0b3b279204..d0193fa4e02 100644 --- a/tokio/src/runtime/time/tests/mod.rs +++ b/tokio/src/runtime/time/tests/mod.rs @@ -1,8 +1,8 @@ #![cfg(not(target_os = "wasi"))] +use futures::task::noop_waker_ref; use std::future::poll_fn; use std::{task::Context, time::Duration}; -use futures::task::noop_waker_ref; use crate::loom::thread; use crate::runtime::scheduler::util::time::process_registration_queue; @@ -148,12 +148,8 @@ fn drop_timer() { ); pin!(entry); - let _ = entry - .as_mut() - .poll_elapsed(&mut noop_cx()); - let _ = entry - .as_mut() - .poll_elapsed(&mut noop_cx()); + let _ = entry.as_mut().poll_elapsed(&mut noop_cx()); + let _ = entry.as_mut().poll_elapsed(&mut noop_cx()); exit_tx.send(()).unwrap(); }); @@ -181,9 +177,7 @@ fn change_waker() { ); pin!(entry); - let _ = entry - .as_mut() - .poll_elapsed(&mut noop_cx()); + let _ = entry.as_mut().poll_elapsed(&mut noop_cx()); // At this point, we cannot let worker thread to wake up // the timer because the waker is a noop. From c1a8676a315e063a742f88d05728ec35c7a82c40 Mon Sep 17 00:00:00 2001 From: Qi Date: Wed, 19 Nov 2025 22:11:58 +0800 Subject: [PATCH 094/100] revert spurious changes Signed-off-by: ADD-SP --- tokio/src/runtime/mod.rs | 6 +++--- tokio/src/runtime/thread_id.rs | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tokio/src/runtime/mod.rs b/tokio/src/runtime/mod.rs index 4b808b604f9..ae58ce6da86 100644 --- a/tokio/src/runtime/mod.rs +++ b/tokio/src/runtime/mod.rs @@ -456,6 +456,9 @@ cfg_rt! { 16384 }; + mod thread_id; + pub(crate) use thread_id::ThreadId; + pub(crate) mod metrics; pub use metrics::RuntimeMetrics; @@ -471,7 +474,4 @@ cfg_rt! 
{ /// After thread starts / before thread stops type Callback = std::sync::Arc; - - mod thread_id; - pub(crate) use thread_id::ThreadId; } diff --git a/tokio/src/runtime/thread_id.rs b/tokio/src/runtime/thread_id.rs index c8c44c0b0e3..ef392897963 100644 --- a/tokio/src/runtime/thread_id.rs +++ b/tokio/src/runtime/thread_id.rs @@ -4,7 +4,6 @@ use std::num::NonZeroU64; pub(crate) struct ThreadId(NonZeroU64); impl ThreadId { - #[cfg_attr(not(feature = "rt"), expect(dead_code))] pub(crate) fn next() -> Self { use crate::loom::sync::atomic::{Ordering::Relaxed, StaticAtomicU64}; From b63057c782dd2abbbc6f3b985ff08e547c796e67 Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 25 Nov 2025 21:59:02 +0800 Subject: [PATCH 095/100] isolate two kinds of timer implementations Signed-off-by: ADD-SP --- tokio/Cargo.toml | 1 + tokio/src/io/async_write.rs | 8 +- tokio/src/macros/cfg.rs | 25 +- tokio/src/net/unix/stream.rs | 2 +- tokio/src/runtime/builder.rs | 47 +- tokio/src/runtime/driver.rs | 24 +- tokio/src/runtime/mod.rs | 127 ++++ .../runtime/scheduler/current_thread/mod.rs | 241 +----- tokio/src/runtime/scheduler/mod.rs | 94 ++- .../runtime/scheduler/multi_thread/handle.rs | 22 +- .../src/runtime/scheduler/multi_thread/mod.rs | 4 +- .../runtime/scheduler/multi_thread/worker.rs | 408 ++++++----- tokio/src/runtime/scheduler/util/mod.rs | 5 +- .../scheduler/util/{time.rs => time_alt.rs} | 10 +- tokio/src/runtime/time/entry.rs | 693 ++++++++++++++++++ tokio/src/runtime/time/handle.rs | 65 +- tokio/src/runtime/time/mod.rs | 427 +++++++++-- tokio/src/runtime/time/source.rs | 7 +- tokio/src/runtime/time/tests/mod.rs | 385 ++++------ tokio/src/runtime/time/wheel/level.rs | 20 +- tokio/src/runtime/time/wheel/mod.rs | 151 ++-- .../wheel => time_alt}/cancellation_queue.rs | 3 +- .../cancellation_queue/tests.rs | 4 +- tokio/src/runtime/time_alt/context.rs | 47 ++ .../runtime/{time/wheel => time_alt}/entry.rs | 2 +- tokio/src/runtime/time_alt/mod.rs | 21 + .../wheel => 
time_alt}/registration_queue.rs | 6 +- .../time_alt/registration_queue/tests.rs | 53 ++ tokio/src/runtime/{time => time_alt}/timer.rs | 83 ++- .../{time/wheel => time_alt}/wake_queue.rs | 6 +- .../src/runtime/time_alt/wake_queue/tests.rs | 66 ++ tokio/src/runtime/time_alt/wheel/level.rs | 194 +++++ tokio/src/runtime/time_alt/wheel/mod.rs | 293 ++++++++ tokio/src/time/error.rs | 5 + tokio/src/time/interval.rs | 8 +- tokio/src/time/sleep.rs | 70 +- tokio/src/util/mod.rs | 3 + tokio/tests/time_alt.rs | 112 +++ tokio/tests/time_panic.rs | 63 +- tokio/tests/time_rt.rs | 122 ++- 40 files changed, 2875 insertions(+), 1052 deletions(-) rename tokio/src/runtime/scheduler/util/{time.rs => time_alt.rs} (94%) create mode 100644 tokio/src/runtime/time/entry.rs rename tokio/src/runtime/{time/wheel => time_alt}/cancellation_queue.rs (96%) rename tokio/src/runtime/{time/wheel => time_alt}/cancellation_queue/tests.rs (96%) create mode 100644 tokio/src/runtime/time_alt/context.rs rename tokio/src/runtime/{time/wheel => time_alt}/entry.rs (99%) create mode 100644 tokio/src/runtime/time_alt/mod.rs rename tokio/src/runtime/{time/wheel => time_alt}/registration_queue.rs (91%) create mode 100644 tokio/src/runtime/time_alt/registration_queue/tests.rs rename tokio/src/runtime/{time => time_alt}/timer.rs (69%) rename tokio/src/runtime/{time/wheel => time_alt}/wake_queue.rs (92%) create mode 100644 tokio/src/runtime/time_alt/wake_queue/tests.rs create mode 100644 tokio/src/runtime/time_alt/wheel/level.rs create mode 100644 tokio/src/runtime/time_alt/wheel/mod.rs create mode 100644 tokio/tests/time_alt.rs diff --git a/tokio/Cargo.toml b/tokio/Cargo.toml index e87d2ad0381..7caea7d09d1 100644 --- a/tokio/Cargo.toml +++ b/tokio/Cargo.toml @@ -140,6 +140,7 @@ tokio-test = { version = "0.4.0", path = "../tokio-test" } tokio-stream = { version = "0.1", path = "../tokio-stream" } tokio-util = { version = "0.7", path = "../tokio-util", features = ["rt"] } futures = { version = "0.3.0", features = 
["async-await"] } +futures-test = "0.3.31" mockall = "0.13.0" async-stream = "0.3" futures-concurrency = "7.6.3" diff --git a/tokio/src/io/async_write.rs b/tokio/src/io/async_write.rs index e7da1a9c03f..3c315a1bf9d 100644 --- a/tokio/src/io/async_write.rs +++ b/tokio/src/io/async_write.rs @@ -54,7 +54,7 @@ pub trait AsyncWrite { self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &[u8], - ) -> Poll>; + ) -> Poll>; /// Attempts to flush the object, ensuring that any buffered data reach /// their destination. @@ -65,7 +65,7 @@ pub trait AsyncWrite { /// `Poll::Pending` and arranges for the current task (via /// `cx.waker()`) to receive a notification when the object can make /// progress towards flushing. - fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll>; + fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll>; /// Initiates or attempts to shut down this writer, returning success when /// the I/O connection has completely shut down. @@ -125,7 +125,7 @@ pub trait AsyncWrite { /// /// This function will panic if not called within the context of a future's /// task. - fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll>; + fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll>; /// Like [`poll_write`], except that it writes from a slice of buffers. /// @@ -154,7 +154,7 @@ pub trait AsyncWrite { self: Pin<&mut Self>, cx: &mut Context<'_>, bufs: &[IoSlice<'_>], - ) -> Poll> { + ) -> Poll> { let buf = bufs .iter() .find(|b| !b.is_empty()) diff --git a/tokio/src/macros/cfg.rs b/tokio/src/macros/cfg.rs index d55f9ae6d47..9af23b01cbd 100644 --- a/tokio/src/macros/cfg.rs +++ b/tokio/src/macros/cfg.rs @@ -94,6 +94,7 @@ macro_rules! cfg_atomic_waker_impl { feature = "process", feature = "rt", feature = "signal", + feature = "time", ))] #[cfg(not(loom))] $item @@ -710,27 +711,3 @@ macro_rules! cfg_io_uring { )* }; } - -macro_rules! 
cfg_rt_and_time{ - ($($item:item)*) => { - $( - #[cfg(all( - feature = "rt", - feature = "time", - ))] - $item - )* - }; -} - -macro_rules! cfg_rt_or_time{ - ($($item:item)*) => { - $( - #[cfg(any( - feature = "rt", - feature = "time", - ))] - $item - )* - }; -} diff --git a/tokio/src/net/unix/stream.rs b/tokio/src/net/unix/stream.rs index 26fe9e406d7..2391d54b46e 100644 --- a/tokio/src/net/unix/stream.rs +++ b/tokio/src/net/unix/stream.rs @@ -974,7 +974,7 @@ impl UnixStream { /// Unlike [`split`], the owned halves can be moved to separate tasks, however /// this comes at the cost of a heap allocation. /// - /// **Note:** Dropping the write half will shut down the write half of the + /// **Note:** Dropping the write half will only shut down the write half of the /// stream. This is equivalent to calling [`shutdown()`] on the `UnixStream`. /// /// [`split`]: Self::split() diff --git a/tokio/src/runtime/builder.rs b/tokio/src/runtime/builder.rs index 9aae69ab98f..a14d336f497 100644 --- a/tokio/src/runtime/builder.rs +++ b/tokio/src/runtime/builder.rs @@ -1,7 +1,9 @@ #![cfg_attr(loom, allow(unused_imports))] use crate::runtime::handle::Handle; -use crate::runtime::{blocking, driver, Callback, HistogramBuilder, Runtime, TaskCallback}; +use crate::runtime::{ + blocking, driver, Callback, HistogramBuilder, Runtime, TaskCallback, TimerFlavor, +}; #[cfg(tokio_unstable)] use crate::runtime::{metrics::HistogramConfiguration, LocalOptions, LocalRuntime, TaskMeta}; use crate::util::rand::{RngSeed, RngSeedGenerator}; @@ -133,6 +135,8 @@ pub struct Builder { #[cfg(tokio_unstable)] pub(super) unhandled_panic: UnhandledPanic, + + timer_flavor: TimerFlavor, } cfg_unstable! { @@ -318,6 +322,8 @@ impl Builder { metrics_poll_count_histogram: HistogramBuilder::default(), disable_lifo_slot: false, + + timer_flavor: TimerFlavor::Traditional, } } @@ -363,6 +369,39 @@ impl Builder { self } + /// Enables the alternative timer implementation, which is disabled by default. 
+ /// + /// The alternative timer implementation is an unstable feature that may + /// provide better performance on multi-threaded runtimes with a large number + /// of worker threads. + /// + /// This option only applies to multi-threaded runtimes. Attempting to use + /// this option with any other runtime type will have no effect. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(not(target_family = "wasm"))] + /// # { + /// use tokio::runtime; + /// + /// let rt = runtime::Builder::new_multi_thread() + /// .enable_alt_timer() + /// .build() + /// .unwrap(); + /// # } + /// ``` + #[cfg(all(tokio_unstable, feature = "time", feature = "rt-multi-thread"))] + #[cfg_attr( + docsrs, + doc(cfg(all(tokio_unstable, feature = "time", feature = "rt-multi-thread"))) + )] + pub fn enable_alt_timer(&mut self) -> &mut Self { + self.enable_time(); + self.timer_flavor = TimerFlavor::Alternative; + self + } + /// Sets the number of worker threads the `Runtime` will use. /// /// This can be any number above 0 though it is advised to keep this value @@ -992,6 +1031,7 @@ impl Builder { enable_time: self.enable_time, start_paused: self.start_paused, nevents: self.nevents, + timer_flavor: self.timer_flavor, } } @@ -1544,7 +1584,9 @@ impl Builder { use crate::runtime::scheduler; use crate::runtime::Config; - let (driver, driver_handle) = driver::Driver::new(self.get_cfg())?; + let mut cfg = self.get_cfg(); + cfg.timer_flavor = TimerFlavor::Traditional; + let (driver, driver_handle) = driver::Driver::new(cfg)?; // Blocking pool let blocking_pool = blocking::create_blocking_pool(self, self.max_blocking_threads); @@ -1761,6 +1803,7 @@ cfg_rt_multi_thread! 
{ seed_generator: seed_generator_1, metrics_poll_count_histogram: self.metrics_poll_count_histogram_builder(), }, + self.timer_flavor, ); let handle = Handle { inner: scheduler::Handle::MultiThread(handle) }; diff --git a/tokio/src/runtime/driver.rs b/tokio/src/runtime/driver.rs index a01e7341785..92b2350db9d 100644 --- a/tokio/src/runtime/driver.rs +++ b/tokio/src/runtime/driver.rs @@ -40,6 +40,7 @@ pub(crate) struct Cfg { pub(crate) enable_pause_time: bool, pub(crate) start_paused: bool, pub(crate) nevents: usize, + pub(crate) timer_flavor: crate::runtime::TimerFlavor, } impl Driver { @@ -48,7 +49,8 @@ impl Driver { let clock = create_clock(cfg.enable_pause_time, cfg.start_paused); - let (time_driver, time_handle) = create_time_driver(cfg.enable_time, io_stack, &clock); + let (time_driver, time_handle) = + create_time_driver(cfg.enable_time, cfg.timer_flavor, io_stack, &clock); Ok(( Self { inner: time_driver }, @@ -113,6 +115,7 @@ impl Handle { .expect("A Tokio 1.x context was found, but timers are disabled. Call `enable_time` on the runtime builder to enable timers.") } + #[cfg(tokio_unstable)] pub(crate) fn with_time(&self, f: F) -> R where F: FnOnce(Option<&crate::runtime::time::Handle>) -> R, @@ -288,6 +291,7 @@ cfg_time! { Enabled { driver: crate::runtime::time::Driver, }, + EnabledAlt(IoStack), Disabled(IoStack), } @@ -300,13 +304,21 @@ cfg_time! 
{ fn create_time_driver( enable: bool, + timer_flavor: crate::runtime::TimerFlavor, io_stack: IoStack, clock: &Clock, ) -> (TimeDriver, TimeHandle) { if enable { - let (driver, handle) = crate::runtime::time::Driver::new(io_stack, clock); - - (TimeDriver::Enabled { driver }, Some(handle)) + match timer_flavor { + crate::runtime::TimerFlavor::Traditional => { + let (driver, handle) = crate::runtime::time::Driver::new(io_stack, clock); + (TimeDriver::Enabled { driver }, Some(handle)) + } + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + crate::runtime::TimerFlavor::Alternative => { + (TimeDriver::EnabledAlt(io_stack), Some(crate::runtime::time::Driver::new_alt(clock))) + } + } } else { (TimeDriver::Disabled(io_stack), None) } @@ -316,6 +328,7 @@ cfg_time! { pub(crate) fn park(&mut self, handle: &Handle) { match self { TimeDriver::Enabled { driver, .. } => driver.park(handle), + TimeDriver::EnabledAlt(v) => v.park(handle), TimeDriver::Disabled(v) => v.park(handle), } } @@ -323,6 +336,7 @@ cfg_time! { pub(crate) fn park_timeout(&mut self, handle: &Handle, duration: Duration) { match self { TimeDriver::Enabled { driver } => driver.park_timeout(handle, duration), + TimeDriver::EnabledAlt(v) => v.park_timeout(handle, duration), TimeDriver::Disabled(v) => v.park_timeout(handle, duration), } } @@ -330,6 +344,7 @@ cfg_time! { pub(crate) fn shutdown(&mut self, handle: &Handle) { match self { TimeDriver::Enabled { driver } => driver.shutdown(handle), + TimeDriver::EnabledAlt(v) => v.shutdown(handle), TimeDriver::Disabled(v) => v.shutdown(handle), } } @@ -348,6 +363,7 @@ cfg_not_time! { fn create_time_driver( _enable: bool, + _timer_flavor: crate::runtime::TimerFlavor, io_stack: IoStack, _clock: &Clock, ) -> (TimeDriver, TimeHandle) { diff --git a/tokio/src/runtime/mod.rs b/tokio/src/runtime/mod.rs index ae58ce6da86..92a159b38cb 100644 --- a/tokio/src/runtime/mod.rs +++ b/tokio/src/runtime/mod.rs @@ -389,8 +389,135 @@ cfg_process_driver! 
{ mod process; } +#[cfg_attr(not(feature = "time"), allow(dead_code))] +#[derive(Debug, Copy, Clone, PartialEq)] +pub(crate) enum TimerFlavor { + Traditional, + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + Alternative, +} + cfg_time! { pub(crate) mod time; + + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + pub(crate) mod time_alt; + + use std::task::{Context, Poll}; + use std::pin::Pin; + + #[derive(Debug)] + pub(crate) enum Timer { + Traditional(time::TimerEntry), + + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + Alternative(time_alt::Timer), + } + + impl Timer { + #[track_caller] + pub(crate) fn new( + handle: crate::runtime::scheduler::Handle, + deadline: crate::time::Instant, + ) -> Self { + match handle.timer_flavor() { + crate::runtime::TimerFlavor::Traditional => { + Timer::Traditional(time::TimerEntry::new(handle, deadline)) + } + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + crate::runtime::TimerFlavor::Alternative => { + Timer::Alternative(time_alt::Timer::new(handle, deadline)) + } + } + } + + pub(crate) fn deadline(&self) -> crate::time::Instant { + match self { + Timer::Traditional(entry) => entry.deadline(), + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + Timer::Alternative(entry) => entry.deadline(), + } + } + + pub(crate) fn is_elapsed(&self) -> bool { + match self { + Timer::Traditional(entry) => entry.is_elapsed(), + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + Timer::Alternative(entry) => entry.is_elapsed(), + } + } + + pub(crate) fn flavor(self: Pin<&Self>) -> TimerFlavor { + match self.get_ref() { + Timer::Traditional(_) => TimerFlavor::Traditional, + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + Timer::Alternative(_) => TimerFlavor::Alternative, + } + } + + pub(crate) fn reset( + self: Pin<&mut Self>, + new_time: crate::time::Instant, + reregister: bool + ) { + // Safety: we never move the inner entries. 
+ let this = unsafe { self.get_unchecked_mut() }; + match this { + Timer::Traditional(entry) => { + // Safety: we never move the inner entries. + unsafe { Pin::new_unchecked(entry).reset(new_time, reregister); } + } + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + Timer::Alternative(_) => panic!("not implemented yet"), + } + } + + pub(crate) fn poll_elapsed( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + // Safety: we never move the inner entries. + let this = unsafe { self.get_unchecked_mut() }; + match this { + Timer::Traditional(entry) => { + // Safety: we never move the inner entries. + unsafe { Pin::new_unchecked(entry).poll_elapsed(cx) } + } + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + Timer::Alternative(entry) => { + // Safety: we never move the inner entries. + unsafe { Pin::new_unchecked(entry).poll_elapsed(cx).map(Ok) } + } + } + } + + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + pub(crate) fn scheduler_handle(&self) -> &crate::runtime::scheduler::Handle { + match self { + Timer::Traditional(_) => unreachable!("we should not call this on Traditional Timer"), + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + Timer::Alternative(entry) => entry.scheduler_handle(), + } + } + + #[cfg(all(tokio_unstable, feature = "tracing"))] + pub(crate) fn driver(self: Pin<&Self>) -> &crate::runtime::time::Handle { + match self.get_ref() { + Timer::Traditional(entry) => entry.driver(), + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + Timer::Alternative(entry) => entry.driver(), + } + } + + #[cfg(all(tokio_unstable, feature = "tracing"))] + pub(crate) fn clock(self: Pin<&Self>) -> &crate::time::Clock { + match self.get_ref() { + Timer::Traditional(entry) => entry.clock(), + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + Timer::Alternative(entry) => entry.clock(), + } + } + } } cfg_signal_internal_and_unix! 
{ diff --git a/tokio/src/runtime/scheduler/current_thread/mod.rs b/tokio/src/runtime/scheduler/current_thread/mod.rs index a67ada588b2..b505035aa45 100644 --- a/tokio/src/runtime/scheduler/current_thread/mod.rs +++ b/tokio/src/runtime/scheduler/current_thread/mod.rs @@ -1,4 +1,4 @@ -use crate::loom::sync::atomic::{AtomicBool, Ordering}; +use crate::loom::sync::atomic::AtomicBool; use crate::loom::sync::Arc; use crate::runtime::driver::{self, Driver}; use crate::runtime::scheduler::{self, Defer, Inject}; @@ -22,13 +22,6 @@ use std::thread::ThreadId; use std::time::Duration; use std::{fmt, thread}; -cfg_time! { - use crate::runtime::scheduler::util; - use crate::runtime::time::EntryHandle; - use crate::runtime::time::WakeQueue; - use crate::loom::sync::Mutex; -} - /// Executes tasks on the current thread pub(crate) struct CurrentThread { /// Core scheduler data is acquired by a thread entering `block_on`. @@ -69,9 +62,6 @@ struct Core { /// Current tick tick: u32, - #[cfg(feature = "time")] - time_context: crate::runtime::time::Context2, - /// Runtime driver /// /// The driver is removed before starting to park the thread @@ -93,12 +83,6 @@ struct Shared { /// Remote run queue inject: Inject>, - #[cfg(feature = "time")] - /// Timers pending to be registered. - /// This is used to register a timer but the [`Core`] - /// is not available in the current thread. - inject_timers: Mutex>, - /// Collection of all active tasks spawned onto this executor. owned: OwnedTasks>, @@ -113,9 +97,6 @@ struct Shared { /// This scheduler only has one worker. worker_metrics: WorkerMetrics, - - /// Indicates that the runtime is shutting down. - is_shutdown: AtomicBool, } /// Thread-local context. 
@@ -171,14 +152,11 @@ impl CurrentThread { }, shared: Shared { inject: Inject::new(), - #[cfg(feature = "time")] - inject_timers: Mutex::new(Vec::new()), owned: OwnedTasks::new(1), woken: AtomicBool::new(false), config, scheduler_metrics: SchedulerMetrics::new(), worker_metrics, - is_shutdown: AtomicBool::new(false), }, driver: driver_handle, blocking_spawner, @@ -189,8 +167,6 @@ impl CurrentThread { let core = AtomicCell::new(Some(Box::new(Core { tasks: VecDeque::with_capacity(INITIAL_CAPACITY), tick: 0, - #[cfg(feature = "time")] - time_context: crate::runtime::time::Context2::new(), driver: Some(driver), metrics: MetricsBatch::new(&handle.shared.worker_metrics), global_queue_interval, @@ -290,8 +266,6 @@ impl CurrentThread { let core = shutdown2(core, handle); *context.core.borrow_mut() = Some(core); } - - handle.shared.is_shutdown.store(true, Ordering::SeqCst); } } @@ -301,16 +275,7 @@ fn shutdown2(mut core: Box, handle: &Handle) -> Box { // call returns. handle.shared.owned.close_and_shutdown_all(0); - #[cfg(feature = "time")] - { - util::time::shutdown_local_timers( - &mut core.time_context.wheel, - &mut core.time_context.canc_rx, - handle.take_remote_timers(), - &handle.driver, - ); - } - // Drain the local queue + // Drain local queue // We already shut down every task, so we just need to drop the task. 
while let Some(task) = core.next_local_task(handle) { drop(task); @@ -456,23 +421,12 @@ impl Context { driver: &mut Driver, duration: Option, ) -> Box { - debug_assert!(core.driver.is_none()); - let (core, ()) = self.enter(core, || { - let MaintainLocalTimer { - park_duration: duration, - auto_advance_duration, - } = self.maintain_local_timers_before_parking(handle, duration); - - if let Some(duration) = duration { - driver.park_timeout(&handle.driver, duration); - } else { - driver.park(&handle.driver); + match duration { + Some(dur) => driver.park_timeout(&handle.driver, dur), + None => driver.park(&handle.driver), } - self.defer.wake(); - - self.maintain_local_timers_after_parking(handle, auto_advance_duration); }); core @@ -495,165 +449,6 @@ impl Context { pub(crate) fn defer(&self, waker: &Waker) { self.defer.defer(waker); } - - cfg_time! { - /// Maintain local timers before parking the resource driver. - /// - /// * Remove cancelled timers from the local timer wheel. - /// * Register remote timers to the local timer wheel. - /// * Adjust the park duration based on - /// * the next timer expiration time. - /// * whether auto-advancing is required (feature = "test-util"). 
- /// - /// # Returns - /// - /// `(Box, park_duration, auto_advance_duration)` - fn maintain_local_timers_before_parking( - &self, - handle: &Handle, - park_duration: Option - ) -> MaintainLocalTimer { - let mut wake_queue = WakeQueue::new(); - - let (should_yield, next_timer) = context::with_scheduler(|maybe_cx| { - use scheduler::Context::CurrentThread; - - match maybe_cx { - Some(CurrentThread(cx)) if std::ptr::eq(Arc::as_ptr(&cx.handle), handle) => { - let mut maybe_core = cx.core.borrow_mut(); - let core = maybe_core.as_mut().expect("core missing"); - let time_cx = &mut core.time_context; - - util::time::process_registration_queue( - &mut time_cx.registration_queue, - &mut time_cx.wheel, - &time_cx.canc_tx, - &mut wake_queue, - ); - util::time::insert_inject_timers( - &mut time_cx.wheel, - &time_cx.canc_tx, - handle.take_remote_timers(), - &mut wake_queue, - ); - util::time::remove_cancelled_timers(&mut time_cx.wheel, &mut time_cx.canc_rx); - let should_yield = !wake_queue.is_empty(); - - let next_timer = - util::time::next_expiration_time(&time_cx.wheel, &handle.driver); - - (should_yield, next_timer) - } - _bad_cx => panic!("function is not called within the exact same runtime context"), - } - }); - - wake_queue.wake_all(); - - if should_yield { - MaintainLocalTimer { - park_duration: Some(Duration::ZERO), - auto_advance_duration: None, - } - } else { - let dur = util::time::min_duration(park_duration, next_timer); - if util::time::pre_auto_advance(&handle.driver, dur) { - MaintainLocalTimer { - park_duration: Some(Duration::ZERO), - auto_advance_duration: dur, - } - } else { - MaintainLocalTimer { - park_duration: dur, - auto_advance_duration: None, - } - } - } - } - - /// Maintain local timers after unparking the resource driver. - /// - /// * Auto-advance time, if required (feature = "test-util"). - /// * Process expired timers. 
- fn maintain_local_timers_after_parking( - &self, - handle: &Handle, - auto_advance_duration: Option - ) { - let mut wake_queue = WakeQueue::new(); - - context::with_scheduler(|maybe_cx| { - use scheduler::Context::CurrentThread; - - match maybe_cx { - Some(CurrentThread(cx)) if std::ptr::eq(Arc::as_ptr(&cx.handle), handle) => { - let mut maybe_core = cx.core.borrow_mut(); - let core = maybe_core.as_mut().expect("core missing"); - let time_cx = &mut core.time_context; - - util::time::post_auto_advance(&handle.driver, auto_advance_duration); - util::time::process_expired_timers(&mut time_cx.wheel, &handle.driver, &mut wake_queue); - } - _bad_cx => panic!("function is not called within the exact same runtime context"), - } - }); - - wake_queue.wake_all(); - } - - fn with_core(&self, f: F) -> R - where - F: FnOnce(Option<&mut Core>) -> R, - { - let mut core = self.core.borrow_mut(); - f(core.as_mut().map(|c| c.as_mut())) - } - - #[cfg(all(not(target_os = "wasi"), test))] - pub(crate) fn with_time_context2(&self, f: F) -> R - where - F: FnOnce(Option<&mut crate::runtime::time::Context2>) -> R, - { - self.with_core(|maybe_core| { - match maybe_core { - Some(core) => f(Some(&mut core.time_context)), - None => f(None), - } - }) - } - - pub(crate) fn with_registration_queue(&self, f: F) -> R - where - F: FnOnce(Option>) -> R, - { - self.with_core(|maybe_core| { - match maybe_core { - Some(core) => f(Some(crate::runtime::time::Context::Running { - registration_queue: &mut core.time_context.registration_queue, - elapsed: core.time_context.wheel.elapsed(), - })), - None => f(None), - } - }) - } - } // cfg_time! - - cfg_not_time! { - fn maintain_local_timers_before_parking( - &self, - _handle: &Handle, - park_duration: Option - ) -> MaintainLocalTimer { - MaintainLocalTimer { park_duration, auto_advance_duration: None } - } - - fn maintain_local_timers_after_parking( - &self, - _handle: &Handle, - _auto_advance_duration: Option - ) { - } - } // cfg_not_time! 
} // ===== impl Handle ===== @@ -802,26 +597,6 @@ impl Handle { assert_eq!(0, worker); &self.shared.worker_metrics } - - cfg_time! { - /// Push a timer handle from the remote thread. - pub(crate) fn push_remote_timer(&self, entry: EntryHandle) { - { - let mut inject_timers = self.shared.inject_timers.lock(); - inject_timers.push(entry); - } - self.driver.unpark(); - } - - pub(crate) fn take_remote_timers(&self) -> Vec { - let mut inject_timers = self.shared.inject_timers.lock(); - std::mem::take(&mut inject_timers) - } - - pub(crate) fn is_shutdown(&self) -> bool { - self.shared.is_shutdown.load(Ordering::SeqCst) - } - } } cfg_unstable_metrics! { @@ -1086,9 +861,3 @@ impl Drop for CoreGuard<'_> { } } } - -/// Returned by [`Context::maintain_local_timers_before_parking`]. -struct MaintainLocalTimer { - park_duration: Option, - auto_advance_duration: Option, -} diff --git a/tokio/src/runtime/scheduler/mod.rs b/tokio/src/runtime/scheduler/mod.rs index 14ac6dacce2..45d24ea288d 100644 --- a/tokio/src/runtime/scheduler/mod.rs +++ b/tokio/src/runtime/scheduler/mod.rs @@ -24,7 +24,7 @@ cfg_rt_multi_thread! { pub(crate) use multi_thread::MultiThread; } -pub(crate) mod util; +pub(super) mod util; use crate::runtime::driver; @@ -109,28 +109,45 @@ cfg_rt! { } } - cfg_time! { - /// Returns true if both handles belong to the same runtime instance. 
- pub(crate) fn is_same_runtime(&self, other: &Handle) -> bool { - match (self, other) { - (Handle::CurrentThread(a), Handle::CurrentThread(b)) => Arc::ptr_eq(a, b), - #[cfg(feature = "rt-multi-thread")] - (Handle::MultiThread(a), Handle::MultiThread(b)) => Arc::ptr_eq(a, b), - #[cfg(feature = "rt-multi-thread")] - _ => false, // different runtime types - } + #[cfg(feature = "time")] + pub(crate) fn timer_flavor(&self) -> crate::runtime::TimerFlavor { + match self { + Handle::CurrentThread(_) => crate::runtime::TimerFlavor::Traditional, + + #[cfg(feature = "rt-multi-thread")] + Handle::MultiThread(h) => h.timer_flavor, } + } - /// Returns true if the runtime is shutting down. - pub(crate) fn is_shutdown(&self) -> bool { - match_flavor!(self, Handle(h) => h.is_shutdown()) + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", feature = "time"))] + /// Returns true if both handles belong to the same runtime instance. + pub(crate) fn is_same_runtime(&self, other: &Handle) -> bool { + match (self, other) { + (Handle::CurrentThread(a), Handle::CurrentThread(b)) => Arc::ptr_eq(a, b), + #[cfg(feature = "rt-multi-thread")] + (Handle::MultiThread(a), Handle::MultiThread(b)) => Arc::ptr_eq(a, b), + #[cfg(feature = "rt-multi-thread")] + _ => false, // different runtime types } + } - /// Push a timer entry that was created outside of this runtime - /// into the runtime-global queue. The pushed timer will be - /// processed by a random worker thread. - pub(crate) fn push_remote_timer(&self, entry_hdl: crate::runtime::time::EntryHandle) { - match_flavor!(self, Handle(h) => h.push_remote_timer(entry_hdl)) + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", feature = "time"))] + /// Returns true if the runtime is shutting down. 
+ pub(crate) fn is_shutdown(&self) -> bool { + match self { + Handle::CurrentThread(_) => panic!("the alternative timer implementation is not supported on CurrentThread runtime"), + Handle::MultiThread(h) => h.is_shutdown(), + } + } + + #[cfg(all(tokio_unstable, feature = "rt-multi-thread", feature = "time"))] + /// Push a timer entry that was created outside of this runtime + /// into the runtime-global queue. The pushed timer will be + /// processed by a random worker thread. + pub(crate) fn push_remote_timer(&self, entry_hdl: crate::runtime::time_alt::EntryHandle) { + match self { + Handle::CurrentThread(_) => panic!("the alternative timer implementation is not supported on CurrentThread runtime"), + Handle::MultiThread(h) => h.push_remote_timer(entry_hdl), } } @@ -276,23 +293,28 @@ cfg_rt! { match_flavor!(self, Context(context) => context.defer(waker)); } - cfg_time! { - pub(crate) fn with_registration_queue(&self, f: F) -> R - where - F: FnOnce(Option>) -> R, - { - match_flavor!(self, Context(context) => context.with_registration_queue(f)) - } - - #[cfg(all(not(target_os = "wasi"), test))] - pub(crate) fn with_time_context2(&self, f: F) -> R - where - F: FnOnce(Option<&mut crate::runtime::time::Context2>) -> R, - { - match_flavor!(self, Context(context) => context.with_time_context2(f)) + #[cfg(all(tokio_unstable, feature = "time", feature = "rt-multi-thread"))] + pub(crate) fn with_time_temp_local_context(&self, f: F) -> R + where + F: FnOnce(Option>) -> R, + { + match self { + Context::CurrentThread(_) => panic!("the alternative timer implementation is not supported on CurrentThread runtime"), + Context::MultiThread(context) => context.with_time_temp_local_context(f), } } + // #[cfg(all(tokio_unstable, feature = "time", feature = "rt-multi-thread", not(target_os = "wasi"), test))] + // pub(crate) fn with_time_local_context(&self, f: F) -> R + // where + // F: FnOnce(Option<&mut crate::runtime::time_alt::LocalContext>) -> R, + // { + // match self { + // 
Context::CurrentThread(_) => panic!("the alternative timer implementation is not supported on CurrentThread runtime"), + // Context::MultiThread(context) => context.with_time_local_context(f), + // } + // } + cfg_rt_multi_thread! { #[track_caller] pub(crate) fn expect_multi_thread(&self) -> &multi_thread::Context { @@ -317,5 +339,11 @@ cfg_not_rt! { pub(crate) fn current() -> Handle { panic!("{}", crate::util::error::CONTEXT_MISSING_ERROR) } + + #[cfg_attr(not(feature = "time"), allow(dead_code))] + #[track_caller] + pub(crate) fn timer_flavor(&self) -> crate::runtime::TimerFlavor { + panic!("{}", crate::util::error::CONTEXT_MISSING_ERROR) + } } } diff --git a/tokio/src/runtime/scheduler/multi_thread/handle.rs b/tokio/src/runtime/scheduler/multi_thread/handle.rs index 7c74ea007cc..14d65294c08 100644 --- a/tokio/src/runtime/scheduler/multi_thread/handle.rs +++ b/tokio/src/runtime/scheduler/multi_thread/handle.rs @@ -1,12 +1,11 @@ use crate::future::Future; -use crate::loom::sync::atomic::{AtomicBool, Ordering}; use crate::loom::sync::Arc; use crate::runtime::scheduler::multi_thread::worker; use crate::runtime::task::{Notified, Task, TaskHarnessScheduleHooks}; use crate::runtime::{ blocking, driver, task::{self, JoinHandle, SpawnLocation}, - TaskHooks, TaskMeta, + TaskHooks, TaskMeta, TimerFlavor, }; use crate::util::RngSeedGenerator; @@ -18,6 +17,9 @@ cfg_taskdump! { mod taskdump; } +#[cfg(all(tokio_unstable, feature = "time"))] +use crate::loom::sync::atomic::{AtomicBool, Ordering::SeqCst}; + /// Handle to the multi thread scheduler pub(crate) struct Handle { /// Task spawner @@ -35,6 +37,11 @@ pub(crate) struct Handle { /// User-supplied hooks to invoke for things pub(crate) task_hooks: TaskHooks, + #[cfg_attr(not(feature = "time"), allow(dead_code))] + /// Timer flavor used by the runtime + pub(crate) timer_flavor: TimerFlavor, + + #[cfg(all(tokio_unstable, feature = "time"))] /// Indicates that the runtime is shutting down. 
pub(crate) is_shutdown: AtomicBool, } @@ -54,15 +61,16 @@ impl Handle { Self::bind_new_task(me, future, id, spawned_at) } - cfg_time! { - pub(crate) fn is_shutdown(&self) -> bool { - self.is_shutdown.load(Ordering::SeqCst) - } + #[cfg(all(tokio_unstable, feature = "time"))] + pub(crate) fn is_shutdown(&self) -> bool { + self.is_shutdown + .load(crate::loom::sync::atomic::Ordering::SeqCst) } pub(crate) fn shutdown(&self) { - self.is_shutdown.store(true, Ordering::SeqCst); self.close(); + #[cfg(all(tokio_unstable, feature = "time"))] + self.is_shutdown.store(true, SeqCst); } #[track_caller] diff --git a/tokio/src/runtime/scheduler/multi_thread/mod.rs b/tokio/src/runtime/scheduler/multi_thread/mod.rs index d85a0ae0a2a..1c5e1a88884 100644 --- a/tokio/src/runtime/scheduler/multi_thread/mod.rs +++ b/tokio/src/runtime/scheduler/multi_thread/mod.rs @@ -41,7 +41,7 @@ use crate::loom::sync::Arc; use crate::runtime::{ blocking, driver::{self, Driver}, - scheduler, Config, + scheduler, Config, TimerFlavor, }; use crate::util::RngSeedGenerator; @@ -61,6 +61,7 @@ impl MultiThread { blocking_spawner: blocking::Spawner, seed_generator: RngSeedGenerator, config: Config, + timer_flavor: TimerFlavor, ) -> (MultiThread, Arc, Launch) { let parker = Parker::new(driver); let (handle, launch) = worker::create( @@ -70,6 +71,7 @@ impl MultiThread { blocking_spawner, seed_generator, config, + timer_flavor, ); (MultiThread, handle, launch) diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index 2078fb40bec..559ee5db3c8 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -56,7 +56,6 @@ //! the inject queue indefinitely. This would be a ref-count cycle and a memory //! leak. 
-use crate::loom::sync::atomic::AtomicBool; use crate::loom::sync::{Arc, Mutex}; use crate::runtime; use crate::runtime::scheduler::multi_thread::{ @@ -64,7 +63,9 @@ use crate::runtime::scheduler::multi_thread::{ }; use crate::runtime::scheduler::{inject, Defer, Lock}; use crate::runtime::task::OwnedTasks; -use crate::runtime::{blocking, driver, scheduler, task, Config, SchedulerMetrics, WorkerMetrics}; +use crate::runtime::{ + blocking, driver, scheduler, task, Config, SchedulerMetrics, TimerFlavor, WorkerMetrics, +}; use crate::runtime::{context, TaskHooks}; use crate::task::coop; use crate::util::atomic_cell::AtomicCell; @@ -75,12 +76,6 @@ use std::task::Waker; use std::thread; use std::time::Duration; -cfg_time! { - use crate::runtime::scheduler::util; - use crate::runtime::time::EntryHandle; - use crate::runtime::time::WakeQueue; -} - mod metrics; cfg_taskdump! { @@ -91,6 +86,15 @@ cfg_not_taskdump! { mod taskdump_mock; } +#[cfg(all(tokio_unstable, feature = "time"))] +use crate::loom::sync::atomic::AtomicBool; + +#[cfg(all(tokio_unstable, feature = "time"))] +use crate::runtime::time_alt; + +#[cfg(all(tokio_unstable, feature = "time"))] +use crate::runtime::scheduler::util; + /// A scheduler worker pub(super) struct Worker { /// Reference to scheduler's handle @@ -122,8 +126,8 @@ struct Core { /// The worker-local run queue. run_queue: queue::Local>, - #[cfg(feature = "time")] - time_context: crate::runtime::time::Context2, + #[cfg(all(tokio_unstable, feature = "time"))] + time_context: time_alt::LocalContext, /// True if the worker is currently searching for more work. Searching /// involves attempting to steal from other workers. @@ -204,11 +208,11 @@ pub(crate) struct Synced { /// Synchronized state for `Inject`. pub(crate) inject: inject::Synced, - #[cfg(feature = "time")] + #[cfg(all(tokio_unstable, feature = "time"))] /// Timers pending to be registered. /// This is used to register a timer but the [`Core`] /// is not available in the current thread. 
- inject_timers: Vec, + inject_timers: Vec, } /// Used to communicate with a worker from other threads. @@ -257,6 +261,7 @@ pub(super) fn create( blocking_spawner: blocking::Spawner, seed_generator: RngSeedGenerator, config: Config, + timer_flavor: TimerFlavor, ) -> (Arc, Launch) { let mut cores = Vec::with_capacity(size); let mut remotes = Vec::with_capacity(size); @@ -276,8 +281,8 @@ pub(super) fn create( lifo_slot: None, lifo_enabled: !config.disable_lifo_slot, run_queue, - #[cfg(feature = "time")] - time_context: crate::runtime::time::Context2::new(), + #[cfg(all(tokio_unstable, feature = "time"))] + time_context: time_alt::LocalContext::new(), is_searching: false, is_shutdown: false, is_traced: false, @@ -305,8 +310,8 @@ pub(super) fn create( synced: Mutex::new(Synced { idle: idle_synced, inject: inject_synced, - #[cfg(feature = "time")] - inject_timers: vec![], + #[cfg(all(tokio_unstable, feature = "time"))] + inject_timers: Vec::new(), }), shutdown_cores: Mutex::new(vec![]), trace_status: TraceStatus::new(remotes_len), @@ -318,6 +323,8 @@ pub(super) fn create( driver: driver_handle, blocking_spawner, seed_generator, + timer_flavor, + #[cfg(all(tokio_unstable, feature = "time"))] is_shutdown: AtomicBool::new(false), }); @@ -581,15 +588,21 @@ impl Context { } } - #[cfg(feature = "time")] + #[cfg(all(tokio_unstable, feature = "time"))] { - util::time::shutdown_local_timers( - &mut core.time_context.wheel, - &mut core.time_context.canc_rx, - self.worker.handle.take_remote_timers(), - &self.worker.handle.driver, - ); + match self.worker.handle.timer_flavor { + TimerFlavor::Traditional => {} + TimerFlavor::Alternative => { + util::time_alt::shutdown_local_timers( + &mut core.time_context.wheel, + &mut core.time_context.canc_rx, + self.worker.handle.take_remote_timers(), + &self.worker.handle.driver, + ); + } + } } + core.pre_shutdown(&self.worker); // Signal shutdown self.worker.handle.shutdown_core(core); @@ -792,18 +805,25 @@ impl Context { // Take the parker 
out of core let mut park = core.park.take().expect("park missing"); - // Store `core` in context *self.core.borrow_mut() = Some(core); - // Must happens after taking out the parker, as the `Handle::schedule_local` - // will delay the notify if the parker taken out. - // - // See comments in `Handle::schedule_local` for more details. - let MaintainLocalTimer { - park_duration: duration, - auto_advance_duration, - } = self.maintain_local_timers_before_parking(duration); + #[cfg(feature = "time")] + let (duration, auto_advance_duration) = match self.worker.handle.timer_flavor { + TimerFlavor::Traditional => (duration, None::), + #[cfg(tokio_unstable)] + TimerFlavor::Alternative => { + // Must happens after taking out the parker, as the `Handle::schedule_local` + // will delay the notify if the parker taken out. + // + // See comments in `Handle::schedule_local` for more details. + let MaintainLocalTimer { + park_duration: duration, + auto_advance_duration, + } = self.maintain_local_timers_before_parking(duration); + (duration, auto_advance_duration) + } + }; // Park thread if let Some(timeout) = duration { @@ -814,22 +834,30 @@ impl Context { self.defer.wake(); - // Must happens before placing back the parker, as the `Handle::schedule_local` - // will delay the notify if the parker is still in `core`. - // - // See comments in `Handle::schedule_local` for more details. - self.maintain_local_timers_after_parking(auto_advance_duration); + #[cfg(feature = "time")] + match self.worker.handle.timer_flavor { + TimerFlavor::Traditional => { + // suppress unused variable warning + let _ = auto_advance_duration; + } + #[cfg(tokio_unstable)] + TimerFlavor::Alternative => { + // Must happens before placing back the parker, as the `Handle::schedule_local` + // will delay the notify if the parker is still in `core`. + // + // See comments in `Handle::schedule_local` for more details. 
+ self.maintain_local_timers_after_parking(auto_advance_duration); + } + } // Remove `core` from context core = self.core.borrow_mut().take().expect("core missing"); // Place `park` back in `core` core.park = Some(park); - if core.should_notify_others() { self.worker.handle.notify_parked_local(); } - core } @@ -843,166 +871,148 @@ impl Context { } } - cfg_time! { - /// Maintain local timers before parking the resource driver. - /// - /// * Remove cancelled timers from the local timer wheel. - /// * Register remote timers to the local timer wheel. - /// * Adjust the park duration based on - /// * the next timer expiration time. - /// * whether auto-advancing is required (feature = "test-util"). - /// - /// # Returns - /// - /// `(Box, park_duration, auto_advance_duration)` - fn maintain_local_timers_before_parking( - &self, - park_duration: Option - ) -> MaintainLocalTimer { - let handle = &self.worker.handle; - let mut wake_queue = WakeQueue::new(); - - let (should_yield, next_timer) = with_current(|maybe_cx| { - let cx = maybe_cx.expect("function should be called when core is present"); - assert_eq!( - Arc::as_ptr(&cx.worker.handle), - Arc::as_ptr(&self.worker.handle), - "function should be called on the exact same worker" - ); - - let mut maybe_core = cx.core.borrow_mut(); - let core = maybe_core.as_mut().expect("core missing"); - let time_cx = &mut core.time_context; - - util::time::process_registration_queue( - &mut time_cx.registration_queue, - &mut time_cx.wheel, - &time_cx.canc_tx, - &mut wake_queue, - ); - util::time::insert_inject_timers( - &mut time_cx.wheel, - &time_cx.canc_tx, - handle.take_remote_timers(), - &mut wake_queue, - ); - util::time::remove_cancelled_timers(&mut time_cx.wheel, &mut time_cx.canc_rx); - let should_yield = !wake_queue.is_empty(); - - let next_timer = - util::time::next_expiration_time(&time_cx.wheel, &handle.driver); - - (should_yield, next_timer) - }); + #[cfg(all(tokio_unstable, feature = "time"))] + /// Maintain local timers 
before parking the resource driver. + /// + /// * Remove cancelled timers from the local timer wheel. + /// * Register remote timers to the local timer wheel. + /// * Adjust the park duration based on + /// * the next timer expiration time. + /// * whether auto-advancing is required (feature = "test-util"). + /// + /// # Returns + /// + /// `(Box, park_duration, auto_advance_duration)` + fn maintain_local_timers_before_parking( + &self, + park_duration: Option, + ) -> MaintainLocalTimer { + let handle = &self.worker.handle; + let mut wake_queue = time_alt::WakeQueue::new(); + + let (should_yield, next_timer) = with_current(|maybe_cx| { + let cx = maybe_cx.expect("function should be called when core is present"); + assert_eq!( + Arc::as_ptr(&cx.worker.handle), + Arc::as_ptr(&self.worker.handle), + "function should be called on the exact same worker" + ); + + let mut maybe_core = cx.core.borrow_mut(); + let core = maybe_core.as_mut().expect("core missing"); + let time_cx = &mut core.time_context; + + util::time_alt::process_registration_queue( + &mut time_cx.registration_queue, + &mut time_cx.wheel, + &time_cx.canc_tx, + &mut wake_queue, + ); + util::time_alt::insert_inject_timers( + &mut time_cx.wheel, + &time_cx.canc_tx, + handle.take_remote_timers(), + &mut wake_queue, + ); + util::time_alt::remove_cancelled_timers(&mut time_cx.wheel, &mut time_cx.canc_rx); + let should_yield = !wake_queue.is_empty(); + + let next_timer = util::time_alt::next_expiration_time(&time_cx.wheel, &handle.driver); - wake_queue.wake_all(); + (should_yield, next_timer) + }); - if should_yield { + wake_queue.wake_all(); + + if should_yield { + MaintainLocalTimer { + park_duration: Some(Duration::from_millis(0)), + auto_advance_duration: None, + } + } else { + // get the minimum duration + let dur = util::time_alt::min_duration(park_duration, next_timer); + if util::time_alt::pre_auto_advance(&handle.driver, dur) { MaintainLocalTimer { - park_duration: Some(Duration::from_millis(0)), - 
auto_advance_duration: None, + park_duration: Some(Duration::ZERO), + auto_advance_duration: dur, } } else { - // get the minimum duration - let dur = util::time::min_duration(park_duration, next_timer); - if util::time::pre_auto_advance(&handle.driver, dur) { - MaintainLocalTimer { - park_duration: Some(Duration::ZERO), - auto_advance_duration: dur, - } - } else { - MaintainLocalTimer { - park_duration: dur, - auto_advance_duration: None, - } + MaintainLocalTimer { + park_duration: dur, + auto_advance_duration: None, } } } + } - /// Maintain local timers after unparking the resource driver. - /// - /// * Auto-advance time, if required (feature = "test-util"). - /// * Process expired timers. - fn maintain_local_timers_after_parking( - &self, - auto_advance_duration: Option - ) { - let handle = &self.worker.handle; - let mut wake_queue = WakeQueue::new(); + #[cfg(all(tokio_unstable, feature = "time"))] + /// Maintain local timers after unparking the resource driver. + /// + /// * Auto-advance time, if required (feature = "test-util"). + /// * Process expired timers. 
+ fn maintain_local_timers_after_parking(&self, auto_advance_duration: Option) { + let handle = &self.worker.handle; + let mut wake_queue = time_alt::WakeQueue::new(); - with_current(|maybe_cx| { - let cx = maybe_cx.expect("function should be called when core is present"); - assert_eq!( - Arc::as_ptr(&cx.worker.handle), - Arc::as_ptr(&self.worker.handle), - "function should be called on the exact same worker" - ); - - let mut maybe_core = cx.core.borrow_mut(); - let core = maybe_core.as_mut().expect("core missing"); - let time_cx = &mut core.time_context; - - util::time::post_auto_advance(&handle.driver, auto_advance_duration); - util::time::process_expired_timers(&mut time_cx.wheel, &handle.driver, &mut wake_queue); - }); + with_current(|maybe_cx| { + let cx = maybe_cx.expect("function should be called when core is present"); + assert_eq!( + Arc::as_ptr(&cx.worker.handle), + Arc::as_ptr(&self.worker.handle), + "function should be called on the exact same worker" + ); - wake_queue.wake_all(); - } + let mut maybe_core = cx.core.borrow_mut(); + let core = maybe_core.as_mut().expect("core missing"); + let time_cx = &mut core.time_context; - fn with_core(&self, f: F) -> R - where - F: FnOnce(Option<&mut Core>) -> R, - { - match self.core.borrow_mut().as_mut() { - Some(core) => f(Some(core)), - None => f(None), - } - } + util::time_alt::post_auto_advance(&handle.driver, auto_advance_duration); + util::time_alt::process_expired_timers( + &mut time_cx.wheel, + &handle.driver, + &mut wake_queue, + ); + }); - #[cfg(all(not(target_os = "wasi"), test))] - pub(crate) fn with_time_context2(&self, f: F) -> R - where - F: FnOnce(Option<&mut crate::runtime::time::Context2>) -> R, - { - self.with_core(|maybe_core| { - match maybe_core { - Some(core) => f(Some(&mut core.time_context)), - None => f(None), - } - }) - } + wake_queue.wake_all(); + } - pub(crate) fn with_registration_queue(&self, f: F) -> R - where - F: FnOnce(Option>) -> R, - { - self.with_core(|maybe_core| { - match 
maybe_core { - Some(core) if core.is_shutdown => f(Some(crate::runtime::time::Context::Shutdown)), - Some(core) => f(Some(crate::runtime::time::Context::Running { - registration_queue: &mut core.time_context.registration_queue, - elapsed: core.time_context.wheel.elapsed(), - })), - None => f(None), - } - }) - } - } // cfg_time! - - cfg_not_time! { - fn maintain_local_timers_before_parking( - &self, - park_duration: Option - ) -> MaintainLocalTimer { - MaintainLocalTimer { park_duration, auto_advance_duration: None } + #[cfg(all(tokio_unstable, feature = "time"))] + fn with_core(&self, f: F) -> R + where + F: FnOnce(Option<&mut Core>) -> R, + { + match self.core.borrow_mut().as_mut() { + Some(core) => f(Some(core)), + None => f(None), } + } - fn maintain_local_timers_after_parking( - &self, - _auto_advance_duration: Option - ) { - } - } // cfg_not_time! + // #[cfg(all(tokio_unstable, feature = "time", not(target_os = "wasi"), test))] + // pub(crate) fn with_time_local_context(&self, f: F) -> R + // where + // F: FnOnce(Option<&mut time_alt::LocalContext>) -> R, + // { + // self.with_core(|maybe_core| match maybe_core { + // Some(core) => f(Some(&mut core.time_context)), + // None => f(None), + // }) + // } + + #[cfg(all(tokio_unstable, feature = "time"))] + pub(crate) fn with_time_temp_local_context(&self, f: F) -> R + where + F: FnOnce(Option>) -> R, + { + self.with_core(|maybe_core| match maybe_core { + Some(core) if core.is_shutdown => f(Some(time_alt::TempLocalContext::new_shutdown())), + Some(core) => f(Some(time_alt::TempLocalContext::new_running( + &mut core.time_context, + ))), + None => f(None), + }) + } } impl Core { @@ -1341,23 +1351,24 @@ impl Handle { } } - cfg_time! { - /// Push a timer handle from the remote thread. 
- pub(crate) fn push_remote_timer(&self, hdl: EntryHandle) { - { - let mut synced = self.shared.synced.lock(); - synced.inject_timers.push(hdl); - } - self.notify_parked_remote(); + #[cfg(all(tokio_unstable, feature = "time"))] + pub(crate) fn push_remote_timer(&self, hdl: time_alt::EntryHandle) { + assert_eq!(self.timer_flavor, TimerFlavor::Alternative,); + { + let mut synced = self.shared.synced.lock(); + synced.inject_timers.push(hdl); } + self.notify_parked_remote(); + } - pub(crate) fn take_remote_timers(&self) -> Vec { - // It's ok to lost the race, as another worker is - // draining the inject_timers. - match self.shared.synced.try_lock() { - Some(mut synced) => std::mem::take(&mut synced.inject_timers), - None => Vec::new(), - } + #[cfg(all(tokio_unstable, feature = "time"))] + pub(crate) fn take_remote_timers(&self) -> Vec { + assert_eq!(self.timer_flavor, TimerFlavor::Alternative,); + // It's ok to lost the race, as another worker is + // draining the inject_timers. + match self.shared.synced.try_lock() { + Some(mut synced) => std::mem::take(&mut synced.inject_timers), + None => Vec::new(), } } @@ -1479,6 +1490,7 @@ impl<'a> Lock for &'a Handle { } } +#[cfg(all(tokio_unstable, feature = "time"))] /// Returned by [`Context::maintain_local_timers_before_parking`]. struct MaintainLocalTimer { park_duration: Option, diff --git a/tokio/src/runtime/scheduler/util/mod.rs b/tokio/src/runtime/scheduler/util/mod.rs index 28de2070a41..bea582887fe 100644 --- a/tokio/src/runtime/scheduler/util/mod.rs +++ b/tokio/src/runtime/scheduler/util/mod.rs @@ -1,3 +1,2 @@ -cfg_rt_and_time! 
{ - pub(crate) mod time; -} +#[cfg(all(tokio_unstable, feature = "time", feature = "rt-multi-thread"))] +pub(in crate::runtime) mod time_alt; diff --git a/tokio/src/runtime/scheduler/util/time.rs b/tokio/src/runtime/scheduler/util/time_alt.rs similarity index 94% rename from tokio/src/runtime/scheduler/util/time.rs rename to tokio/src/runtime/scheduler/util/time_alt.rs index dc833c09b17..e6ea35843ac 100644 --- a/tokio/src/runtime/scheduler/util/time.rs +++ b/tokio/src/runtime/scheduler/util/time_alt.rs @@ -1,8 +1,6 @@ use crate::runtime::scheduler::driver; -use crate::runtime::time::cancellation_queue::{Receiver, Sender}; -use crate::runtime::time::EntryHandle; -use crate::runtime::time::RegistrationQueue; -use crate::runtime::time::{WakeQueue, Wheel}; +use crate::runtime::time_alt::cancellation_queue::{Receiver, Sender}; +use crate::runtime::time_alt::{EntryHandle, RegistrationQueue, WakeQueue, Wheel}; use std::time::Duration; pub(crate) fn min_duration(a: Option, b: Option) -> Option { @@ -120,7 +118,7 @@ pub(crate) fn process_expired_timers( let time_source = time_hdl.time_source(); let now = time_source.now(clock); - time_hdl.process_at_time(wheel, now, wake_queue); + time_hdl.process_at_time_alt(wheel, now, wake_queue); }); } @@ -137,7 +135,7 @@ pub(crate) fn shutdown_local_timers( }; remove_cancelled_timers(wheel, rx); - time_hdl.shutdown(wheel); + time_hdl.shutdown_alt(wheel); let mut wake_queue = WakeQueue::new(); // simply wake all unregistered timers diff --git a/tokio/src/runtime/time/entry.rs b/tokio/src/runtime/time/entry.rs new file mode 100644 index 00000000000..736105d5abf --- /dev/null +++ b/tokio/src/runtime/time/entry.rs @@ -0,0 +1,693 @@ +//! Timer state structures. +//! +//! This module contains the heart of the intrusive timer implementation, and as +//! such the structures inside are full of tricky concurrency and unsafe code. +//! +//! # Ground rules +//! +//! The heart of the timer implementation here is the [`TimerShared`] structure, +//! 
shared between the [`TimerEntry`] and the driver. Generally, we permit access +//! to [`TimerShared`] ONLY via either 1) a mutable reference to [`TimerEntry`] or +//! 2) a held driver lock. +//! +//! It follows from this that any changes made while holding BOTH 1 and 2 will +//! be reliably visible, regardless of ordering. This is because of the `acq/rel` +//! fences on the driver lock ensuring ordering with 2, and Rust's mutable +//! reference rules for 1 (a mutable reference to an object can't be passed +//! between threads without an `acq/rel` barrier, and same-thread we have local +//! happens-before ordering). +//! +//! # State field +//! +//! Each timer has a state field associated with it. This field contains either +//! the current scheduled time, or a special flag value indicating its state. +//! This state can either indicate that the timer is on the 'pending' queue (and +//! thus will be fired with an `Ok(())` result soon) or that it has already been +//! fired/deregistered. +//! +//! This single state field allows for code that is firing the timer to +//! synchronize with any racing `reset` calls reliably. +//! +//! # Registered vs true timeouts +//! +//! To allow for the use case of a timeout that is periodically reset before +//! expiration to be as lightweight as possible, we support optimistically +//! lock-free timer resets, in the case where a timer is rescheduled to a later +//! point than it was originally scheduled for. +//! +//! This is accomplished by lazily rescheduling timers. That is, we update the +//! state field with the true expiration of the timer from the holder of +//! the [`TimerEntry`]. When the driver services timers (i.e., whenever it's +//! walking lists of timers), it checks this "true when" value, and reschedules +//! based on it. +//! +//! We do, however, also need to track what the expiration time was when we +//! originally registered the timer; this is used to locate the right linked +//!
list when the timer is being cancelled. +//! This is referred to as the `registered_when` internally. +//! +//! There is of course a race condition between timer reset and timer +//! expiration. If the driver fails to observe the updated expiration time, it +//! could trigger expiration of the timer too early. However, because +//! [`mark_pending`][mark_pending] performs a compare-and-swap, it will identify this race and +//! refuse to mark the timer as pending. +//! +//! [mark_pending]: TimerHandle::mark_pending + +use crate::loom::cell::UnsafeCell; +use crate::loom::sync::atomic::AtomicU64; +use crate::loom::sync::atomic::Ordering; + +use crate::runtime::scheduler; +use crate::sync::AtomicWaker; +use crate::time::Instant; +use crate::util::linked_list; + +use pin_project_lite::pin_project; +use std::task::{Context, Poll, Waker}; +use std::{marker::PhantomPinned, pin::Pin, ptr::NonNull}; + +type TimerResult = Result<(), crate::time::error::Error>; + +pub(in crate::runtime::time) const STATE_DEREGISTERED: u64 = u64::MAX; +const STATE_PENDING_FIRE: u64 = STATE_DEREGISTERED - 1; +const STATE_MIN_VALUE: u64 = STATE_PENDING_FIRE; +/// The largest safe integer to use for ticks. +/// +/// This value should be updated if any other signal values are added above. +pub(super) const MAX_SAFE_MILLIS_DURATION: u64 = STATE_MIN_VALUE - 1; + +/// This structure holds the current shared state of the timer - its scheduled +/// time (if registered), or otherwise the result of the timer completing, as +/// well as the registered waker. +/// +/// Generally, the `StateCell` is only permitted to be accessed from two contexts: +/// Either a thread holding the corresponding `&mut TimerEntry`, or a thread +/// holding the timer driver lock. 
The write actions on the `StateCell` amount to +/// passing "ownership" of the `StateCell` between these contexts; moving a timer +/// from the `TimerEntry` to the driver requires _both_ holding the `&mut +/// TimerEntry` and the driver lock, while moving it back (firing the timer) +/// requires only the driver lock. +pub(super) struct StateCell { + /// Holds either the scheduled expiration time for this timer, or (if the + /// timer has been fired and is unregistered), `u64::MAX`. + state: AtomicU64, + /// If the timer is fired (an Acquire order read on state shows + /// `u64::MAX`), holds the result that should be returned from + /// polling the timer. Otherwise, the contents are unspecified and reading + /// without holding the driver lock is undefined behavior. + result: UnsafeCell, + /// The currently-registered waker + waker: AtomicWaker, +} + +impl Default for StateCell { + fn default() -> Self { + Self::new() + } +} + +impl std::fmt::Debug for StateCell { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "StateCell({:?})", self.read_state()) + } +} + +impl StateCell { + fn new() -> Self { + Self { + state: AtomicU64::new(STATE_DEREGISTERED), + result: UnsafeCell::new(Ok(())), + waker: AtomicWaker::new(), + } + } + + fn is_pending(&self) -> bool { + self.state.load(Ordering::Relaxed) == STATE_PENDING_FIRE + } + + /// Returns the current expiration time, or None if not currently scheduled. + fn when(&self) -> Option { + let cur_state = self.state.load(Ordering::Relaxed); + + if cur_state == STATE_DEREGISTERED { + None + } else { + Some(cur_state) + } + } + + /// If the timer is completed, returns the result of the timer. Otherwise, + /// returns None and registers the waker. + fn poll(&self, waker: &Waker) -> Poll { + // We must register first. This ensures that either `fire` will + // observe the new waker, or we will observe a racing fire to have set + // the state, or both. 
+ self.waker.register_by_ref(waker); + + self.read_state() + } + + fn read_state(&self) -> Poll { + let cur_state = self.state.load(Ordering::Acquire); + + if cur_state == STATE_DEREGISTERED { + // SAFETY: The driver has fired this timer; this involves writing + // the result, and then writing (with release ordering) the state + // field. + Poll::Ready(unsafe { self.result.with(|p| *p) }) + } else { + Poll::Pending + } + } + + /// Marks this timer as being moved to the pending list, if its scheduled + /// time is not after `not_after`. + /// + /// If the timer is scheduled for a time after `not_after`, returns an Err + /// containing the current scheduled time. + /// + /// SAFETY: Must hold the driver lock. + unsafe fn mark_pending(&self, not_after: u64) -> Result<(), u64> { + // Quick initial debug check to see if the timer is already fired. Since + // firing the timer can only happen with the driver lock held, we know + // we shouldn't be able to "miss" a transition to a fired state, even + // with relaxed ordering. + let mut cur_state = self.state.load(Ordering::Relaxed); + + loop { + // improve the error message for things like + // https://github.com/tokio-rs/tokio/issues/3675 + assert!( + cur_state < STATE_MIN_VALUE, + "mark_pending called when the timer entry is in an invalid state" + ); + + if cur_state > not_after { + break Err(cur_state); + } + + match self.state.compare_exchange_weak( + cur_state, + STATE_PENDING_FIRE, + Ordering::AcqRel, + Ordering::Acquire, + ) { + Ok(_) => break Ok(()), + Err(actual_state) => cur_state = actual_state, + } + } + } + + /// Fires the timer, setting the result to the provided result. + /// + /// Returns: + /// * `Some(waker)` - if fired and a waker needs to be invoked once the + /// driver lock is released + /// * `None` - if fired and a waker does not need to be invoked, or if + /// already fired + /// + /// SAFETY: The driver lock must be held. 
+ unsafe fn fire(&self, result: TimerResult) -> Option { + // Quick initial check to see if the timer is already fired. Since + // firing the timer can only happen with the driver lock held, we know + // we shouldn't be able to "miss" a transition to a fired state, even + // with relaxed ordering. + let cur_state = self.state.load(Ordering::Relaxed); + if cur_state == STATE_DEREGISTERED { + return None; + } + + // SAFETY: We assume the driver lock is held and the timer is not + // fired, so only the driver is accessing this field. + // + // We perform a release-ordered store to state below, to ensure this + // write is visible before the state update is visible. + unsafe { self.result.with_mut(|p| *p = result) }; + + self.state.store(STATE_DEREGISTERED, Ordering::Release); + + self.waker.take_waker() + } + + /// Marks the timer as registered (poll will return None) and sets the + /// expiration time. + /// + /// While this function is memory-safe, it should only be called from a + /// context holding both `&mut TimerEntry` and the driver lock. + fn set_expiration(&self, timestamp: u64) { + debug_assert!(timestamp < STATE_MIN_VALUE); + + // We can use relaxed ordering because we hold the driver lock and will + // fence when we release the lock. + self.state.store(timestamp, Ordering::Relaxed); + } + + /// Attempts to adjust the timer to a new timestamp. + /// + /// If the timer has already been fired, is pending firing, or the new + /// timestamp is earlier than the old timestamp, (or occasionally + /// spuriously) returns Err without changing the timer's state. In this + /// case, the timer must be deregistered and re-registered. 
+ fn extend_expiration(&self, new_timestamp: u64) -> Result<(), ()> { + let mut prior = self.state.load(Ordering::Relaxed); + loop { + if new_timestamp < prior || prior >= STATE_MIN_VALUE { + return Err(()); + } + + match self.state.compare_exchange_weak( + prior, + new_timestamp, + Ordering::AcqRel, + Ordering::Acquire, + ) { + Ok(_) => return Ok(()), + Err(true_prior) => prior = true_prior, + } + } + } + + /// Returns true if the state of this timer indicates that the timer might + /// be registered with the driver. This check is performed with relaxed + /// ordering, but is conservative - if it returns false, the timer is + /// definitely _not_ registered. + pub(super) fn might_be_registered(&self) -> bool { + self.state.load(Ordering::Relaxed) != STATE_DEREGISTERED + } +} + +pin_project! { + // A timer entry. + // + // This is the handle to a timer that is controlled by the requester of the + // timer. As this participates in intrusive data structures, it must be pinned + // before polling. + #[derive(Debug)] + pub(crate) struct TimerEntry { + // Arc reference to the runtime handle. We can only free the driver after + // deregistering everything from their respective timer wheels. + driver: scheduler::Handle, + // Shared inner structure; this is part of an intrusive linked list, and + // therefore other references can exist to it while mutable references to + // Entry exist. + // + // This is manipulated only under the inner mutex. + #[pin] + inner: Option, + // Deadline for the timer. This is used to register on the first + // poll, as we can't register prior to being pinned. + deadline: Instant, + // Whether the deadline has been registered. + registered: bool, + } + + impl PinnedDrop for TimerEntry { + fn drop(this: Pin<&mut Self>) { + this.cancel(); + } + } +} + +unsafe impl Send for TimerEntry {} +unsafe impl Sync for TimerEntry {} + +/// An `TimerHandle` is the (non-enforced) "unique" pointer from the driver to the +/// timer entry. 
Generally, at most one `TimerHandle` exists for a timer at a time +/// (enforced by the timer state machine). +/// +/// SAFETY: An `TimerHandle` is essentially a raw pointer, and the usual caveats +/// of pointer safety apply. In particular, `TimerHandle` does not itself enforce +/// that the timer does still exist; however, normally an `TimerHandle` is created +/// immediately before registering the timer, and is consumed when firing the +/// timer, to help minimize mistakes. Still, because `TimerHandle` cannot enforce +/// memory safety, all operations are unsafe. +#[derive(Debug)] +pub(crate) struct TimerHandle { + inner: NonNull, +} + +pub(super) type EntryList = crate::util::linked_list::LinkedList; + +/// The shared state structure of a timer. This structure is shared between the +/// frontend (`Entry`) and driver backend. +/// +/// Note that this structure is located inside the `TimerEntry` structure. +pub(crate) struct TimerShared { + /// A link within the doubly-linked list of timers on a particular level and + /// slot. Valid only if state is equal to Registered. + /// + /// Only accessed under the entry lock. + pointers: linked_list::Pointers, + + /// The time when the [`TimerEntry`] was registered into the Wheel, + /// [`STATE_DEREGISTERED`] means it is not registered. + /// + /// Generally owned by the driver, but is accessed by the entry when not + /// registered. + /// + /// We use relaxed ordering for both loading and storing since this value + /// is only accessed either when holding the driver lock or through mutable + /// references to [`TimerEntry`]. + registered_when: AtomicU64, + + /// Current state. This records whether the timer entry is currently under + /// the ownership of the driver, and if not, its current state (not + /// complete, fired, error, etc). 
+ state: StateCell, + + _p: PhantomPinned, +} + +unsafe impl Send for TimerShared {} +unsafe impl Sync for TimerShared {} + +impl std::fmt::Debug for TimerShared { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("TimerShared") + .field( + "registered_when", + &self.registered_when.load(Ordering::Relaxed), + ) + .field("state", &self.state) + .finish() + } +} + +generate_addr_of_methods! { + impl<> TimerShared { + unsafe fn addr_of_pointers(self: NonNull) -> NonNull> { + &self.pointers + } + } +} + +impl TimerShared { + pub(super) fn new() -> Self { + Self { + registered_when: AtomicU64::new(0), + pointers: linked_list::Pointers::new(), + state: StateCell::default(), + _p: PhantomPinned, + } + } + + /// Gets the cached time-of-expiration value. + pub(super) fn registered_when(&self) -> u64 { + // Cached-when is only accessed under the driver lock, so we can use relaxed + self.registered_when.load(Ordering::Relaxed) + } + + /// Gets the true time-of-expiration value, and copies it into the cached + /// time-of-expiration value. + /// + /// SAFETY: Must be called with the driver lock held, and when this entry is + /// not in any timer wheel lists. + pub(super) unsafe fn sync_when(&self) -> u64 { + let true_when = self.true_when(); + + self.registered_when.store(true_when, Ordering::Relaxed); + + true_when + } + + /// Sets the cached time-of-expiration value. + /// + /// SAFETY: Must be called with the driver lock held, and when this entry is + /// not in any timer wheel lists. + unsafe fn set_registered_when(&self, when: u64) { + self.registered_when.store(when, Ordering::Relaxed); + } + + /// Returns the true time-of-expiration value, with relaxed memory ordering. + pub(super) fn true_when(&self) -> u64 { + self.state.when().expect("Timer already fired") + } + + /// Sets the true time-of-expiration value, even if it is less than the + /// current expiration or the timer is deregistered. 
+ /// + /// SAFETY: Must only be called with the driver lock held and the entry not + /// in the timer wheel. + pub(super) unsafe fn set_expiration(&self, t: u64) { + self.state.set_expiration(t); + self.registered_when.store(t, Ordering::Relaxed); + } + + /// Sets the true time-of-expiration only if it is after the current. + pub(super) fn extend_expiration(&self, t: u64) -> Result<(), ()> { + self.state.extend_expiration(t) + } + + /// Returns a `TimerHandle` for this timer. + pub(super) fn handle(&self) -> TimerHandle { + TimerHandle { + inner: NonNull::from(self), + } + } + + /// Returns true if the state of this timer indicates that the timer might + /// be registered with the driver. This check is performed with relaxed + /// ordering, but is conservative - if it returns false, the timer is + /// definitely _not_ registered. + pub(super) fn might_be_registered(&self) -> bool { + self.state.might_be_registered() + } +} + +unsafe impl linked_list::Link for TimerShared { + type Handle = TimerHandle; + + type Target = TimerShared; + + fn as_raw(handle: &Self::Handle) -> NonNull { + handle.inner + } + + unsafe fn from_raw(ptr: NonNull) -> Self::Handle { + TimerHandle { inner: ptr } + } + + unsafe fn pointers( + target: NonNull, + ) -> NonNull> { + unsafe { TimerShared::addr_of_pointers(target) } + } +} + +// ===== impl Entry ===== + +impl TimerEntry { + #[track_caller] + pub(crate) fn new(handle: scheduler::Handle, deadline: Instant) -> Self { + // Panic if the time driver is not enabled + let _ = handle.driver().time(); + + Self { + driver: handle, + inner: None, + deadline, + registered: false, + } + } + + fn inner(&self) -> Option<&TimerShared> { + self.inner.as_ref() + } + + fn init_inner(self: Pin<&mut Self>) { + match self.inner { + Some(_) => {} + None => self.project().inner.set(Some(TimerShared::new())), + } + } + + pub(crate) fn deadline(&self) -> Instant { + self.deadline + } + + pub(crate) fn is_elapsed(&self) -> bool { + let Some(inner) = self.inner() 
else { + return false; + }; + + // Is this timer still in the timer wheel? + let deregistered = !inner.might_be_registered(); + + // Once the timer has expired, + // it will be taken out of the wheel and be fired. + // + // So if we have already registered the timer into the wheel, + // but now it is not in the wheel, it means that it has been + // fired. + // + // +--------------+-----------------+----------+ + // | deregistered | self.registered | output | + // +--------------+-----------------+----------+ + // | true | false | false | <- never been registered + // +--------------+-----------------+----------+ + // | false | false | false | <- never been registered + // +--------------+-----------------+----------+ + // | true | true | true | <- registered into the wheel, + // | | | | and then taken out of the wheel. + // +--------------+-----------------+----------+ + // | false | true | false | <- still registered in the wheel + // +--------------+-----------------+----------+ + deregistered && self.registered + } + + /// Cancels and deregisters the timer. This operation is irreversible. + pub(crate) fn cancel(self: Pin<&mut Self>) { + // Avoid calling the `clear_entry` method, because it has not been initialized yet. + let Some(inner) = self.inner() else { + return; + }; + + // We need to perform an acq/rel fence with the driver thread, and the + // simplest way to do so is to grab the driver lock. + // + // Why is this necessary? We're about to release this timer's memory for + // some other non-timer use. However, we've been doing a bunch of + // relaxed (or even non-atomic) writes from the driver thread, and we'll + // be doing more from _this thread_ (as this memory is interpreted as + // something else). 
+ // + // It is critical to ensure that, from the point of view of the driver, + // those future non-timer writes happen-after the timer is fully fired, + // and from the purpose of this thread, the driver's writes all + // happen-before we drop the timer. This in turn requires us to perform + // an acquire-release barrier in _both_ directions between the driver + // and dropping thread. + // + // The lock acquisition in clear_entry serves this purpose. All of the + // driver manipulations happen with the lock held, so we can just take + // the lock and be sure that this drop happens-after everything the + // driver did so far and happens-before everything the driver does in + // the future. While we have the lock held, we also go ahead and + // deregister the entry if necessary. + unsafe { self.driver().clear_entry(NonNull::from(inner)) }; + } + + pub(crate) fn reset(mut self: Pin<&mut Self>, new_time: Instant, reregister: bool) { + let this = self.as_mut().project(); + *this.deadline = new_time; + *this.registered = reregister; + + let tick = self.driver().time_source().deadline_to_tick(new_time); + let inner = match self.inner() { + Some(inner) => inner, + None => { + self.as_mut().init_inner(); + self.inner() + .expect("inner should already be initialized by `this.init_inner()`") + } + }; + + if inner.extend_expiration(tick).is_ok() { + return; + } + + if reregister { + unsafe { + self.driver() + .reregister(&self.driver.driver().io, tick, inner.into()); + } + } + } + + pub(crate) fn poll_elapsed( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + assert!( + !self.driver().is_shutdown(), + "{}", + crate::util::error::RUNTIME_SHUTTING_DOWN_ERROR + ); + + if !self.registered { + let deadline = self.deadline; + self.as_mut().reset(deadline, true); + } + + let inner = self + .inner() + .expect("inner should already be initialized by `self.reset()`"); + inner.state.poll(cx.waker()) + } + + pub(crate) fn driver(&self) -> &super::Handle { + 
self.driver.driver().time() + } + + #[cfg(all(tokio_unstable, feature = "tracing"))] + pub(crate) fn clock(&self) -> &super::Clock { + self.driver.driver().clock() + } +} + +impl TimerHandle { + pub(super) unsafe fn registered_when(&self) -> u64 { + unsafe { self.inner.as_ref().registered_when() } + } + + pub(super) unsafe fn sync_when(&self) -> u64 { + unsafe { self.inner.as_ref().sync_when() } + } + + pub(super) unsafe fn is_pending(&self) -> bool { + unsafe { self.inner.as_ref().state.is_pending() } + } + + /// Forcibly sets the true and cached expiration times to the given tick. + /// + /// SAFETY: The caller must ensure that the handle remains valid, the driver + /// lock is held, and that the timer is not in any wheel linked lists. + pub(super) unsafe fn set_expiration(&self, tick: u64) { + unsafe { + self.inner.as_ref().set_expiration(tick); + } + } + + /// Attempts to mark this entry as pending. If the expiration time is after + /// `not_after`, however, returns an Err with the current expiration time. + /// + /// If an `Err` is returned, the `registered_when` value will be updated to this + /// new expiration time. + /// + /// SAFETY: The caller must ensure that the handle remains valid, the driver + /// lock is held, and that the timer is not in any wheel linked lists. + /// After returning Ok, the entry must be added to the pending list. + pub(super) unsafe fn mark_pending(&self, not_after: u64) -> Result<(), u64> { + match unsafe { self.inner.as_ref().state.mark_pending(not_after) } { + Ok(()) => { + // mark this as being on the pending queue in registered_when + unsafe { + self.inner.as_ref().set_registered_when(STATE_DEREGISTERED); + } + Ok(()) + } + Err(tick) => { + unsafe { + self.inner.as_ref().set_registered_when(tick); + } + Err(tick) + } + } + } + + /// Attempts to transition to a terminal state. If the state is already a + /// terminal state, does nothing. 
+ /// + /// Because the entry might be dropped after the state is moved to a + /// terminal state, this function consumes the handle to ensure we don't + /// access the entry afterwards. + /// + /// Returns the last-registered waker, if any. + /// + /// SAFETY: The driver lock must be held while invoking this function, and + /// the entry must not be in any wheel linked lists. + pub(super) unsafe fn fire(self, completed_state: TimerResult) -> Option { + unsafe { self.inner.as_ref().state.fire(completed_state) } + } +} diff --git a/tokio/src/runtime/time/handle.rs b/tokio/src/runtime/time/handle.rs index 111d46089d0..33319031cc1 100644 --- a/tokio/src/runtime/time/handle.rs +++ b/tokio/src/runtime/time/handle.rs @@ -1,69 +1,34 @@ -use crate::runtime::time::{TimeSource, WakeQueue, Wheel}; +use crate::runtime::time::TimeSource; use std::fmt; -cfg_test_util! { - use crate::loom::sync::Arc; - use crate::loom::sync::atomic::{AtomicBool, Ordering}; -} - /// Handle to time driver instance. pub(crate) struct Handle { pub(super) time_source: TimeSource, - - // When `true`, a call to `park_timeout` should immediately return and time - // should not advance. One reason for this to be `true` is if the task - // passed to `Runtime::block_on` called `task::yield_now()`. - // - // While it may look racy, it only has any effect when the clock is paused - // and pausing the clock is restricted to a single-threaded runtime. - #[cfg(feature = "test-util")] - pub(super) did_wake: Arc, + pub(super) inner: super::Inner, } impl Handle { - pub(crate) fn process_at_time( - &self, - wheel: &mut Wheel, - mut now: u64, - wake_queue: &mut WakeQueue, - ) { - if now < wheel.elapsed() { - // Time went backwards! This normally shouldn't happen as the Rust language - // guarantees that an Instant is monotonic, but can happen when running - // Linux in a VM on a Windows host due to std incorrectly trusting the - // hardware clock to be monotonic. - // - // See for more information. 
- now = wheel.elapsed(); - } - - wheel.take_expired(now, wake_queue); - } - - pub(crate) fn shutdown(&self, wheel: &mut Wheel) { - // self.is_shutdown.store(true, Ordering::SeqCst); - // Advance time forward to the end of time. - // This will ensure that all timers are fired. - let max_tick = u64::MAX; - let mut wake_queue = WakeQueue::new(); - self.process_at_time(wheel, max_tick, &mut wake_queue); - wake_queue.wake_all(); - } - /// Returns the time source associated with this handle. pub(crate) fn time_source(&self) -> &TimeSource { &self.time_source } + /// Checks whether the driver has been shutdown. + pub(super) fn is_shutdown(&self) -> bool { + self.inner.is_shutdown() + } + /// Track that the driver is being unparked pub(crate) fn unpark(&self) { #[cfg(feature = "test-util")] - self.did_wake.store(true, Ordering::SeqCst); - } - - cfg_test_util! { - pub(crate) fn did_wake(&self) -> bool { - self.did_wake.swap(false, Ordering::SeqCst) + match self.inner { + super::Inner::Traditional { ref did_wake, .. } => { + did_wake.store(true, std::sync::atomic::Ordering::SeqCst); + } + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + super::Inner::Alternative { ref did_wake, .. } => { + did_wake.store(true, std::sync::atomic::Ordering::SeqCst); + } } } } diff --git a/tokio/src/runtime/time/mod.rs b/tokio/src/runtime/time/mod.rs index 5ebca09393e..cecd5d0f25e 100644 --- a/tokio/src/runtime/time/mod.rs +++ b/tokio/src/runtime/time/mod.rs @@ -6,8 +6,9 @@ //! Time driver. -mod timer; -pub(crate) use timer::Timer; +mod entry; +pub(crate) use entry::TimerEntry; +use entry::{EntryList, TimerHandle, TimerShared, MAX_SAFE_MILLIS_DURATION}; mod handle; pub(crate) use self::handle::Handle; @@ -16,23 +17,19 @@ mod source; pub(crate) use source::TimeSource; mod wheel; -cfg_rt_and_time! { - pub(crate) use wheel::EntryHandle; -} -cfg_rt_or_time! 
{ - pub(crate) use wheel::cancellation_queue; - pub(crate) use wheel::RegistrationQueue; - pub(crate) use wheel::WakeQueue; - pub(crate) use wheel::Wheel; -} -cfg_test_util! { - use crate::loom::sync::Arc; -} +#[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] +use super::time_alt; use crate::loom::sync::atomic::{AtomicBool, Ordering}; -use crate::runtime::driver::{self, IoStack}; +use crate::loom::sync::Mutex; +use crate::runtime::driver::{self, IoHandle, IoStack}; +use crate::time::error::Error; use crate::time::{Clock, Duration}; +use crate::util::WakeList; + +use std::fmt; +use std::{num::NonZeroU64, ptr::NonNull}; /// Time implementation that drives [`Sleep`][sleep], [`Interval`][interval], and [`Timeout`][timeout]. /// @@ -93,8 +90,49 @@ use crate::time::{Clock, Duration}; pub(crate) struct Driver { /// Parker to delegate to. park: IoStack, +} + +enum Inner { + Traditional { + // The state is split like this so `Handle` can access `is_shutdown` without locking the mutex + state: Mutex, + + /// True if the driver is being shutdown. + is_shutdown: AtomicBool, - is_shutdown: AtomicBool, + // When `true`, a call to `park_timeout` should immediately return and time + // should not advance. One reason for this to be `true` is if the task + // passed to `Runtime::block_on` called `task::yield_now()`. + // + // While it may look racy, it only has any effect when the clock is paused + // and pausing the clock is restricted to a single-threaded runtime. + #[cfg(feature = "test-util")] + did_wake: AtomicBool, + }, + + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + Alternative { + /// True if the driver is being shutdown. + is_shutdown: AtomicBool, + + // When `true`, a call to `park_timeout` should immediately return and time + // should not advance. One reason for this to be `true` is if the task + // passed to `Runtime::block_on` called `task::yield_now()`. 
+ // + // While it may look racy, it only has any effect when the clock is paused + // and pausing the clock is restricted to a single-threaded runtime. + #[cfg(feature = "test-util")] + did_wake: AtomicBool, + }, +} + +/// Time state shared which must be protected by a `Mutex` +struct InnerState { + /// The earliest time at which we promise to wake up without unparking. + next_wake: Option, + + /// Timer wheel. + wheel: wheel::Wheel, } // ===== impl Driver ===== @@ -109,68 +147,345 @@ impl Driver { let handle = Handle { time_source, - #[cfg(feature = "test-util")] - did_wake: Arc::new(AtomicBool::new(false)), - }; + inner: Inner::Traditional { + state: Mutex::new(InnerState { + next_wake: None, + wheel: wheel::Wheel::new(), + }), + is_shutdown: AtomicBool::new(false), - let driver = Driver { - park, - is_shutdown: AtomicBool::new(false), + #[cfg(feature = "test-util")] + did_wake: AtomicBool::new(false), + }, }; + let driver = Driver { park }; + (driver, handle) } + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + pub(crate) fn new_alt(clock: &Clock) -> Handle { + let time_source = TimeSource::new(clock); + + Handle { + time_source, + inner: Inner::Alternative { + is_shutdown: AtomicBool::new(false), + #[cfg(feature = "test-util")] + did_wake: AtomicBool::new(false), + }, + } + } + pub(crate) fn park(&mut self, handle: &driver::Handle) { - self.park.park(handle); + self.park_internal(handle, None); } pub(crate) fn park_timeout(&mut self, handle: &driver::Handle, duration: Duration) { - self.park.park_timeout(handle, duration); + self.park_internal(handle, Some(duration)); } pub(crate) fn shutdown(&mut self, rt_handle: &driver::Handle) { - if self.is_shutdown.load(Ordering::SeqCst) { + let handle = rt_handle.time(); + + if handle.is_shutdown() { return; } - self.is_shutdown.store(true, Ordering::SeqCst); + match &handle.inner { + Inner::Traditional { is_shutdown, .. 
} => { + is_shutdown.store(true, Ordering::SeqCst); + } + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + Inner::Alternative { is_shutdown, .. } => { + is_shutdown.store(true, Ordering::SeqCst); + } + } + + // Advance time forward to the end of time. + + handle.process_at_time(u64::MAX); + self.park.shutdown(rt_handle); } + + fn park_internal(&mut self, rt_handle: &driver::Handle, limit: Option) { + let handle = rt_handle.time(); + let mut lock = handle.inner.lock(); + + assert!(!handle.is_shutdown()); + + let next_wake = lock.wheel.next_expiration_time(); + lock.next_wake = + next_wake.map(|t| NonZeroU64::new(t).unwrap_or_else(|| NonZeroU64::new(1).unwrap())); + + drop(lock); + + match next_wake { + Some(when) => { + let now = handle.time_source.now(rt_handle.clock()); + // Note that we effectively round up to 1ms here - this avoids + // very short-duration microsecond-resolution sleeps that the OS + // might treat as zero-length. + let mut duration = handle + .time_source + .tick_to_duration(when.saturating_sub(now)); + + if duration > Duration::from_millis(0) { + if let Some(limit) = limit { + duration = std::cmp::min(limit, duration); + } + + self.park_thread_timeout(rt_handle, duration); + } else { + self.park.park_timeout(rt_handle, Duration::from_secs(0)); + } + } + None => { + if let Some(duration) = limit { + self.park_thread_timeout(rt_handle, duration); + } else { + self.park.park(rt_handle); + } + } + } + + // Process pending timers after waking up + handle.process(rt_handle.clock()); + } + + cfg_test_util! { + fn park_thread_timeout(&mut self, rt_handle: &driver::Handle, duration: Duration) { + let handle = rt_handle.time(); + let clock = rt_handle.clock(); + + if clock.can_auto_advance() { + self.park.park_timeout(rt_handle, Duration::from_secs(0)); + + // If the time driver was woken, then the park completed + // before the "duration" elapsed (usually caused by a + // yield in `Runtime::block_on`). 
In this case, we don't + // advance the clock. + if !handle.did_wake() { + // Simulate advancing time + if let Err(msg) = clock.advance(duration) { + panic!("{}", msg); + } + } + } else { + self.park.park_timeout(rt_handle, duration); + } + } + } + + cfg_not_test_util! { + fn park_thread_timeout(&mut self, rt_handle: &driver::Handle, duration: Duration) { + self.park.park_timeout(rt_handle, duration); + } + } } -cfg_rt_or_time! { - /// Local context for the time driver, used when creating timers. - pub(crate) enum Context<'a> { - /// The runtime is running, we can access it. - Running { - registration_queue: &'a mut RegistrationQueue, - elapsed: u64, - }, - #[cfg(feature = "rt-multi-thread")] - /// The runtime is shutting down, no timers can be registered. - Shutdown, - } - - /// Local context for the time driver, used when the runtime wants to - /// fire/cancel timers. - pub(crate) struct Context2 { - pub(crate) wheel: Wheel, - pub(crate) registration_queue: RegistrationQueue, - pub(crate) canc_tx: cancellation_queue::Sender, - pub(crate) canc_rx: cancellation_queue::Receiver, - } - - impl Context2 { - pub(crate) fn new() -> Self { - let (canc_tx, canc_rx) = cancellation_queue::new(); - Self { - wheel: Wheel::new(), - registration_queue: RegistrationQueue::new(), - canc_tx, - canc_rx, +impl Handle { + pub(self) fn process(&self, clock: &Clock) { + let now = self.time_source().now(clock); + + self.process_at_time(now); + } + + pub(self) fn process_at_time(&self, mut now: u64) { + let mut waker_list = WakeList::new(); + + let mut lock = self.inner.lock(); + + if now < lock.wheel.elapsed() { + // Time went backwards! This normally shouldn't happen as the Rust language + // guarantees that an Instant is monotonic, but can happen when running + // Linux in a VM on a Windows host due to std incorrectly trusting the + // hardware clock to be monotonic. + // + // See for more information. 
+ now = lock.wheel.elapsed(); + } + + while let Some(entry) = lock.wheel.poll(now) { + debug_assert!(unsafe { entry.is_pending() }); + + // SAFETY: We hold the driver lock, and just removed the entry from any linked lists. + if let Some(waker) = unsafe { entry.fire(Ok(())) } { + waker_list.push(waker); + + if !waker_list.can_push() { + // Wake a batch of wakers. To avoid deadlock, we must do this with the lock temporarily dropped. + drop(lock); + + waker_list.wake_all(); + + lock = self.inner.lock(); + } } } + + lock.next_wake = lock + .wheel + .poll_at() + .map(|t| NonZeroU64::new(t).unwrap_or_else(|| NonZeroU64::new(1).unwrap())); + + drop(lock); + + waker_list.wake_all(); + } + + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + pub(crate) fn process_at_time_alt( + &self, + wheel: &mut time_alt::Wheel, + mut now: u64, + wake_queue: &mut time_alt::WakeQueue, + ) { + if now < wheel.elapsed() { + // Time went backwards! This normally shouldn't happen as the Rust language + // guarantees that an Instant is monotonic, but can happen when running + // Linux in a VM on a Windows host due to std incorrectly trusting the + // hardware clock to be monotonic. + // + // See for more information. + now = wheel.elapsed(); + } + + wheel.take_expired(now, wake_queue); + } + + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + pub(crate) fn shutdown_alt(&self, wheel: &mut time_alt::Wheel) { + // self.is_shutdown.store(true, Ordering::SeqCst); + // Advance time forward to the end of time. + // This will ensure that all timers are fired. + let max_tick = u64::MAX; + let mut wake_queue = time_alt::WakeQueue::new(); + self.process_at_time_alt(wheel, max_tick, &mut wake_queue); + wake_queue.wake_all(); + } + + /// Removes a registered timer from the driver. + /// + /// The timer will be moved to the cancelled state. Wakers will _not_ be + /// invoked. If the timer is already completed, this function is a no-op. 
+ /// + /// This function always acquires the driver lock, even if the entry does + /// not appear to be registered. + /// + /// SAFETY: The timer must not be registered with some other driver, and + /// `add_entry` must not be called concurrently. + pub(self) unsafe fn clear_entry(&self, entry: NonNull) { + unsafe { + let mut lock = self.inner.lock(); + + if entry.as_ref().might_be_registered() { + lock.wheel.remove(entry); + } + + entry.as_ref().handle().fire(Ok(())); + } + } + + /// Removes and re-adds an entry to the driver. + /// + /// SAFETY: The timer must be either unregistered, or registered with this + /// driver. No other threads are allowed to concurrently manipulate the + /// timer at all (the current thread should hold an exclusive reference to + /// the `TimerEntry`) + pub(self) unsafe fn reregister( + &self, + unpark: &IoHandle, + new_tick: u64, + entry: NonNull, + ) { + let waker = unsafe { + let mut lock = self.inner.lock(); + + // We may have raced with a firing/deregistration, so check before + // deregistering. + if unsafe { entry.as_ref().might_be_registered() } { + lock.wheel.remove(entry); + } + + // Now that we have exclusive control of this entry, mint a handle to reinsert it. + let entry = entry.as_ref().handle(); + + if self.is_shutdown() { + unsafe { entry.fire(Err(crate::time::error::Error::shutdown())) } + } else { + entry.set_expiration(new_tick); + + // Note: We don't have to worry about racing with some other resetting + // thread, because add_entry and reregister require exclusive control of + // the timer entry. + match unsafe { lock.wheel.insert(entry) } { + Ok(when) => { + if lock + .next_wake + .map(|next_wake| when < next_wake.get()) + .unwrap_or(true) + { + unpark.unpark(); + } + + None + } + Err((entry, crate::time::error::InsertError::Elapsed)) => unsafe { + entry.fire(Ok(())) + }, + } + } + + // Must release lock before invoking waker to avoid the risk of deadlock. 
+ }; + + // The timer was fired synchronously as a result of the reregistration. + // Wake the waker; this is needed because we might reset _after_ a poll, + // and otherwise the task won't be awoken to poll again. + if let Some(waker) = waker { + waker.wake(); + } + } + + cfg_test_util! { + pub(super) fn did_wake(&self) -> bool { + match &self.inner { + Inner::Traditional { did_wake, .. } => did_wake.swap(false, Ordering::SeqCst), + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + Inner::Alternative { did_wake, .. } => did_wake.swap(false, Ordering::SeqCst), + } + } + } +} + +// ===== impl Inner ===== + +impl Inner { + /// Locks the driver's inner structure + pub(super) fn lock(&self) -> crate::loom::sync::MutexGuard<'_, InnerState> { + match self { + Inner::Traditional { state, .. } => state.lock(), + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + Inner::Alternative { .. } => unreachable!("unreachable in alternative timer"), + } + } + + // Check whether the driver has been shutdown + pub(super) fn is_shutdown(&self) -> bool { + match self { + Inner::Traditional { is_shutdown, .. } => is_shutdown.load(Ordering::SeqCst), + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + Inner::Alternative { is_shutdown, .. } => is_shutdown.load(Ordering::SeqCst), + } + } +} + +impl fmt::Debug for Inner { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("Inner").finish() } } diff --git a/tokio/src/runtime/time/source.rs b/tokio/src/runtime/time/source.rs index 8f53c63d2d9..e3ba8d790c0 100644 --- a/tokio/src/runtime/time/source.rs +++ b/tokio/src/runtime/time/source.rs @@ -1,3 +1,4 @@ +use super::MAX_SAFE_MILLIS_DURATION; use crate::time::{Clock, Duration, Instant}; /// A structure which handles conversion from Instants to `u64` timestamps. 
@@ -21,7 +22,11 @@ impl TimeSource { pub(crate) fn instant_to_tick(&self, t: Instant) -> u64 { // round up let dur: Duration = t.saturating_duration_since(self.start_time); - dur.as_millis().try_into().unwrap_or(u64::MAX) + let ms = dur + .as_millis() + .try_into() + .unwrap_or(MAX_SAFE_MILLIS_DURATION); + ms.min(MAX_SAFE_MILLIS_DURATION) } pub(crate) fn tick_to_duration(&self, t: u64) -> Duration { diff --git a/tokio/src/runtime/time/tests/mod.rs b/tokio/src/runtime/time/tests/mod.rs index d0193fa4e02..33c4a5366d1 100644 --- a/tokio/src/runtime/time/tests/mod.rs +++ b/tokio/src/runtime/time/tests/mod.rs @@ -1,19 +1,15 @@ #![cfg(not(target_os = "wasi"))] -use futures::task::noop_waker_ref; -use std::future::poll_fn; use std::{task::Context, time::Duration}; -use crate::loom::thread; -use crate::runtime::scheduler::util::time::process_registration_queue; -use crate::runtime::time::timer::with_current_time_context2; -use crate::runtime::time::WakeQueue; -use crate::runtime::Handle; -use crate::sync::oneshot; +#[cfg(not(loom))] +use futures::task::noop_waker_ref; -use super::Timer; +use crate::loom::sync::atomic::{AtomicBool, Ordering}; +use crate::loom::sync::Arc; +use crate::loom::thread; -const EVENT_INTERVAL: u32 = 1; +use super::TimerEntry; fn block_on(f: impl std::future::Future) -> T { #[cfg(loom)] @@ -25,7 +21,6 @@ fn block_on(f: impl std::future::Future) -> T { .build() .unwrap(); rt.block_on(f) - // futures::executor::block_on(f) } } @@ -37,97 +32,38 @@ fn model(f: impl Fn() + Send + Sync + 'static) { f(); } -async fn fire_all_timers(handle: &Handle, exit_rx: oneshot::Receiver<()>) { - loop { - // Keep the worker thread busy, so that it can process injected - // timers. - assert_eq!(EVENT_INTERVAL, 1); - crate::task::yield_now().await; - if !exit_rx.is_empty() { - // break the loop if the thread is exiting - break; - } - - let mut wake_queue = WakeQueue::new(); - - // In the `block_on` context, we can get the current wheel - // fire all timers. 
- with_current_time_context2(&handle.inner, |maybe_time_cx2| { - let time_cx2 = maybe_time_cx2.unwrap(); - - process_registration_queue( - &mut time_cx2.registration_queue, - &mut time_cx2.wheel, - &time_cx2.canc_tx, - &mut wake_queue, - ); - - let time = handle.inner.driver().time(); - time.process_at_time(&mut time_cx2.wheel, u64::MAX, &mut wake_queue); - }); - - wake_queue.wake_all(); - - thread::yield_now(); - } -} - -// This function must be called inside the `rt.block_on`. -fn process_at_time(handle: &Handle, at: u64) { - let handle = &handle.inner; - - with_current_time_context2(handle, |maybe_time_cx2| { - let time_cx2 = maybe_time_cx2.unwrap(); - - let mut wake_queue = WakeQueue::new(); - process_registration_queue( - &mut time_cx2.registration_queue, - &mut time_cx2.wheel, - &time_cx2.canc_tx, - &mut wake_queue, - ); - - let time = handle.driver().time(); - time.process_at_time(&mut time_cx2.wheel, at, &mut wake_queue); - wake_queue.wake_all(); - }); -} - fn rt(start_paused: bool) -> crate::runtime::Runtime { crate::runtime::Builder::new_current_thread() .enable_time() - .event_interval(EVENT_INTERVAL) .start_paused(start_paused) .build() .unwrap() } -fn noop_cx() -> Context<'static> { - Context::from_waker(noop_waker_ref()) -} - #[test] fn single_timer() { model(|| { let rt = rt(false); let handle = rt.handle(); - let (exit_tx, exit_rx) = oneshot::channel(); let handle_ = handle.clone(); let jh = thread::spawn(move || { - let entry = Timer::new( + let entry = TimerEntry::new( handle_.inner.clone(), handle_.inner.driver().clock().now() + Duration::from_secs(1), ); pin!(entry); - block_on(poll_fn(|cx| entry.as_mut().poll_elapsed(cx))); - exit_tx.send(()).unwrap(); + block_on(std::future::poll_fn(|cx| entry.as_mut().poll_elapsed(cx))).unwrap(); }); - rt.block_on(async move { - fire_all_timers(handle, exit_rx).await; - }); + thread::yield_now(); + + let time = handle.inner.driver().time(); + let clock = handle.inner.driver().clock(); + + // advance 2s + 
time.process_at_time(time.time_source().now(clock) + 2_000_000_000); jh.join().unwrap(); }) @@ -138,24 +74,30 @@ fn drop_timer() { model(|| { let rt = rt(false); let handle = rt.handle(); - let (exit_tx, exit_rx) = oneshot::channel(); let handle_ = handle.clone(); let jh = thread::spawn(move || { - let entry = Timer::new( + let entry = TimerEntry::new( handle_.inner.clone(), handle_.inner.driver().clock().now() + Duration::from_secs(1), ); pin!(entry); - let _ = entry.as_mut().poll_elapsed(&mut noop_cx()); - let _ = entry.as_mut().poll_elapsed(&mut noop_cx()); - exit_tx.send(()).unwrap(); + let _ = entry + .as_mut() + .poll_elapsed(&mut Context::from_waker(futures::task::noop_waker_ref())); + let _ = entry + .as_mut() + .poll_elapsed(&mut Context::from_waker(futures::task::noop_waker_ref())); }); - rt.block_on(async move { - fire_all_timers(handle, exit_rx).await; - }); + thread::yield_now(); + + let time = handle.inner.driver().time(); + let clock = handle.inner.driver().clock(); + + // advance 2s in the future. + time.process_at_time(time.time_source().now(clock) + 2_000_000_000); jh.join().unwrap(); }) @@ -166,46 +108,83 @@ fn change_waker() { model(|| { let rt = rt(false); let handle = rt.handle(); - let (exit_tx, exit_rx) = oneshot::channel(); - let (change_waker_tx, change_waker_rx) = oneshot::channel(); let handle_ = handle.clone(); let jh = thread::spawn(move || { - let entry = Timer::new( + let entry = TimerEntry::new( handle_.inner.clone(), handle_.inner.driver().clock().now() + Duration::from_secs(1), ); pin!(entry); - let _ = entry.as_mut().poll_elapsed(&mut noop_cx()); - - // At this point, we cannot let worker thread to wake up - // the timer because the waker is a noop. - // Let's say the timer has been woken up at this point, - // the following poll is basically polling a future that has completed - // (already returned `Ready`),which is not encouraged. 
- - let mut maybe_change_waker_tx = Some(change_waker_tx); - block_on(poll_fn(|cx| { - let p = entry.as_mut().poll_elapsed(cx); - if let Some(tx) = maybe_change_waker_tx.take() { - // notify the worker thread that the waker is useable now - tx.send(()).unwrap(); - } - p - })); - - // notify the worker thread to exit - exit_tx.send(()).unwrap(); + let _ = entry + .as_mut() + .poll_elapsed(&mut Context::from_waker(futures::task::noop_waker_ref())); + + block_on(std::future::poll_fn(|cx| entry.as_mut().poll_elapsed(cx))).unwrap(); }); - change_waker_rx.blocking_recv().unwrap(); + thread::yield_now(); + + let time = handle.inner.driver().time(); + let clock = handle.inner.driver().clock(); + + // advance 2s + time.process_at_time(time.time_source().now(clock) + 2_000_000_000); + + jh.join().unwrap(); + }) +} + +#[test] +fn reset_future() { + model(|| { + let finished_early = Arc::new(AtomicBool::new(false)); + + let rt = rt(false); + let handle = rt.handle(); - rt.block_on(async move { - fire_all_timers(handle, exit_rx).await; + let handle_ = handle.clone(); + let finished_early_ = finished_early.clone(); + let start = handle.inner.driver().clock().now(); + + let jh = thread::spawn(move || { + let entry = TimerEntry::new(handle_.inner.clone(), start + Duration::from_secs(1)); + pin!(entry); + + let _ = entry + .as_mut() + .poll_elapsed(&mut Context::from_waker(futures::task::noop_waker_ref())); + + entry.as_mut().reset(start + Duration::from_secs(2), true); + + // shouldn't complete before 2s + block_on(std::future::poll_fn(|cx| entry.as_mut().poll_elapsed(cx))).unwrap(); + + finished_early_.store(true, Ordering::Relaxed); }); + thread::yield_now(); + + let handle = handle.inner.driver().time(); + + handle.process_at_time( + handle + .time_source() + .instant_to_tick(start + Duration::from_millis(1500)), + ); + + assert!(!finished_early.load(Ordering::Relaxed)); + + handle.process_at_time( + handle + .time_source() + .instant_to_tick(start + 
Duration::from_millis(2500)), + ); + jh.join().unwrap(); + + assert!(finished_early.load(Ordering::Relaxed)); }) } @@ -226,32 +205,31 @@ fn poll_process_levels() { let mut entries = vec![]; - rt.block_on(async { - for i in 0..normal_or_miri(1024, 64) { - let mut entry = Box::pin(Timer::new( - handle.inner.clone(), - handle.inner.driver().clock().now() + Duration::from_millis(i), - )); + for i in 0..normal_or_miri(1024, 64) { + let mut entry = Box::pin(TimerEntry::new( + handle.inner.clone(), + handle.inner.driver().clock().now() + Duration::from_millis(i), + )); - let _ = entry - .as_mut() - .poll_elapsed(&mut Context::from_waker(noop_waker_ref())); + let _ = entry + .as_mut() + .poll_elapsed(&mut Context::from_waker(noop_waker_ref())); - entries.push(entry); - } + entries.push(entry); + } - for t in 1..normal_or_miri(1024, 64) { - process_at_time(handle, t); + for t in 1..normal_or_miri(1024, 64) { + handle.inner.driver().time().process_at_time(t as u64); - for (deadline, future) in entries.iter_mut().enumerate() { - if deadline <= t as usize { - assert!(future.as_mut().poll_elapsed(&mut noop_cx()).is_ready()); - } else { - assert!(future.as_mut().poll_elapsed(&mut noop_cx()).is_pending()); - } + for (deadline, future) in entries.iter_mut().enumerate() { + let mut context = Context::from_waker(noop_waker_ref()); + if deadline <= t { + assert!(future.as_mut().poll_elapsed(&mut context).is_ready()); + } else { + assert!(future.as_mut().poll_elapsed(&mut context).is_pending()); } } - }); + } } #[test] @@ -262,135 +240,30 @@ fn poll_process_levels_targeted() { let rt = rt(true); let handle = rt.handle(); - rt.block_on(async { - let e1 = Timer::new( - handle.inner.clone(), - handle.inner.driver().clock().now() + Duration::from_millis(193), - ); - pin!(e1); - - process_at_time(handle, 62); - assert!(e1.as_mut().poll_elapsed(&mut context).is_pending()); - process_at_time(handle, 192); - process_at_time(handle, 192); - }) -} - -#[test] -fn cancel_in_the_same_rt() { - 
model(|| { - let rt = rt(false); - - rt.block_on(async { - let handle = rt.handle(); - let mut timer = Box::pin(Timer::new( - handle.inner.clone(), - handle.inner.driver().clock().now() + Duration::from_secs(1), - )); - let poll = timer.as_mut().poll_elapsed(&mut noop_cx()); - assert!(poll.is_pending()); - drop(timer); - - // Since the event interval is 1, yield 3 times to ensure - // the registration queue and cancellation queue are processed. - assert_eq!(EVENT_INTERVAL, 1); - crate::task::yield_now().await; - crate::task::yield_now().await; - crate::task::yield_now().await; - }); - }) -} - -#[test] -fn cancel_in_the_different_rt() { - model(|| { - let rt1 = rt(false); - let rt2 = rt(false); - - let timer = rt1.block_on(async { - let handle = rt1.handle(); - let mut timer = Box::pin(Timer::new( - handle.inner.clone(), - handle.inner.driver().clock().now() + Duration::from_secs(1), - )); - let poll = timer.as_mut().poll_elapsed(&mut noop_cx()); - assert!(poll.is_pending()); - timer - }); - - rt2.block_on(async { - drop(timer); - }); - - rt1.block_on(async { - // Since the event interval is 1, yield 3 times to ensure - // the registration queue and cancellation queue are processed. 
- assert_eq!(EVENT_INTERVAL, 1); - crate::task::yield_now().await; - crate::task::yield_now().await; - crate::task::yield_now().await; - }); - }) -} - -#[test] -fn cancel_outside_of_rt() { - model(|| { - let rt = rt(false); - - let timer = rt.block_on(async { - let handle = rt.handle(); - let mut timer = Box::pin(Timer::new( - handle.inner.clone(), - handle.inner.driver().clock().now() + Duration::from_secs(1), - )); - let poll = timer.as_mut().poll_elapsed(&mut noop_cx()); - assert!(poll.is_pending()); - timer - }); + let e1 = TimerEntry::new( + handle.inner.clone(), + handle.inner.driver().clock().now() + Duration::from_millis(193), + ); + pin!(e1); - drop(timer); + let handle = handle.inner.driver().time(); - rt.block_on(async { - // Since the event interval is 1, yield 3 times to ensure - // the registration queue and cancellation queue are processed. - assert_eq!(EVENT_INTERVAL, 1); - crate::task::yield_now().await; - crate::task::yield_now().await; - crate::task::yield_now().await; - }); - }) + handle.process_at_time(62); + assert!(e1.as_mut().poll_elapsed(&mut context).is_pending()); + handle.process_at_time(192); + handle.process_at_time(192); } #[test] -fn cancel_in_different_thread() { - model(|| { - let rt = rt(false); - - let timer = rt.block_on(async { - let handle = rt.handle(); - let mut timer = Box::pin(Timer::new( - handle.inner.clone(), - handle.inner.driver().clock().now() + Duration::from_secs(1), - )); - let poll = timer.as_mut().poll_elapsed(&mut noop_cx()); - assert!(poll.is_pending()); - timer - }); +#[cfg(not(loom))] +fn instant_to_tick_max() { + use crate::runtime::time::entry::MAX_SAFE_MILLIS_DURATION; - let jh = thread::spawn(move || { - drop(timer); - }); + let rt = rt(true); + let handle = rt.handle().inner.driver().time(); - rt.block_on(async { - // Since the event interval is 1, yield 3 times to ensure - // the registration queue and cancellation queue are processed. 
- assert_eq!(EVENT_INTERVAL, 1); - crate::task::yield_now().await; - crate::task::yield_now().await; - crate::task::yield_now().await; - }); + let start_time = handle.time_source.start_time(); + let long_future = start_time + std::time::Duration::from_millis(MAX_SAFE_MILLIS_DURATION + 1); - jh.join().unwrap(); - }) + assert!(handle.time_source.instant_to_tick(long_future) <= MAX_SAFE_MILLIS_DURATION); } diff --git a/tokio/src/runtime/time/wheel/level.rs b/tokio/src/runtime/time/wheel/level.rs index 99309bfe0fb..2fd20e56f40 100644 --- a/tokio/src/runtime/time/wheel/level.rs +++ b/tokio/src/runtime/time/wheel/level.rs @@ -1,6 +1,6 @@ -use super::{EntryHandle, EntryList}; -use std::ptr::NonNull; -use std::{array, fmt}; +use crate::runtime::time::{EntryList, TimerHandle, TimerShared}; + +use std::{array, fmt, ptr::NonNull}; /// Wheel for a single level in the timer. This wheel contains 64 slots. pub(crate) struct Level { @@ -119,20 +119,18 @@ impl Level { Some(slot) } - pub(crate) unsafe fn add_entry(&mut self, hdl: EntryHandle) { - // Safety: the associated entry must be valid. 
- let deadline = hdl.deadline(); - let slot = slot_for(deadline, self.level); + pub(crate) unsafe fn add_entry(&mut self, item: TimerHandle) { + let slot = slot_for(unsafe { item.registered_when() }, self.level); - self.slot[slot].push_front(hdl); + self.slot[slot].push_front(item); self.occupied |= occupied_bit(slot); } - pub(crate) unsafe fn remove_entry(&mut self, hdl: EntryHandle) { - let slot = slot_for(hdl.deadline(), self.level); + pub(crate) unsafe fn remove_entry(&mut self, item: NonNull) { + let slot = slot_for(unsafe { item.as_ref().registered_when() }, self.level); - unsafe { self.slot[slot].remove(NonNull::from(&hdl)) }; + unsafe { self.slot[slot].remove(item) }; if self.slot[slot].is_empty() { // The bit is currently set debug_assert!(self.occupied & occupied_bit(slot) != 0); diff --git a/tokio/src/runtime/time/wheel/mod.rs b/tokio/src/runtime/time/wheel/mod.rs index aaee46324ac..89adc8f2dcb 100644 --- a/tokio/src/runtime/time/wheel/mod.rs +++ b/tokio/src/runtime/time/wheel/mod.rs @@ -1,23 +1,14 @@ -mod level; +use crate::runtime::time::{TimerHandle, TimerShared}; +use crate::time::error::InsertError; +mod level; pub(crate) use self::level::Expiration; use self::level::Level; -mod entry; -pub(crate) use entry::Handle as EntryHandle; -use entry::{CancellationQueueEntry, RegistrationQueueEntry, WakeQueueEntry}; -use entry::{Entry, EntryList}; - -mod registration_queue; -pub(crate) use registration_queue::RegistrationQueue; - -pub(crate) mod cancellation_queue; -use cancellation_queue::Sender; - -mod wake_queue; -pub(crate) use wake_queue::WakeQueue; +use std::{array, ptr::NonNull}; -use std::array; +use super::entry::STATE_DEREGISTERED; +use super::EntryList; /// Timing wheel implementation. /// @@ -46,6 +37,9 @@ pub(crate) struct Wheel { /// * ~ 4 hr slots / ~ 12 day range /// * ~ 12 day slots / ~ 2 yr range levels: Box<[Level; NUM_LEVELS]>, + + /// Entries queued for firing + pending: EntryList, } /// Number of levels. Each level has 64 slots. 
By using 6 levels with 64 slots @@ -62,6 +56,7 @@ impl Wheel { Wheel { elapsed: 0, levels: Box::new(array::from_fn(Level::new)), + pending: EntryList::new(), } } @@ -75,24 +70,38 @@ impl Wheel { /// /// # Arguments /// - /// * `hdl`: The entry handle to insert into the wheel. + /// * `item`: The item to insert into the wheel. /// - /// # Safety + /// # Return /// - /// The caller must ensure: + /// Returns `Ok` when the item is successfully inserted, `Err` otherwise. /// - /// * The entry is not already registered in ANY wheel. - pub(crate) unsafe fn insert(&mut self, hdl: EntryHandle, cancel_tx: Sender) { - let deadline = hdl.deadline(); - - assert!(deadline > self.elapsed); + /// `Err(Elapsed)` indicates that `when` represents an instant that has + /// already passed. In this case, the caller should fire the timeout + /// immediately. + /// + /// `Err(Invalid)` indicates an invalid `when` argument as been supplied. + /// + /// # Safety + /// + /// This function registers item into an intrusive linked list. The caller + /// must ensure that `item` is pinned and will not be dropped without first + /// being deregistered. + pub(crate) unsafe fn insert( + &mut self, + item: TimerHandle, + ) -> Result { + let when = unsafe { item.sync_when() }; - hdl.register_cancel_tx(cancel_tx); + if when <= self.elapsed { + return Err((item, InsertError::Elapsed)); + } // Get the level at which the entry should be stored - let level = self.level_for(deadline); + let level = self.level_for(when); + unsafe { - self.levels[level].add_entry(hdl); + self.levels[level].add_entry(item); } debug_assert!({ @@ -101,34 +110,45 @@ impl Wheel { .map(|e| e.deadline >= self.elapsed) .unwrap_or(true) }); + + Ok(when) } /// Removes `item` from the timing wheel. - /// - /// # Safety - /// - /// The caller must ensure: - /// - /// * The entry is already registered in THIS wheel. 
- pub(crate) unsafe fn remove(&mut self, hdl: EntryHandle) { - let deadline = hdl.deadline(); - debug_assert!( - self.elapsed <= deadline, - "elapsed={}; deadline={}", - self.elapsed, - deadline - ); + pub(crate) unsafe fn remove(&mut self, item: NonNull) { + unsafe { + let when = item.as_ref().registered_when(); + if when == STATE_DEREGISTERED { + self.pending.remove(item); + } else { + debug_assert!( + self.elapsed <= when, + "elapsed={}; when={}", + self.elapsed, + when + ); + + let level = self.level_for(when); + self.levels[level].remove_entry(item); + } + } + } - let level = self.level_for(deadline); - unsafe { self.levels[level].remove_entry(hdl.clone()) }; + /// Instant at which to poll. + pub(crate) fn poll_at(&self) -> Option { + self.next_expiration().map(|expiration| expiration.deadline) } /// Advances the timer up to the instant represented by `now`. - pub(crate) fn take_expired(&mut self, now: u64, wake_queue: &mut WakeQueue) { + pub(crate) fn poll(&mut self, now: u64) -> Option { loop { + if let Some(handle) = self.pending.pop_back() { + return Some(handle); + } + match self.next_expiration() { Some(ref expiration) if expiration.deadline <= now => { - self.process_expiration(expiration, wake_queue); + self.process_expiration(expiration); self.set_elapsed(expiration.deadline); } @@ -142,10 +162,21 @@ impl Wheel { } } } + + self.pending.pop_back() } /// Returns the instant at which the next timeout expires. fn next_expiration(&self) -> Option { + if !self.pending.is_empty() { + // Expire immediately as we have things pending firing + return Some(Expiration { + level: 0, + slot: 0, + deadline: self.elapsed, + }); + } + // Check all levels for (level_num, level) in self.levels.iter().enumerate() { if let Some(expiration) = level.next_expiration(self.elapsed) { @@ -162,7 +193,7 @@ impl Wheel { /// Returns the tick at which this timer wheel next needs to perform some /// processing, or None if there are no timers registered. 
- pub(crate) fn next_expiration_time(&self) -> Option { + pub(super) fn next_expiration_time(&self) -> Option { self.next_expiration().map(|ex| ex.deadline) } @@ -185,11 +216,7 @@ impl Wheel { /// time and the expiration time. for each in that population either /// queue it for notification (in the case of the last level) or tier /// it down to the next level (in all other cases). - pub(crate) fn process_expiration( - &mut self, - expiration: &Expiration, - wake_queue: &mut WakeQueue, - ) { + pub(crate) fn process_expiration(&mut self, expiration: &Expiration) { // Note that we need to take _all_ of the entries off the list before // processing any of them. This is important because it's possible that // those entries might need to be reinserted into the same slot. @@ -202,21 +229,23 @@ impl Wheel { // those entries again or we'll end up in an infinite loop. let mut entries = self.take_entries(expiration); - while let Some(hdl) = entries.pop_back() { + while let Some(item) = entries.pop_back() { if expiration.level == 0 { - debug_assert_eq!(hdl.deadline(), expiration.deadline); + debug_assert_eq!(unsafe { item.registered_when() }, expiration.deadline); } - let deadline = hdl.deadline(); - - if deadline > expiration.deadline { - let level = level_for(expiration.deadline, deadline); - unsafe { - self.levels[level].add_entry(hdl); + // Try to expire the entry; this is cheap (doesn't synchronize) if + // the timer is not expired, and updates registered_when. 
+ match unsafe { item.mark_pending(expiration.deadline) } { + Ok(()) => { + // Item was expired + self.pending.push_front(item); } - } else { - unsafe { - wake_queue.push_front(hdl); + Err(expiration_tick) => { + let level = level_for(expiration.deadline, expiration_tick); + unsafe { + self.levels[level].add_entry(item); + } } } } diff --git a/tokio/src/runtime/time/wheel/cancellation_queue.rs b/tokio/src/runtime/time_alt/cancellation_queue.rs similarity index 96% rename from tokio/src/runtime/time/wheel/cancellation_queue.rs rename to tokio/src/runtime/time_alt/cancellation_queue.rs index 73313b88d3a..cfbd1ad2fa1 100644 --- a/tokio/src/runtime/time/wheel/cancellation_queue.rs +++ b/tokio/src/runtime/time_alt/cancellation_queue.rs @@ -1,6 +1,5 @@ -use super::{Entry, EntryHandle}; +use super::{CancellationQueueEntry, Entry, EntryHandle}; use crate::loom::sync::{Arc, Mutex}; -use crate::runtime::time::wheel::CancellationQueueEntry; use crate::util::linked_list; type EntryList = linked_list::LinkedList; diff --git a/tokio/src/runtime/time/wheel/cancellation_queue/tests.rs b/tokio/src/runtime/time_alt/cancellation_queue/tests.rs similarity index 96% rename from tokio/src/runtime/time/wheel/cancellation_queue/tests.rs rename to tokio/src/runtime/time_alt/cancellation_queue/tests.rs index 79eb05a2955..b20e316ac9d 100644 --- a/tokio/src/runtime/time/wheel/cancellation_queue/tests.rs +++ b/tokio/src/runtime/time_alt/cancellation_queue/tests.rs @@ -92,8 +92,6 @@ fn drop_iter_should_not_leak_memory() { drop(rx.recv_all()); - for hdl in hdls { - assert_eq!(hdl.inner_strong_count(), 1); - } + assert!(hdls.into_iter().all(|hdl| hdl.inner_strong_count() == 1)); }); } diff --git a/tokio/src/runtime/time_alt/context.rs b/tokio/src/runtime/time_alt/context.rs new file mode 100644 index 00000000000..76035634a26 --- /dev/null +++ b/tokio/src/runtime/time_alt/context.rs @@ -0,0 +1,47 @@ +use super::{cancellation_queue, RegistrationQueue, Wheel}; + +/// Local context for the time 
driver, used when the runtime wants to +/// fire/cancel timers. +pub(crate) struct LocalContext { + pub(crate) wheel: Wheel, + pub(crate) registration_queue: RegistrationQueue, + pub(crate) canc_tx: cancellation_queue::Sender, + pub(crate) canc_rx: cancellation_queue::Receiver, +} + +impl LocalContext { + pub(crate) fn new() -> Self { + let (canc_tx, canc_rx) = cancellation_queue::new(); + Self { + wheel: Wheel::new(), + registration_queue: RegistrationQueue::new(), + canc_tx, + canc_rx, + } + } +} + +pub(crate) enum TempLocalContext<'a> { + /// The runtime is running, we can access it. + Running { + registration_queue: &'a mut RegistrationQueue, + elapsed: u64, + }, + #[cfg(feature = "rt-multi-thread")] + /// The runtime is shutting down, no timers can be registered. + Shutdown, +} + +impl<'a> TempLocalContext<'a> { + pub(crate) fn new_running(cx: &'a mut LocalContext) -> Self { + TempLocalContext::Running { + registration_queue: &mut cx.registration_queue, + elapsed: cx.wheel.elapsed(), + } + } + + #[cfg(feature = "rt-multi-thread")] + pub(crate) fn new_shutdown() -> Self { + TempLocalContext::Shutdown + } +} diff --git a/tokio/src/runtime/time/wheel/entry.rs b/tokio/src/runtime/time_alt/entry.rs similarity index 99% rename from tokio/src/runtime/time/wheel/entry.rs rename to tokio/src/runtime/time_alt/entry.rs index a437b8a2353..b7b5627e0b1 100644 --- a/tokio/src/runtime/time/wheel/entry.rs +++ b/tokio/src/runtime/time_alt/entry.rs @@ -6,7 +6,7 @@ use std::marker::PhantomPinned; use std::ptr::NonNull; use std::task::Waker; -pub(crate) type EntryList = linked_list::LinkedList; +pub(super) type EntryList = linked_list::LinkedList; #[derive(Debug)] struct State { diff --git a/tokio/src/runtime/time_alt/mod.rs b/tokio/src/runtime/time_alt/mod.rs new file mode 100644 index 00000000000..1ba4321b08b --- /dev/null +++ b/tokio/src/runtime/time_alt/mod.rs @@ -0,0 +1,21 @@ +pub(crate) mod context; +pub(super) use context::{LocalContext, TempLocalContext}; + +pub(crate) mod 
cancellation_queue; + +mod entry; +pub(crate) use entry::Handle as EntryHandle; +use entry::{CancellationQueueEntry, RegistrationQueueEntry, WakeQueueEntry}; +use entry::{Entry, EntryList}; + +mod registration_queue; +pub(crate) use registration_queue::RegistrationQueue; + +mod timer; +pub(crate) use timer::Timer; + +mod wheel; +pub(super) use wheel::Wheel; + +mod wake_queue; +pub(crate) use wake_queue::WakeQueue; diff --git a/tokio/src/runtime/time/wheel/registration_queue.rs b/tokio/src/runtime/time_alt/registration_queue.rs similarity index 91% rename from tokio/src/runtime/time/wheel/registration_queue.rs rename to tokio/src/runtime/time_alt/registration_queue.rs index ad5a5abab06..d135e5b213b 100644 --- a/tokio/src/runtime/time/wheel/registration_queue.rs +++ b/tokio/src/runtime/time_alt/registration_queue.rs @@ -1,5 +1,4 @@ -use super::{Entry, EntryHandle}; -use crate::runtime::time::wheel::RegistrationQueueEntry; +use super::{Entry, EntryHandle, RegistrationQueueEntry}; use crate::util::linked_list; type EntryList = linked_list::LinkedList; @@ -39,3 +38,6 @@ impl RegistrationQueue { self.list.pop_front() } } + +#[cfg(test)] +mod tests; diff --git a/tokio/src/runtime/time_alt/registration_queue/tests.rs b/tokio/src/runtime/time_alt/registration_queue/tests.rs new file mode 100644 index 00000000000..b6b3699fa3d --- /dev/null +++ b/tokio/src/runtime/time_alt/registration_queue/tests.rs @@ -0,0 +1,53 @@ +use super::*; + +use futures::task::noop_waker; + +#[cfg(loom)] +const NUM_ITEMS: usize = 16; + +#[cfg(not(loom))] +const NUM_ITEMS: usize = 64; + +fn new_handle() -> EntryHandle { + EntryHandle::new(0, noop_waker()) +} + +fn model(f: F) { + #[cfg(loom)] + loom::model(f); + + #[cfg(not(loom))] + f(); +} + +#[test] +fn sanity() { + model(|| { + let mut queue = RegistrationQueue::new(); + for _ in 0..NUM_ITEMS { + unsafe { + queue.push_front(new_handle()); + } + } + for _ in 0..NUM_ITEMS { + assert!(queue.pop_front().is_some()); + } + 
assert!(queue.pop_front().is_none()); + }); +} + +#[test] +fn drop_should_not_leak_memory() { + model(|| { + let mut queue = RegistrationQueue::new(); + + let hdls = (0..NUM_ITEMS).map(|_| new_handle()).collect::>(); + for hdl in hdls.iter() { + unsafe { queue.push_front(hdl.clone()) }; + } + + drop(queue); + + assert!(hdls.into_iter().all(|hdl| hdl.inner_strong_count() == 1)); + }); +} diff --git a/tokio/src/runtime/time/timer.rs b/tokio/src/runtime/time_alt/timer.rs similarity index 69% rename from tokio/src/runtime/time/timer.rs rename to tokio/src/runtime/time_alt/timer.rs index be7778532a8..e76a16a5034 100644 --- a/tokio/src/runtime/time/timer.rs +++ b/tokio/src/runtime/time_alt/timer.rs @@ -1,6 +1,5 @@ -use super::wheel::EntryHandle; +use super::{EntryHandle, TempLocalContext}; use crate::runtime::scheduler::Handle as SchedulerHandle; -use crate::runtime::time::Context as TimeContext; use crate::time::Instant; use std::pin::Pin; @@ -61,16 +60,16 @@ impl Timer { fn register(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> { let this = self.get_mut(); - with_current_registration_queue(&this.sched_handle, |maybe_time_cx| { + with_current_temp_local_context(&this.sched_handle, |maybe_time_cx| { let deadline = deadline_to_tick(&this.sched_handle, this.deadline); match maybe_time_cx { - Some(TimeContext::Running { + Some(TempLocalContext::Running { registration_queue: _, elapsed, }) if deadline <= elapsed => Poll::Ready(()), - Some(TimeContext::Running { + Some(TempLocalContext::Running { registration_queue, elapsed: _, }) => { @@ -82,7 +81,7 @@ impl Timer { Poll::Pending } #[cfg(feature = "rt-multi-thread")] - Some(TimeContext::Shutdown) => panic!("{RUNTIME_SHUTTING_DOWN_ERROR}"), + Some(TempLocalContext::Shutdown) => panic!("{RUNTIME_SHUTTING_DOWN_ERROR}"), _ => { let hdl = EntryHandle::new(deadline, cx.waker().clone()); @@ -104,42 +103,25 @@ impl Timer { None => self.register(cx), } } -} -fn with_current_registration_queue(hdl: &SchedulerHandle, f: F) -> 
R -where - F: FnOnce(Option>) -> R, -{ - #[cfg(not(feature = "rt"))] - { - let (_, _) = (hdl, f); - panic!("Tokio runtime is not enabled, cannot access the current wheel"); + pub(crate) fn scheduler_handle(&self) -> &SchedulerHandle { + &self.sched_handle } - #[cfg(feature = "rt")] - { - use crate::runtime::context; - - let is_same_rt = - context::with_current(|cur_hdl| cur_hdl.is_same_runtime(hdl)).unwrap_or_default(); + #[cfg(all(tokio_unstable, feature = "tracing"))] + pub(crate) fn driver(&self) -> &crate::runtime::time::Handle { + self.sched_handle.driver().time() + } - if !is_same_rt { - // We don't want to create the timer in one runtime, - // but register it in a different runtime's timer wheel. - f(None) - } else { - context::with_scheduler(|maybe_cx| match maybe_cx { - Some(cx) => cx.with_registration_queue(f), - None => f(None), - }) - } + #[cfg(all(tokio_unstable, feature = "tracing"))] + pub(crate) fn clock(&self) -> &crate::time::Clock { + self.sched_handle.driver().clock() } } -#[cfg(all(not(target_os = "wasi"), test))] -pub(super) fn with_current_time_context2(hdl: &SchedulerHandle, f: F) -> R +fn with_current_temp_local_context(hdl: &SchedulerHandle, f: F) -> R where - F: FnOnce(Option<&mut crate::runtime::time::Context2>) -> R, + F: FnOnce(Option>) -> R, { #[cfg(not(feature = "rt"))] { @@ -160,13 +142,44 @@ where f(None) } else { context::with_scheduler(|maybe_cx| match maybe_cx { - Some(cx) => cx.with_time_context2(f), + Some(cx) => cx.with_time_temp_local_context(f), None => f(None), }) } } } +// #[cfg(all(not(target_os = "wasi"), test))] +// pub(super) fn with_current_local_context(hdl: &SchedulerHandle, f: F) -> R +// where +// F: FnOnce(Option<&mut super::LocalContext>) -> R, +// { +// #[cfg(not(feature = "rt"))] +// { +// let (_, _) = (hdl, f); +// panic!("Tokio runtime is not enabled, cannot access the current wheel"); +// } + +// #[cfg(feature = "rt")] +// { +// use crate::runtime::context; + +// let is_same_rt = +// 
context::with_current(|cur_hdl| cur_hdl.is_same_runtime(hdl)).unwrap_or_default(); + +// if !is_same_rt { +// // We don't want to create the timer in one runtime, +// // but register it in a different runtime's timer wheel. +// f(None) +// } else { +// context::with_scheduler(|maybe_cx| match maybe_cx { +// Some(cx) => cx.with_time_local_context(f), +// None => f(None), +// }) +// } +// } +// } + fn push_from_remote(sched_hdl: &SchedulerHandle, entry_hdl: EntryHandle) { #[cfg(not(feature = "rt"))] { diff --git a/tokio/src/runtime/time/wheel/wake_queue.rs b/tokio/src/runtime/time_alt/wake_queue.rs similarity index 92% rename from tokio/src/runtime/time/wheel/wake_queue.rs rename to tokio/src/runtime/time_alt/wake_queue.rs index af034f4acd9..90ab9f6d287 100644 --- a/tokio/src/runtime/time/wheel/wake_queue.rs +++ b/tokio/src/runtime/time_alt/wake_queue.rs @@ -1,5 +1,4 @@ -use super::{Entry, EntryHandle}; -use crate::runtime::time::wheel::WakeQueueEntry; +use super::{Entry, EntryHandle, WakeQueueEntry}; use crate::util::linked_list; type EntryList = linked_list::LinkedList; @@ -46,3 +45,6 @@ impl WakeQueue { } } } + +#[cfg(test)] +mod tests; diff --git a/tokio/src/runtime/time_alt/wake_queue/tests.rs b/tokio/src/runtime/time_alt/wake_queue/tests.rs new file mode 100644 index 00000000000..f0449ee912b --- /dev/null +++ b/tokio/src/runtime/time_alt/wake_queue/tests.rs @@ -0,0 +1,66 @@ +use super::*; + +use futures_test::task::{new_count_waker, AwokenCount}; + +#[cfg(loom)] +const NUM_ITEMS: usize = 16; + +#[cfg(not(loom))] +const NUM_ITEMS: usize = 64; + +fn new_handle() -> (EntryHandle, AwokenCount) { + let (waker, count) = new_count_waker(); + (EntryHandle::new(0, waker), count) +} + +fn model(f: F) { + #[cfg(loom)] + loom::model(f); + + #[cfg(not(loom))] + f(); +} + +#[test] +fn sanity() { + model(|| { + let mut queue = WakeQueue::new(); + let mut counts = Vec::new(); + + for _ in 0..NUM_ITEMS { + let (hdl, count) = new_handle(); + counts.push(count); + unsafe { + 
queue.push_front(hdl); + } + } + assert!(!queue.is_empty()); + queue.wake_all(); + assert!(counts.into_iter().all(|c| c.get() == 1)); + }); +} + +#[test] +fn drop_should_not_leak_memory() { + model(|| { + let mut queue = WakeQueue::new(); + + let mut hdls = vec![]; + let mut counts = vec![]; + for _ in 0..NUM_ITEMS { + let (hdl, count) = new_handle(); + hdls.push(hdl); + counts.push(count); + } + + for hdl in hdls.iter() { + unsafe { queue.push_front(hdl.clone()) }; + } + + drop(queue); + + assert!(hdls.into_iter().all(|hdl| hdl.inner_strong_count() == 1)); + // drop should not wake any entries + assert!(counts.into_iter().all(|count| count.get() == 0)); + }); +} diff --git a/tokio/src/runtime/time_alt/wheel/level.rs b/tokio/src/runtime/time_alt/wheel/level.rs new file mode 100644 index 00000000000..99309bfe0fb --- /dev/null +++ b/tokio/src/runtime/time_alt/wheel/level.rs @@ -0,0 +1,194 @@ +use super::{EntryHandle, EntryList}; +use std::ptr::NonNull; +use std::{array, fmt}; + +/// Wheel for a single level in the timer. This wheel contains 64 slots. +pub(crate) struct Level { + level: usize, + + /// Bit field tracking which slots currently contain entries. + /// + /// Using a bit field to track slots that contain entries allows avoiding a + /// scan to find entries. This field is updated when entries are added or + /// removed from a slot. + /// + /// The least-significant bit represents slot zero. + occupied: u64, + + /// Slots. We access these via the EntryInner `current_list` as well, so this needs to be an `UnsafeCell`. + slot: [EntryList; LEVEL_MULT], +} + +/// Indicates when a slot must be processed next. +#[derive(Debug)] +pub(crate) struct Expiration { + /// The level containing the slot. + pub(crate) level: usize, + + /// The slot index. + pub(crate) slot: usize, + + /// The instant at which the slot needs to be processed. + pub(crate) deadline: u64, +} + +/// Level multiplier. +/// +/// Being a power of 2 is very important. 
+const LEVEL_MULT: usize = 64; + +impl Level { + pub(crate) fn new(level: usize) -> Level { + Level { + level, + occupied: 0, + slot: array::from_fn(|_| EntryList::default()), + } + } + + /// Finds the slot that needs to be processed next and returns the slot and + /// `Instant` at which this slot must be processed. + pub(crate) fn next_expiration(&self, now: u64) -> Option { + // Use the `occupied` bit field to get the index of the next slot that + // needs to be processed. + let slot = self.next_occupied_slot(now)?; + + // From the slot index, calculate the `Instant` at which it needs to be + // processed. This value *must* be in the future with respect to `now`. + + let level_range = level_range(self.level); + let slot_range = slot_range(self.level); + + // Compute the start date of the current level by masking the low bits + // of `now` (`level_range` is a power of 2). + let level_start = now & !(level_range - 1); + let mut deadline = level_start + slot as u64 * slot_range; + + if deadline <= now { + // A timer is in a slot "prior" to the current time. This can occur + // because we do not have an infinite hierarchy of timer levels, and + // eventually a timer scheduled for a very distant time might end up + // being placed in a slot that is beyond the end of all of the + // arrays. + // + // To deal with this, we first limit timers to being scheduled no + // more than MAX_DURATION ticks in the future; that is, they're at + // most one rotation of the top level away. Then, we force timers + // that logically would go into the top+1 level, to instead go into + // the top level's slots. + // + // What this means is that the top level's slots act as a + // pseudo-ring buffer, and we rotate around them indefinitely. If we + // compute a deadline before now, and it's the top level, it + // therefore means we're actually looking at a slot in the future. 
+ debug_assert_eq!(self.level, super::NUM_LEVELS - 1); + + deadline += level_range; + } + + debug_assert!( + deadline >= now, + "deadline={:016X}; now={:016X}; level={}; lr={:016X}, sr={:016X}, slot={}; occupied={:b}", + deadline, + now, + self.level, + level_range, + slot_range, + slot, + self.occupied + ); + + Some(Expiration { + level: self.level, + slot, + deadline, + }) + } + + fn next_occupied_slot(&self, now: u64) -> Option { + if self.occupied == 0 { + return None; + } + + // Get the slot for now using Maths + let now_slot = (now / slot_range(self.level)) as usize; + let occupied = self.occupied.rotate_right(now_slot as u32); + let zeros = occupied.trailing_zeros() as usize; + let slot = (zeros + now_slot) % LEVEL_MULT; + + Some(slot) + } + + pub(crate) unsafe fn add_entry(&mut self, hdl: EntryHandle) { + // Safety: the associated entry must be valid. + let deadline = hdl.deadline(); + let slot = slot_for(deadline, self.level); + + self.slot[slot].push_front(hdl); + + self.occupied |= occupied_bit(slot); + } + + pub(crate) unsafe fn remove_entry(&mut self, hdl: EntryHandle) { + let slot = slot_for(hdl.deadline(), self.level); + + unsafe { self.slot[slot].remove(NonNull::from(&hdl)) }; + if self.slot[slot].is_empty() { + // The bit is currently set + debug_assert!(self.occupied & occupied_bit(slot) != 0); + + // Unset the bit + self.occupied ^= occupied_bit(slot); + } + } + + pub(crate) fn take_slot(&mut self, slot: usize) -> EntryList { + self.occupied &= !occupied_bit(slot); + + std::mem::take(&mut self.slot[slot]) + } +} + +impl fmt::Debug for Level { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("Level") + .field("occupied", &self.occupied) + .finish() + } +} + +fn occupied_bit(slot: usize) -> u64 { + 1 << slot +} + +fn slot_range(level: usize) -> u64 { + LEVEL_MULT.pow(level as u32) as u64 +} + +fn level_range(level: usize) -> u64 { + LEVEL_MULT as u64 * slot_range(level) +} + +/// Converts a duration (milliseconds) 
and a level to a slot position. +fn slot_for(duration: u64, level: usize) -> usize { + ((duration >> (level * 6)) % LEVEL_MULT as u64) as usize +} + +#[cfg(all(test, not(loom)))] +mod test { + use super::*; + + #[test] + fn test_slot_for() { + for pos in 0..64 { + assert_eq!(pos as usize, slot_for(pos, 0)); + } + + for level in 1..5 { + for pos in level..64 { + let a = pos * 64_usize.pow(level as u32); + assert_eq!(pos, slot_for(a as u64, level)); + } + } + } +} diff --git a/tokio/src/runtime/time_alt/wheel/mod.rs b/tokio/src/runtime/time_alt/wheel/mod.rs new file mode 100644 index 00000000000..f66a91c150c --- /dev/null +++ b/tokio/src/runtime/time_alt/wheel/mod.rs @@ -0,0 +1,293 @@ +mod level; +pub(crate) use self::level::Expiration; +use self::level::Level; + +use super::cancellation_queue::Sender; +use super::{EntryHandle, EntryList, WakeQueue}; + +use std::array; + +/// Timing wheel implementation. +/// +/// This type provides the hashed timing wheel implementation that backs `Timer` +/// and `DelayQueue`. +/// +/// The structure is generic over `T: Stack`. This allows handling timeout data +/// being stored on the heap or in a slab. In order to support the latter case, +/// the slab must be passed into each function allowing the implementation to +/// lookup timer entries. +/// +/// See `Timer` documentation for some implementation notes. +#[derive(Debug)] +pub(crate) struct Wheel { + /// The number of milliseconds elapsed since the wheel started. + elapsed: u64, + + /// Timer wheel. + /// + /// Levels: + /// + /// * 1 ms slots / 64 ms range + /// * 64 ms slots / ~ 4 sec range + /// * ~ 4 sec slots / ~ 4 min range + /// * ~ 4 min slots / ~ 4 hr range + /// * ~ 4 hr slots / ~ 12 day range + /// * ~ 12 day slots / ~ 2 yr range + levels: Box<[Level; NUM_LEVELS]>, +} + +/// Number of levels. Each level has 64 slots. By using 6 levels with 64 slots +/// each, the timer is able to track time up to 2 years into the future with a +/// precision of 1 millisecond. 
+const NUM_LEVELS: usize = 6; + +/// The maximum duration of a `Sleep`. +pub(super) const MAX_DURATION: u64 = (1 << (6 * NUM_LEVELS)) - 1; + +impl Wheel { + /// Creates a new timing wheel. + pub(crate) fn new() -> Wheel { + Wheel { + elapsed: 0, + levels: Box::new(array::from_fn(Level::new)), + } + } + + /// Returns the number of milliseconds that have elapsed since the timing + /// wheel's creation. + pub(crate) fn elapsed(&self) -> u64 { + self.elapsed + } + + /// Inserts an entry into the timing wheel. + /// + /// # Arguments + /// + /// * `hdl`: The entry handle to insert into the wheel. + /// + /// # Safety + /// + /// The caller must ensure: + /// + /// * The entry is not already registered in ANY wheel. + pub(crate) unsafe fn insert(&mut self, hdl: EntryHandle, cancel_tx: Sender) { + let deadline = hdl.deadline(); + + assert!(deadline > self.elapsed); + + hdl.register_cancel_tx(cancel_tx); + + // Get the level at which the entry should be stored + let level = self.level_for(deadline); + unsafe { + self.levels[level].add_entry(hdl); + } + + debug_assert!({ + self.levels[level] + .next_expiration(self.elapsed) + .map(|e| e.deadline >= self.elapsed) + .unwrap_or(true) + }); + } + + /// Removes `item` from the timing wheel. + /// + /// # Safety + /// + /// The caller must ensure: + /// + /// * The entry is already registered in THIS wheel. + pub(crate) unsafe fn remove(&mut self, hdl: EntryHandle) { + let deadline = hdl.deadline(); + debug_assert!( + self.elapsed <= deadline, + "elapsed={}; deadline={}", + self.elapsed, + deadline + ); + + let level = self.level_for(deadline); + unsafe { self.levels[level].remove_entry(hdl.clone()) }; + } + + /// Advances the timer up to the instant represented by `now`. 
+ pub(crate) fn take_expired(&mut self, now: u64, wake_queue: &mut WakeQueue) { + loop { + match self.next_expiration() { + Some(ref expiration) if expiration.deadline <= now => { + self.process_expiration(expiration, wake_queue); + + self.set_elapsed(expiration.deadline); + } + _ => { + // in this case the poll did not indicate an expiration + // _and_ we were not able to find a next expiration in + // the current list of timers. advance to the poll's + // current time and do nothing else. + self.set_elapsed(now); + break; + } + } + } + } + + /// Returns the instant at which the next timeout expires. + fn next_expiration(&self) -> Option { + // Check all levels + for (level_num, level) in self.levels.iter().enumerate() { + if let Some(expiration) = level.next_expiration(self.elapsed) { + // There cannot be any expirations at a higher level that happen + // before this one. + debug_assert!(self.no_expirations_before(level_num + 1, expiration.deadline)); + + return Some(expiration); + } + } + + None + } + + /// Returns the tick at which this timer wheel next needs to perform some + /// processing, or None if there are no timers registered. + pub(crate) fn next_expiration_time(&self) -> Option { + self.next_expiration().map(|ex| ex.deadline) + } + + /// Used for debug assertions + fn no_expirations_before(&self, start_level: usize, before: u64) -> bool { + let mut res = true; + + for level in &self.levels[start_level..] { + if let Some(e2) = level.next_expiration(self.elapsed) { + if e2.deadline < before { + res = false; + } + } + } + + res + } + + /// iteratively find entries that are between the wheel's current + /// time and the expiration time. for each in that population either + /// queue it for notification (in the case of the last level) or tier + /// it down to the next level (in all other cases). 
+ pub(crate) fn process_expiration( + &mut self, + expiration: &Expiration, + wake_queue: &mut WakeQueue, + ) { + // Note that we need to take _all_ of the entries off the list before + // processing any of them. This is important because it's possible that + // those entries might need to be reinserted into the same slot. + // + // This happens only on the highest level, when an entry is inserted + // more than MAX_DURATION into the future. When this happens, we wrap + // around, and process some entries a multiple of MAX_DURATION before + // they actually need to be dropped down a level. We then reinsert them + // back into the same position; we must make sure we don't then process + // those entries again or we'll end up in an infinite loop. + let mut entries = self.take_entries(expiration); + + while let Some(hdl) = entries.pop_back() { + if expiration.level == 0 { + debug_assert_eq!(hdl.deadline(), expiration.deadline); + } + + let deadline = hdl.deadline(); + + if deadline > expiration.deadline { + let level = level_for(expiration.deadline, deadline); + unsafe { + self.levels[level].add_entry(hdl); + } + } else { + unsafe { + wake_queue.push_front(hdl); + } + } + } + } + + fn set_elapsed(&mut self, when: u64) { + assert!( + self.elapsed <= when, + "elapsed={:?}; when={:?}", + self.elapsed, + when + ); + + if when > self.elapsed { + self.elapsed = when; + } + } + + /// Obtains the list of entries that need processing for the given expiration. 
+ fn take_entries(&mut self, expiration: &Expiration) -> EntryList { + self.levels[expiration.level].take_slot(expiration.slot) + } + + fn level_for(&self, when: u64) -> usize { + level_for(self.elapsed, when) + } +} + +fn level_for(elapsed: u64, when: u64) -> usize { + const SLOT_MASK: u64 = (1 << 6) - 1; + + // Mask in the trailing bits ignored by the level calculation in order to cap + // the possible leading zeros + let mut masked = elapsed ^ when | SLOT_MASK; + + if masked >= MAX_DURATION { + // Fudge the timer into the top level + masked = MAX_DURATION - 1; + } + + let leading_zeros = masked.leading_zeros() as usize; + let significant = 63 - leading_zeros; + + significant / NUM_LEVELS +} + +#[cfg(all(test, not(loom)))] +mod test { + use super::*; + + #[test] + fn test_level_for() { + for pos in 0..64 { + assert_eq!(0, level_for(0, pos), "level_for({pos}) -- binary = {pos:b}"); + } + + for level in 1..5 { + for pos in level..64 { + let a = pos * 64_usize.pow(level as u32); + assert_eq!( + level, + level_for(0, a as u64), + "level_for({a}) -- binary = {a:b}" + ); + + if pos > level { + let a = a - 1; + assert_eq!( + level, + level_for(0, a as u64), + "level_for({a}) -- binary = {a:b}" + ); + } + + if pos < 64 { + let a = a + 1; + assert_eq!( + level, + level_for(0, a as u64), + "level_for({a}) -- binary = {a:b}" + ); + } + } + } + } +} diff --git a/tokio/src/time/error.rs b/tokio/src/time/error.rs index aaf8847b81a..21920059090 100644 --- a/tokio/src/time/error.rs +++ b/tokio/src/time/error.rs @@ -46,6 +46,11 @@ impl From for Error { #[derive(Debug, PartialEq, Eq)] pub struct Elapsed(()); +#[derive(Debug)] +pub(crate) enum InsertError { + Elapsed, +} + // ===== impl Error ===== impl Error { diff --git a/tokio/src/time/interval.rs b/tokio/src/time/interval.rs index 42b2973a38a..c7dcedf0a8d 100644 --- a/tokio/src/time/interval.rs +++ b/tokio/src/time/interval.rs @@ -444,7 +444,8 @@ impl Interval { #[cfg(not(all(tokio_unstable, feature = "tracing")))] let instant 
= poll_fn(|cx| self.poll_tick(cx)); - instant.await + let r = instant.await; + r } /// Polls for the next instant in the interval to be reached. @@ -484,7 +485,10 @@ impl Interval { .unwrap_or_else(Instant::far_future) }; - self.delay.as_mut().reset(next); + // When we arrive here, the internal delay returned `Poll::Ready`. + // Reset the delay but do not register it. It should be registered with + // the next call to [`poll_tick`]. + self.delay.as_mut().reset_without_reregister(next); // Return the time when we were scheduled to tick Poll::Ready(timeout) diff --git a/tokio/src/time/sleep.rs b/tokio/src/time/sleep.rs index d2720fe82ee..f0bbf5c2fd1 100644 --- a/tokio/src/time/sleep.rs +++ b/tokio/src/time/sleep.rs @@ -1,5 +1,5 @@ -use crate::runtime::time::Timer; -use crate::time::{Duration, Instant}; +use crate::runtime::Timer; +use crate::time::{error::Error, Duration, Instant}; use crate::util::trace; use pin_project_lite::pin_project; @@ -252,14 +252,14 @@ impl Sleep { location: Option<&'static Location<'static>>, ) -> Sleep { use crate::runtime::scheduler; - let sched_hdl = scheduler::Handle::current(); - let entry = Timer::new(sched_hdl, deadline); + let handle = scheduler::Handle::current(); + let entry = Timer::new(handle, deadline); #[cfg(all(tokio_unstable, feature = "tracing"))] let inner = { - let sched_hdl = scheduler::Handle::current(); - let clock = sched_hdl.driver().clock(); - let time_hdl = sched_hdl.driver().time(); - let time_source = time_hdl.time_source(); + let handle = scheduler::Handle::current(); + let clock = handle.driver().clock(); + let handle = &handle.driver().time(); + let time_source = handle.time_source(); let deadline_tick = time_source.deadline_to_tick(deadline); let duration = deadline_tick.saturating_sub(time_source.now(clock)); @@ -349,10 +349,43 @@ impl Sleep { /// /// [`Pin::as_mut`]: fn@std::pin::Pin::as_mut pub fn reset(self: Pin<&mut Self>, deadline: Instant) { - use crate::runtime::scheduler; + 
self.reset_inner(deadline); + } + + /// Resets the `Sleep` instance to a new deadline without reregistering it + /// to be woken up. + /// + /// Calling this function allows changing the instant at which the `Sleep` + /// future completes without having to create new associated state and + /// without having it registered. This is required in e.g. the + /// [`crate::time::Interval`] where we want to reset the internal [Sleep] + /// without having it wake up the last task that polled it. + pub(crate) fn reset_without_reregister(self: Pin<&mut Self>, deadline: Instant) { + let mut me = self.project(); + match me.entry.as_ref().flavor() { + crate::runtime::TimerFlavor::Traditional => { + me.entry.as_mut().reset(deadline, false); + } + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + crate::runtime::TimerFlavor::Alternative => { + let handle = me.entry.as_ref().scheduler_handle().clone(); + me.entry.set(Timer::new(handle, deadline)); + } + } + } + + fn reset_inner(self: Pin<&mut Self>, deadline: Instant) { let mut me = self.project(); - me.entry - .set(Timer::new(scheduler::Handle::current(), deadline)); + match me.entry.as_ref().flavor() { + crate::runtime::TimerFlavor::Traditional => { + me.entry.as_mut().reset(deadline, true); + } + #[cfg(all(tokio_unstable, feature = "rt-multi-thread"))] + crate::runtime::TimerFlavor::Alternative => { + let handle = me.entry.as_ref().scheduler_handle().clone(); + me.entry.set(Timer::new(handle, deadline)); + } + } #[cfg(all(tokio_unstable, feature = "tracing"))] { @@ -365,12 +398,8 @@ impl Sleep { tracing::trace_span!("runtime.resource.async_op.poll"); let duration = { - use crate::runtime::scheduler; - - let handle = scheduler::Handle::current(); - let clock = handle.driver().clock(); - let handle = &handle.driver().time(); - let time_source = handle.time_source(); + let clock = me.entry.as_ref().clock(); + let time_source = me.entry.as_ref().driver().time_source(); let now = time_source.now(clock); let deadline_tick = 
time_source.deadline_to_tick(deadline); deadline_tick.saturating_sub(now) @@ -385,7 +414,7 @@ impl Sleep { } } - fn poll_elapsed(self: Pin<&mut Self>, cx: &mut task::Context<'_>) -> Poll<()> { + fn poll_elapsed(self: Pin<&mut Self>, cx: &mut task::Context<'_>) -> Poll> { let me = self.project(); ready!(crate::trace::trace_leaf(cx)); @@ -432,6 +461,9 @@ impl Future for Sleep { let _ao_span = self.inner.ctx.async_op_span.clone().entered(); #[cfg(all(tokio_unstable, feature = "tracing"))] let _ao_poll_span = self.inner.ctx.async_op_poll_span.clone().entered(); - self.as_mut().poll_elapsed(cx) + match ready!(self.as_mut().poll_elapsed(cx)) { + Ok(()) => Poll::Ready(()), + Err(e) => panic!("timer error: {e}"), + } } } diff --git a/tokio/src/util/mod.rs b/tokio/src/util/mod.rs index f8c6641d8b3..c671fd6a1da 100644 --- a/tokio/src/util/mod.rs +++ b/tokio/src/util/mod.rs @@ -27,6 +27,8 @@ pub(crate) mod metric_atomics; // rt and signal use `Notify`, which requires `WakeList`. feature = "rt", feature = "signal", + // time driver uses `WakeList` in `Handle::process_at_time`. 
+ feature = "time", ))] mod wake_list; #[cfg(any( @@ -36,6 +38,7 @@ mod wake_list; feature = "fs", feature = "rt", feature = "signal", + feature = "time", ))] pub(crate) use wake_list::WakeList; diff --git a/tokio/tests/time_alt.rs b/tokio/tests/time_alt.rs new file mode 100644 index 00000000000..561e36f897b --- /dev/null +++ b/tokio/tests/time_alt.rs @@ -0,0 +1,112 @@ +#![warn(rust_2018_idioms)] +#![cfg(all(tokio_unstable, feature = "time", feature = "rt-multi-thread"))] + +use tokio::runtime::Runtime; +use tokio::time::*; + +fn rt_combinations() -> Vec { + let mut rts = vec![]; + + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(1) + .enable_all() + .build() + .unwrap(); + rts.push(rt); + + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(4) + .enable_all() + .build() + .unwrap(); + rts.push(rt); + + #[cfg(tokio_unstable)] + { + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(1) + .enable_alt_timer() + .enable_all() + .build() + .unwrap(); + rts.push(rt); + + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(4) + .enable_alt_timer() + .enable_all() + .build() + .unwrap(); + rts.push(rt); + } + + rts +} + +#[test] +fn sleep() { + const N: u32 = 512; + + for rt in rt_combinations() { + rt.block_on(async { + let mut jhs = vec![]; + + // sleep outside of the worker threads + let now = Instant::now(); + tokio::time::sleep(Duration::from_millis(10)).await; + assert!(now.elapsed() >= Duration::from_millis(10)); + + for _ in 0..N { + let jh = tokio::spawn(async move { + // sleep inside of the worker threads + let now = Instant::now(); + tokio::time::sleep(Duration::from_millis(10)).await; + assert!(now.elapsed() >= Duration::from_millis(10)); + }); + jhs.push(jh); + } + + for jh in jhs { + jh.await.unwrap(); + } + }); + } +} + +#[test] +fn timeout() { + const N: u32 = 512; + + for rt in rt_combinations() { + rt.block_on(async { + let mut jhs = vec![]; + + // timeout outside of the 
worker threads + let now = Instant::now(); + tokio::time::timeout( + Duration::from_millis(10), + std::future::pending::<()>(), + ) + .await.expect_err("timeout should occur"); + assert!(now.elapsed() >= Duration::from_millis(10)); + + for _ in 0..N { + let jh = tokio::spawn(async move { + let now = Instant::now(); + // timeout inside of the worker threads + tokio::time::timeout( + Duration::from_millis(10), + std::future::pending::<()>(), + ) + .await.expect_err("timeout should occur"); + assert!(now.elapsed() >= Duration::from_millis(10)); + }); + jhs.push(jh); + } + + for jh in jhs { + jh.await.unwrap(); + } + }); + } +} diff --git a/tokio/tests/time_panic.rs b/tokio/tests/time_panic.rs index 8a997f04529..aa7439cce56 100644 --- a/tokio/tests/time_panic.rs +++ b/tokio/tests/time_panic.rs @@ -13,19 +13,64 @@ mod support { } use support::panic::test_panic; +fn rt_combinations() -> Vec { + let mut rts = vec![]; + + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap(); + rts.push(rt); + + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(1) + .enable_all() + .build() + .unwrap(); + rts.push(rt); + + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(4) + .enable_all() + .build() + .unwrap(); + rts.push(rt); + + #[cfg(tokio_unstable)] + { + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(1) + .enable_alt_timer() + .enable_all() + .build() + .unwrap(); + rts.push(rt); + + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(4) + .enable_alt_timer() + .enable_all() + .build() + .unwrap(); + rts.push(rt); + } + + rts +} + #[test] fn pause_panic_caller() -> Result<(), Box> { - let panic_location_file = test_panic(|| { - let rt = current_thread(); - - rt.block_on(async { - time::pause(); - time::pause(); + for rt in rt_combinations() { + let panic_location_file = test_panic(|| { + rt.block_on(async { + time::pause(); + time::pause(); + }); }); - }); - 
// The panic location should be in this file - assert_eq!(&panic_location_file.unwrap(), file!()); + // The panic location should be in this file + assert_eq!(&panic_location_file.unwrap(), file!()); + } Ok(()) } diff --git a/tokio/tests/time_rt.rs b/tokio/tests/time_rt.rs index 13f888c1791..283967798a1 100644 --- a/tokio/tests/time_rt.rs +++ b/tokio/tests/time_rt.rs @@ -1,28 +1,96 @@ #![warn(rust_2018_idioms)] #![cfg(feature = "full")] +use tokio::runtime::Runtime; use tokio::time::*; use std::sync::mpsc; +fn rt_combinations() -> Vec { + let mut rts = vec![]; + + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap(); + rts.push(rt); + + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(1) + .enable_all() + .build() + .unwrap(); + rts.push(rt); + + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(4) + .enable_all() + .build() + .unwrap(); + rts.push(rt); + + #[cfg(tokio_unstable)] + { + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(1) + .enable_alt_timer() + .enable_all() + .build() + .unwrap(); + rts.push(rt); + + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(4) + .enable_alt_timer() + .enable_all() + .build() + .unwrap(); + rts.push(rt); + } + + rts +} + #[cfg(all(feature = "rt-multi-thread", not(target_os = "wasi")))] // Wasi doesn't support threads #[test] fn timer_with_threaded_runtime() { use tokio::runtime::Runtime; - let rt = Runtime::new().unwrap(); - let (tx, rx) = mpsc::channel(); + { + let rt = Runtime::new().unwrap(); + let (tx, rx) = mpsc::channel(); - rt.spawn(async move { - let when = Instant::now() + Duration::from_millis(10); + rt.spawn(async move { + let when = Instant::now() + Duration::from_millis(10); - sleep_until(when).await; - assert!(Instant::now() >= when); + sleep_until(when).await; + assert!(Instant::now() >= when); - tx.send(()).unwrap(); - }); + tx.send(()).unwrap(); + }); - rx.recv().unwrap(); + 
rx.recv().unwrap(); + } + + #[cfg(tokio_unstable)] + { + let rt = tokio::runtime::Builder::new_multi_thread() + .enable_alt_timer() + .build() + .unwrap(); + let (tx, rx) = mpsc::channel(); + + rt.block_on(async move { + let when = Instant::now() + Duration::from_millis(10); + + sleep_until(when).await; + assert!(Instant::now() >= when); + + tx.send(()).unwrap(); + }); + + rx.recv().unwrap(); + } } #[test] @@ -44,8 +112,8 @@ fn timer_with_current_thread_scheduler() { rx.recv().unwrap(); } -#[tokio::test] -async fn starving() { +#[test] +fn starving() { use std::future::Future; use std::pin::Pin; use std::task::{Context, Poll}; @@ -68,23 +136,31 @@ async fn starving() { } } - let when = Instant::now() + Duration::from_millis(10); - let starve = Starve(Box::pin(sleep_until(when)), 0); + for rt in rt_combinations() { + rt.block_on(async { + let when = Instant::now() + Duration::from_millis(10); + let starve = Starve(Box::pin(sleep_until(when)), 0); - starve.await; - assert!(Instant::now() >= when); + starve.await; + assert!(Instant::now() >= when); + }); + } } -#[tokio::test] -async fn timeout_value() { +#[test] +fn timeout_value() { use tokio::sync::oneshot; - let (_tx, rx) = oneshot::channel::<()>(); + for rt in rt_combinations() { + rt.block_on(async { + let (_tx, rx) = oneshot::channel::<()>(); - let now = Instant::now(); - let dur = Duration::from_millis(10); + let now = Instant::now(); + let dur = Duration::from_millis(10); - let res = timeout(dur, rx).await; - assert!(res.is_err()); - assert!(Instant::now() >= now + dur); + let res = timeout(dur, rx).await; + assert!(res.is_err()); + assert!(Instant::now() >= now + dur); + }); + } } From e4ed82c570f2717a53921dca6f609ae7b8805bf3 Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 25 Nov 2025 22:01:29 +0800 Subject: [PATCH 096/100] fix rustfmt reports Signed-off-by: ADD-SP --- tokio/tests/time_alt.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/tokio/tests/time_alt.rs 
b/tokio/tests/time_alt.rs index 561e36f897b..360aac3c802 100644 --- a/tokio/tests/time_alt.rs +++ b/tokio/tests/time_alt.rs @@ -83,22 +83,18 @@ fn timeout() { // timeout outside of the worker threads let now = Instant::now(); - tokio::time::timeout( - Duration::from_millis(10), - std::future::pending::<()>(), - ) - .await.expect_err("timeout should occur"); + tokio::time::timeout(Duration::from_millis(10), std::future::pending::<()>()) + .await + .expect_err("timeout should occur"); assert!(now.elapsed() >= Duration::from_millis(10)); for _ in 0..N { let jh = tokio::spawn(async move { let now = Instant::now(); // timeout inside of the worker threads - tokio::time::timeout( - Duration::from_millis(10), - std::future::pending::<()>(), - ) - .await.expect_err("timeout should occur"); + tokio::time::timeout(Duration::from_millis(10), std::future::pending::<()>()) + .await + .expect_err("timeout should occur"); assert!(now.elapsed() >= Duration::from_millis(10)); }); jhs.push(jh); From 91eb3e1806f5a491d943d1b8ecd7dcd3d85fedf0 Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 25 Nov 2025 22:19:48 +0800 Subject: [PATCH 097/100] revert `tokio-util/tests/time_delay_queue.rs` as it is using the existing timer impl Signed-off-by: ADD-SP --- tokio-util/tests/time_delay_queue.rs | 88 +++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 3 deletions(-) diff --git a/tokio-util/tests/time_delay_queue.rs b/tokio-util/tests/time_delay_queue.rs index dcbf2d0e7c9..fdd0844c8c3 100644 --- a/tokio-util/tests/time_delay_queue.rs +++ b/tokio-util/tests/time_delay_queue.rs @@ -3,7 +3,7 @@ #![cfg(feature = "full")] use futures::StreamExt; -use tokio::time::{self, sleep, Duration, Instant}; +use tokio::time::{self, sleep, sleep_until, Duration, Instant}; use tokio_test::{assert_pending, assert_ready, task}; use tokio_util::time::DelayQueue; @@ -82,6 +82,8 @@ async fn single_short_delay() { sleep(ms(5)).await; + assert!(queue.is_woken()); + let entry = assert_ready_some!(poll!(queue)); 
assert_eq!(*entry.get_ref(), "foo"); @@ -219,7 +221,7 @@ async fn reset_much_later() { sleep(ms(20)).await; - assert_ready_some!(poll!(queue)); + assert!(queue.is_woken()); } // Reproduces tokio-rs/tokio#849. @@ -246,7 +248,7 @@ async fn reset_twice() { sleep(ms(20)).await; - assert_ready_some!(poll!(queue)); + assert!(queue.is_woken()); } /// Regression test: Given an entry inserted with a deadline in the past, so @@ -410,6 +412,8 @@ async fn expire_first_key_when_reset_to_expire_earlier() { sleep(ms(100)).await; + assert!(queue.is_woken()); + let entry = assert_ready_some!(poll!(queue)).into_inner(); assert_eq!(entry, "one"); } @@ -431,6 +435,8 @@ async fn expire_second_key_when_reset_to_expire_earlier() { sleep(ms(100)).await; + assert!(queue.is_woken()); + let entry = assert_ready_some!(poll!(queue)).into_inner(); assert_eq!(entry, "two"); } @@ -451,6 +457,8 @@ async fn reset_first_expiring_item_to_expire_later() { queue.reset_at(&one, now + ms(300)); sleep(ms(250)).await; + assert!(queue.is_woken()); + let entry = assert_ready_some!(poll!(queue)).into_inner(); assert_eq!(entry, "two"); } @@ -514,6 +522,43 @@ async fn insert_after_ready_poll() { assert_eq!("3", res[2]); } +#[tokio::test] +async fn reset_later_after_slot_starts() { + time::pause(); + + let mut queue = task::spawn(DelayQueue::new()); + + let now = Instant::now(); + + let foo = queue.insert_at("foo", now + ms(100)); + + assert_pending!(poll!(queue)); + + sleep_until(now + Duration::from_millis(80)).await; + + assert!(!queue.is_woken()); + + // At this point the queue hasn't been polled, so `elapsed` on the wheel + // for the queue is still at 0 and hence the 1ms resolution slots cover + // [0-64). Resetting the time on the entry to 120 causes it to get put in + // the [64-128) slot. As the queue knows that the first entry is within + // that slot, but doesn't know when, it must wake immediately to advance + // the wheel. 
+ queue.reset_at(&foo, now + ms(120)); + assert!(queue.is_woken()); + + assert_pending!(poll!(queue)); + + sleep_until(now + Duration::from_millis(119)).await; + assert!(!queue.is_woken()); + + sleep(ms(1)).await; + assert!(queue.is_woken()); + + let entry = assert_ready_some!(poll!(queue)).into_inner(); + assert_eq!(entry, "foo"); +} + #[tokio::test] async fn reset_inserted_expired() { time::pause(); @@ -539,6 +584,43 @@ async fn reset_inserted_expired() { assert_eq!(queue.len(), 0); } +#[tokio::test] +async fn reset_earlier_after_slot_starts() { + time::pause(); + + let mut queue = task::spawn(DelayQueue::new()); + + let now = Instant::now(); + + let foo = queue.insert_at("foo", now + ms(200)); + + assert_pending!(poll!(queue)); + + sleep_until(now + Duration::from_millis(80)).await; + + assert!(!queue.is_woken()); + + // At this point the queue hasn't been polled, so `elapsed` on the wheel + // for the queue is still at 0 and hence the 1ms resolution slots cover + // [0-64). Resetting the time on the entry to 120 causes it to get put in + // the [64-128) slot. As the queue knows that the first entry is within + // that slot, but doesn't know when, it must wake immediately to advance + // the wheel. 
+ queue.reset_at(&foo, now + ms(120)); + assert!(queue.is_woken()); + + assert_pending!(poll!(queue)); + + sleep_until(now + Duration::from_millis(119)).await; + assert!(!queue.is_woken()); + + sleep(ms(1)).await; + assert!(queue.is_woken()); + + let entry = assert_ready_some!(poll!(queue)).into_inner(); + assert_eq!(entry, "foo"); +} + #[tokio::test] async fn insert_in_past_after_poll_fires_immediately() { time::pause(); From 4465babab4149d806000708346f52e0f14e25569 Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 25 Nov 2025 23:31:06 +0800 Subject: [PATCH 098/100] improve the test coverage Signed-off-by: ADD-SP --- tokio/src/runtime/time_alt/mod.rs | 3 + tokio/src/runtime/time_alt/tests.rs | 173 ++++++++++++++++++++++++++++ 2 files changed, 176 insertions(+) create mode 100644 tokio/src/runtime/time_alt/tests.rs diff --git a/tokio/src/runtime/time_alt/mod.rs b/tokio/src/runtime/time_alt/mod.rs index 1ba4321b08b..5d528461ced 100644 --- a/tokio/src/runtime/time_alt/mod.rs +++ b/tokio/src/runtime/time_alt/mod.rs @@ -19,3 +19,6 @@ pub(super) use wheel::Wheel; mod wake_queue; pub(crate) use wake_queue::WakeQueue; + +#[cfg(test)] +mod tests; diff --git a/tokio/src/runtime/time_alt/tests.rs b/tokio/src/runtime/time_alt/tests.rs new file mode 100644 index 00000000000..2806b04d848 --- /dev/null +++ b/tokio/src/runtime/time_alt/tests.rs @@ -0,0 +1,173 @@ +use super::*; +use crate::loom::thread; + +use futures_test::task::{new_count_waker, AwokenCount}; + +#[cfg(loom)] +const NUM_ITEMS: usize = 16; + +#[cfg(not(loom))] +const NUM_ITEMS: usize = 64; + +fn new_handle() -> (EntryHandle, AwokenCount) { + let (waker, count) = new_count_waker(); + (EntryHandle::new(0, waker), count) +} + +fn model(f: F) { + #[cfg(loom)] + loom::model(f); + + #[cfg(not(loom))] + f(); +} + +#[test] +fn wake_up_in_the_same_thread() { + model(|| { + let mut counts = Vec::new(); + + let mut reg_queue = RegistrationQueue::new(); + + for _ in 0..NUM_ITEMS { + let (hdl, count) = new_handle(); + 
counts.push(count); + unsafe { + reg_queue.push_front(hdl); + } + } + + let mut wake_queue = WakeQueue::new(); + for _ in 0..NUM_ITEMS { + if let Some(hdl) = reg_queue.pop_front() { + unsafe { + wake_queue.push_front(hdl); + } + } + } + assert!(reg_queue.pop_front().is_none()); + wake_queue.wake_all(); + + assert!(counts.into_iter().all(|c| c.get() == 1)); + }); +} + +#[test] +fn cancel_in_the_same_thread() { + model(|| { + let mut counts = Vec::new(); + let (cancel_tx, mut cancel_rx) = cancellation_queue::new(); + + let mut reg_queue = RegistrationQueue::new(); + + for _ in 0..NUM_ITEMS { + let (hdl, count) = new_handle(); + hdl.register_cancel_tx(cancel_tx.clone()); + counts.push(count); + unsafe { + reg_queue.push_front(hdl.clone()); + } + hdl.cancel(); + } + + // drain the registration queue + while let Some(hdl) = reg_queue.pop_front() { + drop(hdl); + } + + let mut wake_queue = WakeQueue::new(); + for hdl in cancel_rx.recv_all() { + unsafe { + wake_queue.push_front(hdl); + } + } + wake_queue.wake_all(); + + assert!(counts.into_iter().all(|c| c.get() == 0)); + }); +} + +#[test] +fn wake_up_in_the_different_thread() { + model(|| { + let mut counts = Vec::new(); + + let mut hdls = Vec::new(); + let mut reg_queue = RegistrationQueue::new(); + + for _ in 0..NUM_ITEMS { + let (hdl, count) = new_handle(); + counts.push(count); + hdls.push(hdl.clone()); + unsafe { + reg_queue.push_front(hdl); + } + } + + // wake up all handles in a different thread + thread::spawn(move || { + let mut wake_queue = WakeQueue::new(); + for _ in 0..NUM_ITEMS { + if let Some(hdl) = reg_queue.pop_front() { + unsafe { + wake_queue.push_front(hdl); + } + } + } + assert!(reg_queue.pop_front().is_none()); + wake_queue.wake_all(); + assert!(counts.into_iter().all(|c| c.get() == 1)); + }) + .join() + .unwrap(); + }); +} + +#[test] +fn cancel_in_the_different_thread() { + model(|| { + let mut counts = Vec::new(); + let (cancel_tx, mut cancel_rx) = cancellation_queue::new(); + let mut hdls = 
Vec::new(); + let mut reg_queue = RegistrationQueue::new(); + + for _ in 0..NUM_ITEMS { + let (hdl, count) = new_handle(); + hdl.register_cancel_tx(cancel_tx.clone()); + counts.push(count); + hdls.push(hdl.clone()); + unsafe { + reg_queue.push_front(hdl); + } + } + + // this thread cancel all handles concurrently + let jh1 = thread::spawn(move || { + // cancel all handles + for hdl in hdls { + hdl.cancel(); + } + }); + + // this thread process the cancellation queue concurrently + let jh2 = thread::spawn(move || { + let mut wake_queue = WakeQueue::new(); + for hdl in cancel_rx.recv_all() { + unsafe { + wake_queue.push_front(hdl); + } + } + wake_queue.wake_all(); + + assert!(counts.into_iter().all(|c| c.get() == 0)); + }); + + // the current thread drain the registration queue concurrently + while let Some(hdl) = reg_queue.pop_front() { + drop(hdl); + } + + jh1.join().unwrap(); + jh2.join().unwrap(); + }) +} From 804f799ca75035445bf1e1d464c04a6eb5295486 Mon Sep 17 00:00:00 2001 From: Qi Date: Tue, 25 Nov 2025 23:37:28 +0800 Subject: [PATCH 099/100] fixup! 
improve the test coverage --- tokio/src/runtime/time_alt/tests.rs | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/tokio/src/runtime/time_alt/tests.rs b/tokio/src/runtime/time_alt/tests.rs index 2806b04d848..29015f3bde9 100644 --- a/tokio/src/runtime/time_alt/tests.rs +++ b/tokio/src/runtime/time_alt/tests.rs @@ -142,32 +142,27 @@ fn cancel_in_the_different_thread() { } // this thread cancel all handles concurrently - let jh1 = thread::spawn(move || { + let jh = thread::spawn(move || { // cancel all handles for hdl in hdls { hdl.cancel(); } }); - // this thread process the cancellation queue concurrently - let jh2 = thread::spawn(move || { - let mut wake_queue = WakeQueue::new(); - for hdl in cancel_rx.recv_all() { - unsafe { - wake_queue.push_front(hdl); - } - } - wake_queue.wake_all(); - - assert!(counts.into_iter().all(|c| c.get() == 0)); - }); - - // the current thread drain the registration queue concurrently + // cancellation queue concurrently while let Some(hdl) = reg_queue.pop_front() { drop(hdl); } - jh1.join().unwrap(); - jh2.join().unwrap(); + let mut wake_queue = WakeQueue::new(); + for hdl in cancel_rx.recv_all() { + unsafe { + wake_queue.push_front(hdl); + } + } + wake_queue.wake_all(); + assert!(counts.into_iter().all(|c| c.get() == 0)); + + jh.join().unwrap(); }) } From f6115395b9dcb8d4e0108e98fefee4874d77829a Mon Sep 17 00:00:00 2001 From: Qi Date: Thu, 27 Nov 2025 00:20:02 +0800 Subject: [PATCH 100/100] improve style Signed-off-by: ADD-SP --- tokio/src/runtime/builder.rs | 2 ++ tokio/src/runtime/scheduler/mod.rs | 11 ------- .../runtime/scheduler/multi_thread/worker.rs | 15 ++------- tokio/src/runtime/time_alt/timer.rs | 31 ------------------- tokio/src/time/interval.rs | 3 +- 5 files changed, 5 insertions(+), 57 deletions(-) diff --git a/tokio/src/runtime/builder.rs b/tokio/src/runtime/builder.rs index a14d336f497..3a40717e5ce 100644 --- a/tokio/src/runtime/builder.rs +++ 
b/tokio/src/runtime/builder.rs @@ -378,6 +378,8 @@ impl Builder { /// This option only applies to multi-threaded runtimes. Attempting to use /// this option with any other runtime type will have no effect. /// + /// [Click here to share your experience with the alternative timer](https://github.com/tokio-rs/tokio/issues/7745) + /// /// # Examples /// /// ``` diff --git a/tokio/src/runtime/scheduler/mod.rs b/tokio/src/runtime/scheduler/mod.rs index 45d24ea288d..3f142120d33 100644 --- a/tokio/src/runtime/scheduler/mod.rs +++ b/tokio/src/runtime/scheduler/mod.rs @@ -304,17 +304,6 @@ cfg_rt! { } } - // #[cfg(all(tokio_unstable, feature = "time", feature = "rt-multi-thread", not(target_os = "wasi"), test))] - // pub(crate) fn with_time_local_context(&self, f: F) -> R - // where - // F: FnOnce(Option<&mut crate::runtime::time_alt::LocalContext>) -> R, - // { - // match self { - // Context::CurrentThread(_) => panic!("the alternative timer implementation is not supported on CurrentThread runtime"), - // Context::MultiThread(context) => context.with_time_local_context(f), - // } - // } - cfg_rt_multi_thread! 
{ #[track_caller] pub(crate) fn expect_multi_thread(&self) -> &multi_thread::Context { diff --git a/tokio/src/runtime/scheduler/multi_thread/worker.rs b/tokio/src/runtime/scheduler/multi_thread/worker.rs index 559ee5db3c8..ae9f2556dfb 100644 --- a/tokio/src/runtime/scheduler/multi_thread/worker.rs +++ b/tokio/src/runtime/scheduler/multi_thread/worker.rs @@ -989,17 +989,6 @@ impl Context { } } - // #[cfg(all(tokio_unstable, feature = "time", not(target_os = "wasi"), test))] - // pub(crate) fn with_time_local_context(&self, f: F) -> R - // where - // F: FnOnce(Option<&mut time_alt::LocalContext>) -> R, - // { - // self.with_core(|maybe_core| match maybe_core { - // Some(core) => f(Some(&mut core.time_context)), - // None => f(None), - // }) - // } - #[cfg(all(tokio_unstable, feature = "time"))] pub(crate) fn with_time_temp_local_context(&self, f: F) -> R where @@ -1353,7 +1342,7 @@ impl Handle { #[cfg(all(tokio_unstable, feature = "time"))] pub(crate) fn push_remote_timer(&self, hdl: time_alt::EntryHandle) { - assert_eq!(self.timer_flavor, TimerFlavor::Alternative,); + assert_eq!(self.timer_flavor, TimerFlavor::Alternative); { let mut synced = self.shared.synced.lock(); synced.inject_timers.push(hdl); @@ -1363,7 +1352,7 @@ impl Handle { #[cfg(all(tokio_unstable, feature = "time"))] pub(crate) fn take_remote_timers(&self) -> Vec { - assert_eq!(self.timer_flavor, TimerFlavor::Alternative,); + assert_eq!(self.timer_flavor, TimerFlavor::Alternative); // It's ok to lost the race, as another worker is // draining the inject_timers. 
match self.shared.synced.try_lock() { diff --git a/tokio/src/runtime/time_alt/timer.rs b/tokio/src/runtime/time_alt/timer.rs index e76a16a5034..178ab81f24e 100644 --- a/tokio/src/runtime/time_alt/timer.rs +++ b/tokio/src/runtime/time_alt/timer.rs @@ -149,37 +149,6 @@ where } } -// #[cfg(all(not(target_os = "wasi"), test))] -// pub(super) fn with_current_local_context(hdl: &SchedulerHandle, f: F) -> R -// where -// F: FnOnce(Option<&mut super::LocalContext>) -> R, -// { -// #[cfg(not(feature = "rt"))] -// { -// let (_, _) = (hdl, f); -// panic!("Tokio runtime is not enabled, cannot access the current wheel"); -// } - -// #[cfg(feature = "rt")] -// { -// use crate::runtime::context; - -// let is_same_rt = -// context::with_current(|cur_hdl| cur_hdl.is_same_runtime(hdl)).unwrap_or_default(); - -// if !is_same_rt { -// // We don't want to create the timer in one runtime, -// // but register it in a different runtime's timer wheel. -// f(None) -// } else { -// context::with_scheduler(|maybe_cx| match maybe_cx { -// Some(cx) => cx.with_time_local_context(f), -// None => f(None), -// }) -// } -// } -// } - fn push_from_remote(sched_hdl: &SchedulerHandle, entry_hdl: EntryHandle) { #[cfg(not(feature = "rt"))] { diff --git a/tokio/src/time/interval.rs b/tokio/src/time/interval.rs index c7dcedf0a8d..02cecc6ec1a 100644 --- a/tokio/src/time/interval.rs +++ b/tokio/src/time/interval.rs @@ -444,8 +444,7 @@ impl Interval { #[cfg(not(all(tokio_unstable, feature = "tracing")))] let instant = poll_fn(|cx| self.poll_tick(cx)); - let r = instant.await; - r + instant.await } /// Polls for the next instant in the interval to be reached.