-
Notifications
You must be signed in to change notification settings - Fork 196
feat: add --no-progress-timeout to forest-cli f3 ready
#6057
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
9f9b617
14aac3e
c746bdf
a15d349
1e9dca5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,7 +4,11 @@ | |
| #[cfg(test)] | ||
| mod tests; | ||
|
|
||
| use std::{borrow::Cow, sync::LazyLock, time::Duration}; | ||
| use std::{ | ||
| borrow::Cow, | ||
| sync::LazyLock, | ||
| time::{Duration, Instant}, | ||
| }; | ||
|
|
||
| use crate::{ | ||
| blocks::{Tipset, TipsetKey}, | ||
|
|
@@ -92,13 +96,17 @@ pub enum F3Commands { | |
| #[command(subcommand, name = "powertable", visible_alias = "pt")] | ||
| PowerTable(F3PowerTableCommands), | ||
| /// Checks if F3 is in sync. | ||
| #[group(args = ["no_progress_timeout"], requires = "wait")] | ||
| Ready { | ||
| /// Wait until F3 is in sync. | ||
| #[arg(long)] | ||
| wait: bool, | ||
| /// The threshold of the epoch gap between chain head and F3 head within which F3 is considered in sync. | ||
| #[arg(long, default_value_t = 20)] | ||
| threshold: usize, | ||
| /// Exit after F3 making no progress for this duration. | ||
| #[arg(long, default_value = "10m")] | ||
| no_progress_timeout: humantime::Duration, | ||
| }, | ||
| } | ||
|
|
||
|
|
@@ -128,7 +136,11 @@ impl F3Commands { | |
| } | ||
| Self::Certs(cmd) => cmd.run(client).await, | ||
| Self::PowerTable(cmd) => cmd.run(client).await, | ||
| Self::Ready { wait, threshold } => { | ||
| Self::Ready { | ||
| wait, | ||
| threshold, | ||
| no_progress_timeout, | ||
| } => { | ||
| let is_running = client.call(F3IsRunning::request(())?).await?; | ||
| if !is_running { | ||
| anyhow::bail!("F3 is not running"); | ||
|
|
@@ -148,16 +160,21 @@ impl F3Commands { | |
| ); | ||
| pb.enable_steady_tick(std::time::Duration::from_millis(100)); | ||
| let mut num_consecutive_fetch_failtures = 0; | ||
| let no_progress_timeout_duration: Duration = no_progress_timeout.into(); | ||
| let mut interval = tokio::time::interval(Duration::from_secs(1)); | ||
| let mut last_f3_head_epoch = 0; | ||
| let mut last_progress = Instant::now(); | ||
| loop { | ||
| interval.tick().await; | ||
| match get_heads(&client).await { | ||
| Ok((chain_head, cert_head)) => { | ||
| num_consecutive_fetch_failtures = 0; | ||
| if cert_head | ||
| .chain_head() | ||
| .epoch | ||
| .saturating_add(threshold.try_into()?) | ||
| let f3_head_epoch = cert_head.chain_head().epoch; | ||
| if f3_head_epoch != last_f3_head_epoch { | ||
| last_f3_head_epoch = f3_head_epoch; | ||
| last_progress = Instant::now(); | ||
| } | ||
| if f3_head_epoch.saturating_add(threshold.try_into()?) | ||
| >= chain_head.epoch() | ||
| { | ||
| let text = format!( | ||
|
|
@@ -195,6 +212,13 @@ impl F3Commands { | |
| } | ||
| } | ||
| } | ||
|
|
||
| if last_progress + no_progress_timeout_duration < Instant::now() { | ||
| eprintln!( | ||
| "Warning: F3 made no progress in the past {no_progress_timeout}. Exiting..." | ||
| ); | ||
| std::process::exit(3); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why 3?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Exit code 1 and 2 have been used above
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's use some constants to easily indicate the exit code cause.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
| } | ||
| } | ||
| Ok(()) | ||
| } | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
💡 Verification agent
❓ Verification inconclusive
Bug:
--no-progress-timeoutapplies even when the flag isn’t passed (due todefault_value)With
default_value = "10m", the timeout is always set and the no-progress exit will trigger after 10 minutes even if the user did not pass--no-progress-timeout. The PR objective states the flag should cause exit when provided. Fix by making the arg optional, only enabling the behavior when present, and using a “default when present without value”.Apply these diffs:
requiresdirectly on the arg (drop the group):elapsed()(more idiomatic, avoids time arithmetic):To confirm behavior, after the change please verify:
forest-cli f3 ready --waitwaits indefinitely without exiting on stall.forest-cli f3 ready --wait --no-progress-timeoutuses a default 10m and exits on stall.forest-cli f3 ready --wait --no-progress-timeout 30sexits after ~30s stall.Also applies to: 167-171, 220-225
Make
--no-progress-timeouttruly optional and only active when passeddefault_value = "10m"means the 10-minute timeout always applies. Refactor in src/cli/subcommands/f3_cmd.rs (lines 99–110, 167–171, 220–225):#[group(...)]; add on the flag itself#[arg(long, default_value = "10m")] no_progress_timeout: humantime::Duration,to
#[arg(long, requires = "wait", num_args(0..=1), default_missing_value = "10m")] no_progress_timeout: Option<humantime::Duration>,Option<Duration>and wrap your timeout check inThis ensures
--no-progress-timeoutonly takes effect when provided.🤖 Prompt for AI Agents
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Simplified the
groupusage. Always having a 10m timeout is fine.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@hanabi1224 Understood! Having a default 10-minute timeout makes sense from a fail-fast perspective - it prevents the command from hanging indefinitely even when users don't explicitly specify the timeout flag. This aligns well with preventing operational issues in automated scenarios.
✏️ Learnings added
🧠 Learnings used