-
Notifications
You must be signed in to change notification settings - Fork 66
syn2mas: Add progress reporting to log and to opentelemetry metrics #4215
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
0afeb89
ebad8a7
9228f20
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| use std::{collections::HashMap, process::ExitCode}; | ||
| use std::{collections::HashMap, process::ExitCode, sync::atomic::Ordering, time::Duration}; | ||
|
|
||
| use anyhow::Context; | ||
| use camino::Utf8PathBuf; | ||
|
|
@@ -10,12 +10,18 @@ use mas_config::{ | |
| }; | ||
| use mas_storage::SystemClock; | ||
| use mas_storage_pg::MIGRATOR; | ||
| use opentelemetry::KeyValue; | ||
| use rand::thread_rng; | ||
| use sqlx::{Connection, Either, PgConnection, postgres::PgConnectOptions, types::Uuid}; | ||
| use syn2mas::{LockedMasDatabase, MasWriter, SynapseReader, synapse_config}; | ||
| use tracing::{Instrument, error, info_span, warn}; | ||
| use syn2mas::{ | ||
| LockedMasDatabase, MasWriter, Progress, ProgressStage, SynapseReader, synapse_config, | ||
| }; | ||
| use tracing::{Instrument, error, info, info_span, warn}; | ||
|
|
||
| use crate::util::{DatabaseConnectOptions, database_connection_from_config_with_options}; | ||
| use crate::{ | ||
| telemetry::METER, | ||
| util::{DatabaseConnectOptions, database_connection_from_config_with_options}, | ||
| }; | ||
|
|
||
| /// The exit code used by `syn2mas check` and `syn2mas migrate` when there are | ||
| /// errors preventing migration. | ||
|
|
@@ -248,7 +254,12 @@ impl Options { | |
| #[allow(clippy::disallowed_methods)] | ||
| let mut rng = thread_rng(); | ||
|
|
||
| // TODO progress reporting | ||
| let progress = Progress::default(); | ||
|
|
||
| let occasional_progress_logger_task = | ||
| tokio::spawn(occasional_progress_logger(progress.clone())); | ||
| let progress_telemetry_task = tokio::spawn(progress_telemetry(progress.clone())); | ||
|
|
||
| let mas_matrix = MatrixConfig::extract(figment)?; | ||
| eprintln!("\n\n"); | ||
| syn2mas::migrate( | ||
|
|
@@ -258,11 +269,75 @@ impl Options { | |
| &clock, | ||
| &mut rng, | ||
| provider_id_mappings, | ||
| &progress, | ||
| ) | ||
| .await?; | ||
|
|
||
| occasional_progress_logger_task.abort(); | ||
| progress_telemetry_task.abort(); | ||
|
|
||
| Ok(ExitCode::SUCCESS) | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// Logs progress every 30 seconds, as a lightweight alternative to a progress | ||
| /// bar. For most deployments, the migration will not take 30 seconds so this | ||
| /// will not be relevant. In other cases, this will give the operator an idea of | ||
| /// what's going on. | ||
| async fn occasional_progress_logger(progress: Progress) { | ||
| loop { | ||
| tokio::time::sleep(Duration::from_secs(30)).await; | ||
| match &**progress.get_current_stage() { | ||
| ProgressStage::SettingUp => { | ||
| info!(name: "progress", "still setting up"); | ||
| } | ||
| ProgressStage::MigratingData { | ||
| entity, | ||
| migrated, | ||
| approx_count, | ||
| } => { | ||
| let migrated = migrated.load(Ordering::Relaxed); | ||
| #[allow(clippy::cast_precision_loss)] | ||
| let percent = (f64::from(migrated) / *approx_count as f64) * 100.0; | ||
| info!(name: "progress", "migrating {entity}: {migrated}/~{approx_count} (~{percent:.1}%)"); | ||
| } | ||
| ProgressStage::RebuildIndex { index_name } => { | ||
| info!(name: "progress", "still waiting for rebuild of index {index_name}"); | ||
| } | ||
| ProgressStage::RebuildConstraint { constraint_name } => { | ||
| info!(name: "progress", "still waiting for rebuild of constraint {constraint_name}"); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// Reports migration progress as OpenTelemetry metrics | ||
| async fn progress_telemetry(progress: Progress) { | ||
| let migrated_data_counter = METER | ||
| .u64_gauge("migrated_data") | ||
|
||
| .with_description("How many entities have been migrated so far") | ||
| .build(); | ||
| let max_data_counter = METER | ||
| .u64_gauge("max_data") | ||
| .with_description("How many entities of the given type exist (approximate)") | ||
| .build(); | ||
|
||
|
|
||
| loop { | ||
| tokio::time::sleep(Duration::from_secs(10)).await; | ||
| if let ProgressStage::MigratingData { | ||
| entity, | ||
| migrated, | ||
| approx_count, | ||
| } = &**progress.get_current_stage() | ||
| { | ||
| let metrics_kv = [KeyValue::new("entity", *entity)]; | ||
| let migrated = migrated.load(Ordering::Relaxed); | ||
| migrated_data_counter.record(u64::from(migrated), &metrics_kv); | ||
| max_data_counter.record(*approx_count, &metrics_kv); | ||
| } else { | ||
| // not sure how to map other stages | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ideally this would be done through a `CancellationToken`, as I don't particularly like aborting tasks. But since we haven't set that up in the rest of the migration process, I'm happy to keep it like this for now.