Merge pull request #2245 from Jamesbarford/fix/error-table

Kobzol · web-flow · commit 55388f116470 · 2025-09-16T17:08:07.000+02:00
Add job ID to error table
diff --git a/collector/src/bin/collector.rs b/collector/src/bin/collector.rs
@@ -137,6 +137,7 @@ struct SharedBenchmarkConfig {
     artifact_id: ArtifactId,
     toolchain: Toolchain,
     record_duration: bool,
+    job_id: Option<u32>,
 }
 
 fn check_measureme_installed() -> Result<(), String> {
@@ -847,12 +848,14 @@ fn main_result() -> anyhow::Result<i32> {
                 runtime.group,
                 &toolchain,
                 &artifact_id,
+                None,
             ))?;
 
             let shared = SharedBenchmarkConfig {
                 artifact_id,
                 toolchain,
                 record_duration: true,
+                job_id: None,
             };
             let config = RuntimeBenchmarkConfig::new(
                 runtime_suite,
@@ -996,6 +999,7 @@ fn main_result() -> anyhow::Result<i32> {
                 toolchain,
                 artifact_id,
                 record_duration: true,
+                job_id: None,
             };
             let config = CompileBenchmarkConfig {
                 benchmarks,
@@ -1048,7 +1052,12 @@ fn main_result() -> anyhow::Result<i32> {
                         let toolchain =
                             create_toolchain_from_published_version(&tag, &host_target_tuple)?;
                         let conn = rt.block_on(pool.connection());
-                        rt.block_on(bench_published_artifact(conn, toolchain, &benchmark_dirs))
+                        rt.block_on(bench_published_artifact(
+                            conn,
+                            toolchain,
+                            &benchmark_dirs,
+                            None,
+                        ))
                     }
                     NextArtifact::Commit {
                         commit,
@@ -1132,6 +1141,7 @@ fn main_result() -> anyhow::Result<i32> {
                             None,
                             &toolchain,
                             &artifact_id,
+                            None,
                         ))?;
 
                         let runtime_config = RuntimeBenchmarkConfig {
@@ -1143,6 +1153,7 @@ fn main_result() -> anyhow::Result<i32> {
                             artifact_id,
                             toolchain,
                             record_duration: true,
+                            job_id: None,
                         };
 
                         rt.block_on(run_benchmarks(
@@ -1173,7 +1184,12 @@ fn main_result() -> anyhow::Result<i32> {
             let conn = rt.block_on(pool.connection());
             let toolchain =
                 create_toolchain_from_published_version(&toolchain, &host_target_tuple)?;
-            rt.block_on(bench_published_artifact(conn, toolchain, &benchmark_dirs))?;
+            rt.block_on(bench_published_artifact(
+                conn,
+                toolchain,
+                &benchmark_dirs,
+                None,
+            ))?;
             Ok(0)
         }
 
@@ -1495,8 +1511,9 @@ async fn run_job_queue_benchmarks(
                         // not with a benchmark.
                         conn.record_error(
                             artifact_row_id,
-                            &format!("job:{}", benchmark_job.id()),
+                            "Job failure",
                             &format!("Error while benchmarking job {benchmark_job:?}: {error:?}"),
+                            Some(benchmark_job.id()),
                         )
                         .await;
 
@@ -1656,6 +1673,7 @@ async fn run_benchmark_job(
         artifact_id,
         toolchain,
         record_duration: false,
+        job_id: Some(job.id()),
     };
 
     // A failure here means that it was not possible to compile something, that likely won't resolve
@@ -1723,6 +1741,7 @@ async fn create_benchmark_configs(
             None,
             toolchain,
             artifact_id,
+            Some(job.id()),
         )
         .await?;
         Some(RuntimeBenchmarkConfig {
@@ -2047,6 +2066,7 @@ async fn load_runtime_benchmarks(
     group: Option<String>,
     toolchain: &Toolchain,
     artifact_id: &ArtifactId,
+    job_id: Option<u32>,
 ) -> anyhow::Result<BenchmarkSuite> {
     let BenchmarkSuiteCompilation {
         suite,
@@ -2059,19 +2079,20 @@ async fn load_runtime_benchmarks(
         RuntimeCompilationOpts::default(),
     )?;
 
-    record_runtime_compilation_errors(conn, artifact_id, failed_to_compile).await;
+    record_runtime_compilation_errors(conn, artifact_id, failed_to_compile, job_id).await;
     Ok(suite)
 }
 
 async fn record_runtime_compilation_errors(
     connection: &mut dyn Connection,
     artifact_id: &ArtifactId,
     errors: HashMap<String, String>,
+    job_id: Option<u32>,
 ) {
     let artifact_row_number = connection.artifact_id(artifact_id).await;
     for (krate, error) in errors {
         connection
-            .record_error(artifact_row_number, &krate, &error)
+            .record_error(artifact_row_number, &krate, &error, job_id)
             .await;
     }
 }
@@ -2153,6 +2174,7 @@ async fn run_benchmarks(
             &collector,
             runtime.filter,
             runtime.iterations,
+            shared.job_id,
         )
         .await
         .context("Runtime benchmarks failed")
@@ -2175,6 +2197,7 @@ async fn bench_published_artifact(
     mut connection: Box<dyn Connection>,
     toolchain: Toolchain,
     dirs: &BenchmarkDirs<'_>,
+    job_id: Option<u32>,
 ) -> anyhow::Result<()> {
     let artifact_id = ArtifactId::Tag(toolchain.id.clone());
 
@@ -2200,13 +2223,15 @@ async fn bench_published_artifact(
         None,
         &toolchain,
         &artifact_id,
+        job_id,
     )
     .await?;
 
     let shared = SharedBenchmarkConfig {
         artifact_id,
         toolchain,
         record_duration: true,
+        job_id: None,
     };
     run_benchmarks(
         connection.as_mut(),
@@ -2295,6 +2320,7 @@ async fn bench_compile(
                     collector.artifact_row_id,
                     &benchmark_name.0,
                     &format!("{s:?}"),
+                    shared.job_id,
                 )
                 .await;
         };
diff --git a/collector/src/runtime/mod.rs b/collector/src/runtime/mod.rs
@@ -35,6 +35,7 @@ pub async fn bench_runtime(
     collector: &CollectorCtx,
     filter: RuntimeBenchmarkFilter,
     iterations: u32,
+    job_id: Option<u32>,
 ) -> anyhow::Result<()> {
     let filtered = suite.filtered_benchmark_count(&filter);
     println!("Executing {filtered} benchmarks\n");
@@ -89,7 +90,12 @@ pub async fn bench_runtime(
         if let Err(error) = result {
             eprintln!("collector error: {error:#}");
             tx.conn()
-                .record_error(collector.artifact_row_id, &step_name, &format!("{error:?}"))
+                .record_error(
+                    collector.artifact_row_id,
+                    &step_name,
+                    &format!("{error:?}"),
+                    job_id,
+                )
                 .await;
         };
 
diff --git a/database/schema.md b/database/schema.md
@@ -250,14 +250,21 @@ bors_sha    pr  parent_sha  complete  requested    include  exclude  runs  commi
 
 ### error
 
-Records a compilation or runtime error for an artifact and a benchmark.
+Records an error within the application namely a;
+- compilation
+- runtime
+- error contextual to a benchmark job
 
-```
-sqlite> select * from error limit 1;
-aid         benchmark   error
-----------  ---         -----
-1           syn-1.0.89  Failed to compile...
-```
+Columns:
+
+* **id** (`BIGINT` / `SERIAL`): Primary key identifier for the error row;
+  auto increments with each new error.
+* **aid** (`INTERGER`): References the artifact id column.
+* **context** (`TEXT NOT NULL`): A little message to be able to understand a 
+  bit more about why or where the error occured.
+* **message** (`TEXT NOT NULL`): The error message.
+* **job_id** (`INTEGER`): A nullable job_id which, if it exists it will inform
+  us as to which job this error is part of.
 
 ## New benchmarking design
 We are currently implementing a new design for dispatching benchmarks to collector(s) and storing
diff --git a/database/src/bin/postgres-to-sqlite.rs b/database/src/bin/postgres-to-sqlite.rs
@@ -191,12 +191,12 @@ impl Table for Error {
     }
 
     fn postgres_select_statement(&self, since_weeks_ago: Option<u32>) -> String {
-        let s = "select benchmark, aid, error from ".to_string() + self.name();
+        let s = "select context, aid, message from ".to_string() + self.name();
         with_filter_clause_maybe(s, ARTIFACT_JOIN_AND_WHERE, since_weeks_ago)
     }
 
     fn sqlite_insert_statement(&self) -> &'static str {
-        "insert into error (benchmark, aid, error) VALUES (?, ?, ?)"
+        "insert into error (context, aid, message) VALUES (?, ?, ?)"
     }
 
     fn sqlite_execute_insert(&self, statement: &mut rusqlite::Statement, row: tokio_postgres::Row) {
diff --git a/database/src/bin/sqlite-to-postgres.rs b/database/src/bin/sqlite-to-postgres.rs
@@ -241,9 +241,10 @@ struct Error;
 
 #[derive(Serialize)]
 struct ErrorRow<'a> {
+    id: i32,
     aid: i32,
-    benchmark: &'a str,
-    error: Nullable<&'a str>,
+    context: &'a str,
+    message: Nullable<&'a str>,
 }
 
 impl Table for Error {
@@ -252,11 +253,11 @@ impl Table for Error {
     }
 
     fn sqlite_attributes() -> &'static str {
-        "aid, benchmark, error"
+        "id, aid, context, message, job_id"
     }
 
     fn postgres_attributes() -> &'static str {
-        "aid, benchmark, error"
+        "id, aid, context, message, job_id"
     }
 
     fn postgres_generated_id_attribute() -> Option<&'static str> {
@@ -266,9 +267,10 @@ impl Table for Error {
     fn write_postgres_csv_row<W: Write>(writer: &mut csv::Writer<W>, row: &rusqlite::Row) {
         writer
             .serialize(ErrorRow {
-                aid: row.get(0).unwrap(),
-                benchmark: row.get_ref(1).unwrap().as_str().unwrap(),
-                error: row.get_ref(2).unwrap().try_into().unwrap(),
+                id: row.get(0).unwrap(),
+                aid: row.get(1).unwrap(),
+                context: row.get_ref(2).unwrap().as_str().unwrap(),
+                message: row.get_ref(3).unwrap().try_into().unwrap(),
             })
             .unwrap();
     }
diff --git a/database/src/pool.rs b/database/src/pool.rs
@@ -74,7 +74,13 @@ pub trait Connection: Send + Sync {
         profile: Profile,
         scenario: Scenario,
     );
-    async fn record_error(&self, artifact: ArtifactIdNumber, krate: &str, error: &str);
+    async fn record_error(
+        &self,
+        artifact: ArtifactIdNumber,
+        context: &str,
+        message: &str,
+        job_id: Option<u32>,
+    );
     async fn record_rustc_crate(
         &self,
         collection: CollectionId,
@@ -1094,8 +1100,8 @@ mod tests {
 
             // Request 1 will have artifact with errors
             let aid1 = ctx.upsert_master_artifact("sha1").await;
-            db.record_error(aid1, "crate1", "error1").await;
-            db.record_error(aid1, "crate2", "error2").await;
+            db.record_error(aid1, "crate1", "error1", None).await;
+            db.record_error(aid1, "crate2", "error2", None).await;
 
             // Request 2 will have artifact without errors
             let _aid2 = ctx.upsert_master_artifact("sha2").await;
diff --git a/database/src/pool/postgres.rs b/database/src/pool/postgres.rs
@@ -387,6 +387,29 @@ static MIGRATIONS: &[&str] = &[
     r#"
     ALTER TABLE collector_config ADD COLUMN commit_sha TEXT NULL;
     "#,
+    r#"
+    CREATE TABLE error_new (
+        id      SERIAL PRIMARY KEY,
+        aid     INTEGER NOT NULL REFERENCES artifact(id) ON DELETE CASCADE ON UPDATE CASCADE,
+        message TEXT NOT NULL,
+        context TEXT NOT NULL,
+        job_id  INTEGER
+    );
+
+    INSERT INTO
+        error_new (aid, message, context)
+    SELECT
+        aid,
+        error,
+        benchmark
+    FROM
+        error;
+
+    DROP TABLE error;
+    ALTER TABLE error_new RENAME TO error;
+
+    CREATE INDEX error_artifact_idx ON error(aid);
+    "#,
 ];
 
 #[async_trait::async_trait]
@@ -577,7 +600,7 @@ impl PostgresConnection {
                     .prepare("insert into rustc_compilation (aid, cid, crate, duration) VALUES ($1, $2, $3, $4)")
                     .await
                     .unwrap(),
-                get_error: conn.prepare("select benchmark, error from error where aid = $1").await.unwrap(),
+                get_error: conn.prepare("select context, message from error where aid = $1").await.unwrap(),
                 insert_pstat_series: conn.prepare("insert into pstat_series (crate, profile, scenario, backend, target, metric) VALUES ($1, $2, $3, $4, $5, $6) ON CONFLICT DO NOTHING RETURNING id").await.unwrap(),
                 select_pstat_series: conn.prepare("select id from pstat_series where crate = $1 and profile = $2 and scenario = $3 and backend = $4 and target = $5 and metric = $6").await.unwrap(),
                 collection_id: conn.prepare("insert into collection (perf_commit) VALUES ($1) returning id").await.unwrap(),
@@ -684,21 +707,21 @@ impl PostgresConnection {
                     ), errors AS (
                         SELECT
                             artifacts.name AS tag,
-                            error.benchmark,
-                            error.error
+                            error.context,
+                            error.message
                         FROM error
                         -- Use right join to only return errors for selected artifacts
                         RIGHT JOIN artifacts ON error.aid = artifacts.id
                     )
                     -- Select request duplicated for each pair of (benchmark, error)
                     SELECT
                         completed.*,
-                        errors.benchmark,
-                        errors.error
+                        errors.context,
+                        errors.message
                     FROM completed
                     LEFT JOIN errors ON errors.tag = completed.tag
                     -- Re-sort the requests, because the original order may be lost
-                    ORDER BY completed.completed_at DESC;
+                    ORDER BY completed.completed_at DESC
                 ")).await.unwrap(),
                 get_jobs_of_in_progress_benchmark_requests: conn.prepare(&format!("
                     -- Get in progress requests
@@ -1180,11 +1203,22 @@ where
         ArtifactIdNumber(aid)
     }
 
-    async fn record_error(&self, artifact: ArtifactIdNumber, krate: &str, error: &str) {
+    async fn record_error(
+        &self,
+        artifact: ArtifactIdNumber,
+        context: &str,
+        message: &str,
+        job_id: Option<u32>,
+    ) {
         self.conn()
             .execute(
-                "insert into error (benchmark, aid, error) VALUES ($1, $2, $3)",
-                &[&krate, &(artifact.0 as i32), &error],
+                "insert into error (context, aid, message, job_id) VALUES ($1, $2, $3, $4)",
+                &[
+                    &context,
+                    &(artifact.0 as i32),
+                    &message,
+                    &job_id.map(|id| id as i32),
+                ],
             )
             .await
             .unwrap();
diff --git a/database/src/pool/sqlite.rs b/database/src/pool/sqlite.rs

Original file line number	Diff line number	Diff line change
`@@ -191,12 +191,12 @@ impl Table for Error {`
`191`	`191`	`}`
`192`	`192`
`193`	`193`	`fn postgres_select_statement(&self, since_weeks_ago: Option<u32>) -> String {`
`194`		`- let s = "select benchmark, aid, error from ".to_string() + self.name();`
	`194`	`+ let s = "select context, aid, message from ".to_string() + self.name();`
`195`	`195`	`with_filter_clause_maybe(s, ARTIFACT_JOIN_AND_WHERE, since_weeks_ago)`
`196`	`196`	`}`
`197`	`197`
`198`	`198`	`fn sqlite_insert_statement(&self) -> &'static str {`
`199`		`- "insert into error (benchmark, aid, error) VALUES (?, ?, ?)"`
	`199`	`+ "insert into error (context, aid, message) VALUES (?, ?, ?)"`
`200`	`200`	`}`
`201`	`201`
`202`	`202`	`fn sqlite_execute_insert(&self, statement: &mut rusqlite::Statement, row: tokio_postgres::Row) {`