From 97c12e8a45ceff1ee1a2953d45afdc3e784a3ebf Mon Sep 17 00:00:00 2001 From: Jan-Erik Rediger Date: Thu, 6 Aug 2020 13:34:45 +0200 Subject: [PATCH] Track the database size on initialization --- docs/user/collected-metrics/metrics.md | 1 + glean-core/metrics.yaml | 14 +++++++++ glean-core/src/database/mod.rs | 40 +++++++++++++++++++++++--- glean-core/src/internal_metrics.rs | 23 +++++++++++++++ glean-core/src/lib.rs | 19 +++++++++++- glean-core/src/lib_unit_tests.rs | 28 ++++++++++++++++++ 6 files changed, 120 insertions(+), 5 deletions(-) diff --git a/docs/user/collected-metrics/metrics.md b/docs/user/collected-metrics/metrics.md index f66a55d56d..517d27b9bf 100644 --- a/docs/user/collected-metrics/metrics.md +++ b/docs/user/collected-metrics/metrics.md @@ -128,6 +128,7 @@ The following metrics are added to the ping: | Name | Type | Description | Data reviews | Extras | Expiration | [Data Sensitivity](https://wiki.mozilla.org/Firefox/Data_Collection) | | --- | --- | --- | --- | --- | --- | --- | +| glean.database.size |[memory_distribution](https://mozilla.github.io/glean/book/user/metrics/memory_distribution.html) |The size of the database file at startup. |[1](https://bugzilla.mozilla.org/show_bug.cgi?id=1656589)||2020-09-30 | | | glean.error.preinit_tasks_overflow |[counter](https://mozilla.github.io/glean/book/user/metrics/counter.html) |The number of tasks queued in the pre-initialization buffer. Only sent if the buffer overflows. |[1](https://bugzilla.mozilla.org/show_bug.cgi?id=1609482#c3)||never |1 | | glean.upload.discarded_exceeding_pings_size |[memory_distribution](https://mozilla.github.io/glean/book/user/metrics/memory_distribution.html) |The size of pings that exceeded the maximum ping size allowed for upload. 
|[1](https://bugzilla.mozilla.org/show_bug.cgi?id=1597761#c10)||never |1 |
 | glean.upload.ping_upload_failure |[labeled_counter](https://mozilla.github.io/glean/book/user/metrics/labeled_counters.html) |Counts the number of ping upload failures, by type of failure. This includes failures for all ping types, though the counts appear in the next successfully sent `metrics` ping. |[1](https://bugzilla.mozilla.org/show_bug.cgi?id=1589124#c1)||never |1 |
diff --git a/glean-core/metrics.yaml b/glean-core/metrics.yaml
index bead6a5603..274b8108fe 100644
--- a/glean-core/metrics.yaml
+++ b/glean-core/metrics.yaml
@@ -391,3 +391,17 @@ glean.upload:
     expires: never
     no_lint:
       - COMMON_PREFIX
+
+glean.database:
+  size:
+    type: memory_distribution
+    description: >
+      The size of the database file at startup.
+    memory_unit: byte
+    bugs:
+      - https://bugzilla.mozilla.org/show_bug.cgi?id=1656589
+    data_reviews:
+      - https://bugzilla.mozilla.org/show_bug.cgi?id=1656589#c7
+    notification_emails:
+      - glean-team@mozilla.com
+    expires: "2020-09-30"
diff --git a/glean-core/src/database/mod.rs b/glean-core/src/database/mod.rs
index 33a9bc0023..fec0f821e2 100644
--- a/glean-core/src/database/mod.rs
+++ b/glean-core/src/database/mod.rs
@@ -5,6 +5,8 @@
 use std::collections::btree_map::Entry;
 use std::collections::BTreeMap;
 use std::fs;
+use std::num::NonZeroU64;
+use std::path::Path;
 use std::str;
 use std::sync::RwLock;
 
@@ -32,6 +34,9 @@ pub struct Database {
     /// we will save metrics with 'ping' lifetime data in a map temporarily
     /// so as to persist them to disk using rkv in bulk on demand.
     ping_lifetime_data: Option<RwLock<BTreeMap<String, Metric>>>,
+
+    /// Initial database file size in bytes; `None` if missing, empty or unreadable.
+    file_size: Option<NonZeroU64>,
 }
 
 impl std::fmt::Debug for Database {
@@ -46,6 +51,24 @@ impl std::fmt::Debug for Database {
     }
 }
 
+/// Get the size of the file at the given path.
+///
+/// # Arguments
+///
+/// - `path` - The path of the file to inspect.
+///
+/// # Returns
+///
+/// Returns the non-zero file size in bytes,
+/// or `None` on error or if the size is `0`.
+fn file_size(path: &Path) -> Option<NonZeroU64> {
+    log::trace!("Getting file size for path: {}", path.display());
+    fs::metadata(path)
+        .ok()
+        .map(|stat| stat.len())
+        .and_then(NonZeroU64::new)
+}
+
 impl Database {
     /// Initialize the data store.
     ///
@@ -55,7 +78,13 @@ impl Database {
     /// It also loads any Lifetime::Ping data that might be
     /// persisted, in case `delay_ping_lifetime_io` is set.
     pub fn new(data_path: &str, delay_ping_lifetime_io: bool) -> Result<Self> {
-        let rkv = Self::open_rkv(data_path)?;
+        let path = Path::new(data_path).join("db");
+        log::debug!("Database path: {:?}", path.display());
+
+        // Measure before opening the store, so this reflects what was on disk at startup.
+        let file_size = file_size(&path.join("data.mdb"));
+
+        let rkv = Self::open_rkv(&path)?;
         let user_store = rkv.open_single(Lifetime::User.as_str(), StoreOptions::create())?;
         let ping_store = rkv.open_single(Lifetime::Ping.as_str(), StoreOptions::create())?;
         let application_store =
@@ -72,6 +101,7 @@ impl Database {
             ping_store,
             application_store,
             ping_lifetime_data,
+            file_size,
         };
 
         db.load_ping_lifetime_data();
@@ -79,6 +109,11 @@ impl Database {
         Ok(db)
     }
 
+    /// Get the initial database file size.
+    pub fn file_size(&self) -> Option<NonZeroU64> {
+        self.file_size
+    }
+
     fn get_store(&self, lifetime: Lifetime) -> &SingleStore {
         match lifetime {
             Lifetime::User => &self.user_store,
@@ -88,9 +123,7 @@ impl Database {
         }
     }
     /// Creates the storage directories and inits rkv.
-    fn open_rkv(path: &str) -> Result<Rkv> {
-        let path = std::path::Path::new(path).join("db");
-        log::debug!("Database path: {:?}", path.display());
+    fn open_rkv(path: &Path) -> Result<Rkv> {
         fs::create_dir_all(&path)?;
 
         let rkv = Rkv::new(&path)?;
diff --git a/glean-core/src/internal_metrics.rs b/glean-core/src/internal_metrics.rs
index 53421672fc..b2af8f8907 100644
--- a/glean-core/src/internal_metrics.rs
+++ b/glean-core/src/internal_metrics.rs
@@ -88,3 +88,31 @@ impl UploadMetrics {
         }
     }
 }
+
+/// Metrics describing Glean's own database.
+#[derive(Debug)]
+pub struct DatabaseMetrics {
+    /// Size of the database file in bytes, as found when the database was opened.
+    pub size: MemoryDistributionMetric,
+}
+
+impl DatabaseMetrics {
+    /// Creates the database metrics with their fixed category, ping and lifetime.
+    pub fn new() -> DatabaseMetrics {
+        DatabaseMetrics {
+            size: MemoryDistributionMetric::new(
+                CommonMetricData {
+                    name: "size".into(),
+                    category: "glean.database".into(),
+                    send_in_pings: vec!["metrics".into()],
+                    lifetime: Lifetime::Application,
+                    disabled: false,
+                    dynamic_label: None,
+                },
+                // The database reports its file size in bytes (`fs::metadata().len()`),
+                // so samples must not be scaled as kilobytes.
+                MemoryUnit::Byte,
+            ),
+        }
+    }
+}
diff --git a/glean-core/src/lib.rs b/glean-core/src/lib.rs
index 557f06d76c..0d35360ce3 100644
--- a/glean-core/src/lib.rs
+++ b/glean-core/src/lib.rs
@@ -47,7 +47,7 @@ use crate::debug::DebugOptions;
 pub use crate::error::{Error, ErrorKind, Result};
 pub use crate::error_recording::{test_get_num_recorded_errors, ErrorType};
 use crate::event_database::EventDatabase;
-use crate::internal_metrics::CoreMetrics;
+use crate::internal_metrics::{CoreMetrics, DatabaseMetrics};
 use crate::internal_pings::InternalPings;
 use crate::metrics::{Metric, MetricType, PingType};
 use crate::ping::PingMaker;
@@ -170,6 +170,7 @@ pub struct Glean {
     data_store: Option<Database>,
     event_data_store: EventDatabase,
     core_metrics: CoreMetrics,
+    database_metrics: DatabaseMetrics,
     internal_pings: InternalPings,
     data_path: PathBuf,
     application_id: String,
@@ -210,6 +211,7 @@ impl Glean {
             data_store,
             event_data_store,
             core_metrics: CoreMetrics::new(),
+            database_metrics: DatabaseMetrics::new(),
             internal_pings: InternalPings::new(),
             upload_manager,
             data_path:
PathBuf::from(&cfg.data_path),
@@ -318,6 +320,17 @@ impl Glean {
         self.set_application_lifetime_core_metrics();
     }
 
+    /// Records the initial database file size, when the data store knows one.
+    fn initialize_database_metrics(&mut self) {
+        log::trace!("Initializing database metrics");
+
+        let initial_size = self.data_store.as_ref().and_then(|db| db.file_size());
+        if let Some(bytes) = initial_size {
+            log::trace!("Database file size: {}", bytes.get());
+            self.database_metrics.size.accumulate(self, bytes.get())
+        }
+    }
+
     /// Called when Glean is initialized to the point where it can correctly
     /// assemble pings. Usually called from the language specific layer after all
     /// of the core metrics have been set and the ping types have been
@@ -380,6 +393,7 @@ impl Glean {
     fn on_upload_enabled(&mut self) {
         self.upload_enabled = true;
         self.initialize_core_metrics();
+        self.initialize_database_metrics();
     }
 
     /// Handles the changing of state from upload enabled to disabled.
diff --git a/glean-core/src/lib_unit_tests.rs b/glean-core/src/lib_unit_tests.rs
index e59ad01b61..ce35fb1ae7 100644
--- a/glean-core/src/lib_unit_tests.rs
+++ b/glean-core/src/lib_unit_tests.rs
@@ -805,3 +805,32 @@ fn test_empty_application_id() {
     // Check that this is indeed the first run.
     assert!(glean.is_first_run());
 }
+
+#[test]
+fn records_database_file_size() {
+    let _ = env_logger::builder().is_test(true).try_init();
+
+    // `new_glean` is not used here: both Glean instances must share one database directory.
+    let data_dir = tempfile::tempdir().unwrap();
+    let data_path = data_dir.path().display().to_string();
+
+    // First start in a fresh directory: creates the database, no size is expected yet.
+    let first_run = Glean::with_options(&data_path, GLOBAL_APPLICATION_ID, true).unwrap();
+    let recorded = first_run
+        .database_metrics
+        .size
+        .test_get_value(&first_run, "metrics");
+    assert!(recorded.is_none());
+    drop(first_run);
+
+    // Second start on the same directory: the file size should now be recorded.
+    let second_run = Glean::with_options(&data_path, GLOBAL_APPLICATION_ID, true).unwrap();
+    let recorded = second_run
+        .database_metrics
+        .size
+        .test_get_value(&second_run, "metrics");
+    assert!(recorded.is_some());
+
+    // We should see the database containing some data.
+    assert!(recorded.unwrap().sum > 0);
+}