diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ad0193b1e..698939bad4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ [Full changelog](https://github.com/mozilla/glean/compare/v66.1.1...main) +* General + * Stop reporting db file sizes during init phase ([#3331](https://github.com/mozilla/glean/pull/3331)) + # v66.1.1 (2025-11-06) [Full changelog](https://github.com/mozilla/glean/compare/v66.1.0...v66.1.1) diff --git a/glean-core/metrics.yaml b/glean-core/metrics.yaml index ad9159902c..eda34050ed 100644 --- a/glean-core/metrics.yaml +++ b/glean-core/metrics.yaml @@ -895,46 +895,6 @@ glean.database: expires: never disabled: true - load_sizes: - type: object - description: | - The size of the db file during specific phases of initialization. - bugs: - - https://bugzilla.mozilla.org/1990627 - data_reviews: - - https://bugzilla.mozilla.org/1990627 - data_sensitivity: - - technical - notification_emails: - - glean-team@mozilla.com - - chutten@mozilla.com - expires: 2025-12-01 - send_in_pings: - - health - structure: - type: object - properties: - new: - type: number - open: - type: number - post_open: - type: number - post_open_user: - type: number - post_load_ping_lifetime_data: - type: number - user_records: - type: number - ping_records: - type: number - application_records: - type: number - ping_memory_records: - type: number - error: - type: string - glean.validation: foreground_count: type: counter diff --git a/glean-core/rlb/tests/health_ping.rs b/glean-core/rlb/tests/health_ping.rs index 3ed1500eb3..e43bb9231f 100644 --- a/glean-core/rlb/tests/health_ping.rs +++ b/glean-core/rlb/tests/health_ping.rs @@ -129,21 +129,6 @@ fn test_pre_post_init_health_pings_exist() { let exception_uuid = &preinits[0].1["metrics"]["uuid"]["glean.health_recovered_client_id"]; assert_eq!(&JsonValue::Null, exception_uuid); - // An initial preinit "health" ping will show no db file sizes - let load_sizes = preinits[0].1["metrics"]["object"]["glean.database.load_sizes"] - .as_object() - .unwrap(); - assert_eq!(None, load_sizes.get("new")); - assert_eq!(None, load_sizes.get("open")); - assert_eq!(None, load_sizes.get("post_open")); - assert_eq!(None, load_sizes.get("post_open_user")); - assert_eq!(None, load_sizes.get("post_load_ping_lifetime_data")); - assert_eq!(0, load_sizes["user_records"]); - assert_eq!(0, load_sizes["ping_records"]); - assert_eq!(0, load_sizes["application_records"]); - assert_eq!(None, load_sizes.get("ping_memory_records")); - assert_eq!(None, load_sizes.get("error")); - let cfg = ConfigurationBuilder::new(true, tmpname.clone(), "health-ping-test") .with_server_endpoint("invalid-test-host") .with_uploader(FakeUploader) @@ -164,22 +149,4 @@ fn test_pre_post_init_health_pings_exist() { // We should have a second "pre_init"-reason "health" ping now. assert_eq!(1, second_preinit.len()); - - let load_sizes = second_preinit[0].1["metrics"]["object"]["glean.database.load_sizes"] - .as_object() - .unwrap(); - assert_ne!(0, load_sizes["new"]); - assert_ne!(0, load_sizes["open"]); - assert_ne!(0, load_sizes["post_open"]); - assert_ne!(0, load_sizes["post_open_user"].as_i64().unwrap()); - assert_ne!(0, load_sizes["post_load_ping_lifetime_data"]); - assert_eq!( - load_sizes["new"], - load_sizes["post_load_ping_lifetime_data"] - ); - assert!(0 < load_sizes["user_records"].as_i64().unwrap()); - assert!(0 < load_sizes["ping_records"].as_i64().unwrap()); - assert!(0 < load_sizes["application_records"].as_i64().unwrap()); - assert_eq!(None, load_sizes.get("ping_memory_records")); - assert_eq!(None, load_sizes.get("error")); } diff --git a/glean-core/src/core/mod.rs b/glean-core/src/core/mod.rs index 21f3a700fa..5baa278a40 100644 --- a/glean-core/src/core/mod.rs +++ b/glean-core/src/core/mod.rs @@ -336,14 +336,13 @@ impl Glean { { let data_store = glean.data_store.as_ref().unwrap(); - let db_load_sizes = data_store.load_sizes.as_ref().unwrap(); - let new_size = db_load_sizes.new.unwrap_or(0); + let file_size = data_store.file_size.map(|n| n.get()).unwrap_or(0); // If we have a client ID on disk, we check the database if let Some(stored_client_id) = stored_client_id { // state (c) - if new_size <= 0 { - log::trace!("no database. database size={new_size}. stored_client_id={stored_client_id}"); + if file_size == 0 { + log::trace!("no database. database size={file_size}. stored_client_id={stored_client_id}"); // state (d) glean .health_metrics @@ -673,17 +672,6 @@ impl Glean { .rkv_load_error .set_sync(self, rkv_load_state) } - - if let Some(load_sizes) = self - .data_store - .as_mut() - .and_then(|database| database.load_sizes()) - { - self.database_metrics.load_sizes.set_sync( - self, - serde_json::to_value(load_sizes).unwrap_or(serde_json::json!({})), - ); - } } /// Signals that the environment is ready to submit pings. diff --git a/glean-core/src/database/mod.rs b/glean-core/src/database/mod.rs index 893eefdd5d..8408aa1f18 100644 --- a/glean-core/src/database/mod.rs +++ b/glean-core/src/database/mod.rs @@ -14,7 +14,6 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::RwLock; use std::time::{Duration, Instant}; -use crate::internal_metrics::LoadSizesObject; use crate::ErrorKind; use malloc_size_of::MallocSizeOf; @@ -137,7 +136,7 @@ pub struct Database { ping_lifetime_max_time: Duration, /// Initial file size when opening the database. - file_size: Option, + pub(crate) file_size: Option, /// RKV load state rkv_load_state: RkvLoadState, @@ -145,9 +144,6 @@ pub struct Database { /// Times an Rkv write-commit took. /// Re-applied as samples in a timing distribution later. pub(crate) write_timings: RefCell>, - - /// The database size at various phases of loading. - pub(crate) load_sizes: Option, } impl MallocSizeOf for Database { @@ -217,12 +213,6 @@ fn database_size(dir: &Path) -> Option { NonZeroU64::new(total_size) } -fn store_size(rkv: &Rkv, store: &SingleStore) -> Result { - let reader = rkv.read()?; - let iter = store.iter_start(&reader)?; - Ok(iter.count().try_into().unwrap_or(-1)) -} - impl Database { /// Initializes the data store. /// @@ -238,33 +228,14 @@ impl Database { ping_lifetime_max_time: Duration, ) -> Result { let path = data_path.join("db"); - let mut load_sizes = LoadSizesObject { - new: database_size(&path).map(|x| x.get() as i64), - ..Default::default() - }; log::debug!("Database path: {:?}", path.display()); let file_size = database_size(&path); - load_sizes.open = file_size.map(|x| x.get() as i64); let (rkv, rkv_load_state) = Self::open_rkv(&path)?; - load_sizes.post_open = database_size(&path).map(|x| x.get() as i64); let user_store = rkv.open_single(Lifetime::User.as_str(), StoreOptions::create())?; - match store_size(&rkv, &user_store) { - Ok(record_count) => load_sizes.user_records = Some(record_count), - Err(e) => load_sizes.error = Some(e.to_string()), - }; - load_sizes.post_open_user = database_size(&path).map(|x| x.get() as i64); let ping_store = rkv.open_single(Lifetime::Ping.as_str(), StoreOptions::create())?; - match store_size(&rkv, &ping_store) { - Ok(record_count) => load_sizes.ping_records = Some(record_count), - Err(e) => load_sizes.error = Some(e.to_string()), - }; let application_store = rkv.open_single(Lifetime::Application.as_str(), StoreOptions::create())?; - match store_size(&rkv, &application_store) { - Ok(record_count) => load_sizes.application_records = Some(record_count), - Err(e) => load_sizes.error = Some(e.to_string()), - }; let ping_lifetime_data = if delay_ping_lifetime_io { Some(RwLock::new(BTreeMap::new())) } else { @@ -277,7 +248,7 @@ impl Database { let now = Instant::now(); - let mut db = Self { + let db = Self { rkv, user_store, ping_store, @@ -290,19 +261,9 @@ impl Database { file_size, rkv_load_state, write_timings, - load_sizes: Some(load_sizes), }; db.load_ping_lifetime_data(); - // Safe unwrap: just set it to Some above. - db.load_sizes.as_mut().unwrap().post_load_ping_lifetime_data = - database_size(&path).map(|x| x.get() as i64); - if let Some(ref ping_data) = db.ping_lifetime_data { - // Safe unwrap: just set load_sizes to Some above. - // Safe unwrap: Unless the read lock is poisoned (already?!). - db.load_sizes.as_mut().unwrap().ping_memory_records = - Some(ping_data.read().unwrap().len().try_into().unwrap_or(-1)); - } Ok(db) } @@ -321,11 +282,6 @@ impl Database { } } - /// Get the load events. - pub fn load_sizes(&mut self) -> Option { - self.load_sizes.take() - } - fn get_store(&self, lifetime: Lifetime) -> &SingleStore { match lifetime { Lifetime::User => &self.user_store, diff --git a/glean-core/src/internal_metrics.rs b/glean-core/src/internal_metrics.rs index a0ea5c5029..ebc5fbd9ce 100644 --- a/glean-core/src/internal_metrics.rs +++ b/glean-core/src/internal_metrics.rs @@ -335,9 +335,6 @@ pub struct DatabaseMetrics { /// The time it takes for a write-commit for the Glean database. pub write_time: TimingDistributionMetric, - - /// The database size at specific phases of initialization. - pub load_sizes: ObjectMetric, } impl DatabaseMetrics { @@ -375,15 +372,6 @@ impl DatabaseMetrics { }, TimeUnit::Microsecond, ), - - load_sizes: ObjectMetric::new(CommonMetricData { - name: "load_sizes".into(), - category: "glean.database".into(), - send_in_pings: vec!["health".into()], - lifetime: Lifetime::Ping, - disabled: false, - dynamic_label: None, - }), } } } @@ -539,28 +527,3 @@ pub struct DataDirectoryInfoObjectItemItemFilesItem { #[serde(skip_serializing_if = "Option::is_none")] pub error_message: Option, } - -#[derive(Debug, Default, Serialize)] -#[serde(deny_unknown_fields)] -pub struct LoadSizesObject { - #[serde(skip_serializing_if = "Option::is_none")] - pub new: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub open: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub post_open: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub post_open_user: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub post_load_ping_lifetime_data: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub user_records: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub ping_records: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub application_records: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub ping_memory_records: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub error: Option, -}