From d76712cd6da136ad526c63b0df19d514788a781c Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 21 Mar 2025 15:55:17 -0700 Subject: [PATCH 1/3] feat: add tracing to cleanup --- rust/lance/src/dataset.rs | 1 + rust/lance/src/dataset/cleanup.rs | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/rust/lance/src/dataset.rs b/rust/lance/src/dataset.rs index 0b83a7c767e..48f755d668b 100644 --- a/rust/lance/src/dataset.rs +++ b/rust/lance/src/dataset.rs @@ -641,6 +641,7 @@ impl Dataset { /// # Returns /// /// * `RemovalStats` - Statistics about the removal operation + #[instrument(level = "debug", skip(self))] pub fn cleanup_old_versions( &self, older_than: Duration, diff --git a/rust/lance/src/dataset/cleanup.rs b/rust/lance/src/dataset/cleanup.rs index f1bf6727c5b..781dea73510 100644 --- a/rust/lance/src/dataset/cleanup.rs +++ b/rust/lance/src/dataset/cleanup.rs @@ -55,7 +55,7 @@ use std::{ future, sync::{Mutex, MutexGuard}, }; -use tracing::info; +use tracing::{info, instrument}; use crate::{utils::temporal::utc_now, Dataset}; @@ -150,6 +150,7 @@ impl<'a> CleanupTask<'a> { self.delete_unreferenced_files(inspection).await } + #[instrument(level = "debug", skip_all)] async fn process_manifests( &'a self, tagged_versions: &HashSet, @@ -246,6 +247,7 @@ impl<'a> CleanupTask<'a> { Ok(()) } + #[instrument(level = "debug", skip_all)] async fn delete_unreferenced_files( &self, inspection: CleanupInspection, From 93aa3526fae6a2205e37b980681420ae518a8dad Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 21 Mar 2025 16:08:21 -0700 Subject: [PATCH 2/3] add some fields --- rust/lance/src/dataset/cleanup.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/rust/lance/src/dataset/cleanup.rs b/rust/lance/src/dataset/cleanup.rs index 781dea73510..111b3b02122 100644 --- a/rust/lance/src/dataset/cleanup.rs +++ b/rust/lance/src/dataset/cleanup.rs @@ -55,7 +55,7 @@ use std::{ future, sync::{Mutex, MutexGuard}, }; -use tracing::{info, instrument}; +use tracing::{info, instrument, Span}; use crate::{utils::temporal::utc_now, Dataset}; @@ -247,7 +247,7 @@ impl<'a> CleanupTask<'a> { Ok(()) } - #[instrument(level = "debug", skip_all)] + #[instrument(level = "debug", skip_all, fields(old_versions = inspection.old_manifests.len(), bytes_deleted = tracing::field::Empty))] async fn delete_unreferenced_files( &self, inspection: CleanupInspection, @@ -308,6 +308,10 @@ impl<'a> CleanupTask<'a> { let mut removal_stats = removal_stats.into_inner().unwrap(); removal_stats.old_versions = num_old_manifests as u64; removal_stats.bytes_removed += manifest_bytes_removed?; + + let span = Span::current(); + span.record("bytes_deleted", removal_stats.bytes_removed); + Ok(removal_stats) } From 9580ab737923c0a4f0dd40c27742d422c4606eac Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 21 Mar 2025 16:12:04 -0700 Subject: [PATCH 3/3] consistency --- rust/lance/src/dataset/cleanup.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/lance/src/dataset/cleanup.rs b/rust/lance/src/dataset/cleanup.rs index 111b3b02122..8bad6eb634f 100644 --- a/rust/lance/src/dataset/cleanup.rs +++ b/rust/lance/src/dataset/cleanup.rs @@ -247,7 +247,7 @@ impl<'a> CleanupTask<'a> { Ok(()) } - #[instrument(level = "debug", skip_all, fields(old_versions = inspection.old_manifests.len(), bytes_deleted = tracing::field::Empty))] + #[instrument(level = "debug", skip_all, fields(old_versions = inspection.old_manifests.len(), bytes_removed = tracing::field::Empty))] async fn delete_unreferenced_files( &self, inspection: CleanupInspection, @@ -310,7 +310,7 @@ impl<'a> CleanupTask<'a> { removal_stats.bytes_removed += manifest_bytes_removed?; let span = Span::current(); - span.record("bytes_deleted", removal_stats.bytes_removed); + span.record("bytes_removed", removal_stats.bytes_removed); Ok(removal_stats) }