From 5b69dc4206cc4f0964868e4a637eedaa33bd5be9 Mon Sep 17 00:00:00 2001 From: Zilinghan Date: Wed, 4 Mar 2026 12:01:50 -0600 Subject: [PATCH 1/2] fix mlflow logger --- src/driver/continuous_monitor.py | 26 +++++++++++++++++++++++--- src/logger/mlflow_logger.py | 23 ++++++++++++++++++++--- 2 files changed, 43 insertions(+), 6 deletions(-) diff --git a/src/driver/continuous_monitor.py b/src/driver/continuous_monitor.py index 299578c..7c2c15d 100644 --- a/src/driver/continuous_monitor.py +++ b/src/driver/continuous_monitor.py @@ -265,6 +265,12 @@ def _handle_drift(self, drift_signal: DriftSignal) -> None: self.logger.info( f"==== DRIFT DETECTED (Event #{self.drift_event_count})! ====", level=0 ) + # Log data timestamp range if the harness tracks it + timerange = getattr(self.modelHarness, "current_window_timerange", None) + if timerange is not None: + self.logger.info( + f"\tData time range: {timerange[0]} → {timerange[1]}", level=1 + ) self.logger.info( f"\tRegime: {drift_signal.regime.value if drift_signal.regime else 'N/A'}", level=1, @@ -328,17 +334,31 @@ def _log_metrics(self, drift_signal: DriftSignal, metric_value: float) -> None: """ flops_perf = self.flops_profiler.get_performance() - # Log all drift metrics including performance in a single call + # Log all drift metrics in a single call. + # detected=0 is sampled at 10% to reduce log volume; detected=1 is + # always included so no true drift events are dropped. + log_detected = drift_signal.drift_detected or (np.random.random() <= 0.1) self.logger.stage("drift") + # Include data timestamp range if available from the harness + timerange = getattr(self.modelHarness, "current_window_timerange", None) + ts_fields = {} + if timerange is not None: + ts_fields["data_time_start"] = timerange[0] + ts_fields["data_time_end"] = timerange[1] self.logger.log( { - "detected": drift_signal.drift_detected, + **( + {"detected": int(drift_signal.drift_detected)} + if log_detected + else {} + ), "score": drift_signal.drift_score, "regime": (drift_signal.regime.value if drift_signal.regime else "N/A"), "confidence": ( drift_signal.confidence if drift_signal.confidence else "N/A" ), f"metric_{self.metric_idx}": metric_value, + **ts_fields, **{f"cperf_{k}": v for k, v in flops_perf.items()}, }, - ) + ) \ No newline at end of file diff --git a/src/logger/mlflow_logger.py b/src/logger/mlflow_logger.py index f6b7278..4ffbbd9 100644 --- a/src/logger/mlflow_logger.py +++ b/src/logger/mlflow_logger.py @@ -174,7 +174,7 @@ def log( # Add step metrics prefixed_metrics["step"] = current_step - if self._current_stage: + if self._current_stage and increment: prefixed_metrics[f"{self._current_stage}.step"] = self._stage_steps[ self._current_stage ] @@ -187,13 +187,30 @@ def log( if self.enabled and self.run: mlflow = self._get_mlflow() # Filter to only numeric values (MLflow only accepts numbers for metrics) + # Exclude bools: bool is a subclass of int in Python, but casting + # True/False to float(1.0/0.0) corrupts categorical semantics. + # Instead, booleans are converted to int (0/1) separately. numeric_metrics = { k: float(v) for k, v in prefixed_metrics.items() - if isinstance(v, (int, float)) and k != "step" + if isinstance(v, (int, float)) + and not isinstance(v, bool) + and k != "step" } + # Convert booleans to int (0/1) so they remain discrete metrics + bool_metrics = { + k: int(v) for k, v in prefixed_metrics.items() if isinstance(v, bool) + } + numeric_metrics.update(bool_metrics) if numeric_metrics: - mlflow.log_metrics(numeric_metrics, step=current_step) + # WandB uses the global step as x-axis, however, MLflow uses the per-stage step so that + # sparse global-step values do not cause MLflow to downsample away the rare detected=1 events. + log_step = ( + self._stage_steps[self._current_stage] + if self._current_stage + else current_step + ) + mlflow.log_metrics(numeric_metrics, step=log_step) def save( self, From c50445c76401c7cc99d94ddd26ca52560e86d993 Mon Sep 17 00:00:00 2001 From: Ana Gainaru Date: Wed, 4 Mar 2026 18:14:40 -0500 Subject: [PATCH 2/2] Pass the CI --- src/driver/continuous_monitor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/driver/continuous_monitor.py b/src/driver/continuous_monitor.py index 7c2c15d..a015da3 100644 --- a/src/driver/continuous_monitor.py +++ b/src/driver/continuous_monitor.py @@ -361,4 +361,4 @@ def _log_metrics(self, drift_signal: DriftSignal, metric_value: float) -> None: **ts_fields, **{f"cperf_{k}": v for k, v in flops_perf.items()}, }, - ) \ No newline at end of file + )