Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions Lite/Services/LocalDataService.CollectionHealth.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@ public async Task<List<CollectorHealthRow>> GetCollectionHealthAsync(int serverI
AVG(duration_ms) AS avg_duration_ms,
MAX(CASE WHEN status = 'SUCCESS' THEN collection_time END) AS last_success_time,
MAX(collection_time) AS last_run_time,
MAX(CASE WHEN status = 'ERROR' THEN error_message END) AS last_error,
MAX(CASE WHEN status = 'ERROR' THEN collection_time END) AS last_error_time
MAX(CASE WHEN status IN ('ERROR', 'PERMISSIONS') THEN error_message END) AS last_error,
MAX(CASE WHEN status IN ('ERROR', 'PERMISSIONS') THEN collection_time END) AS last_error_time,
SUM(CASE WHEN status = 'PERMISSIONS' THEN 1 ELSE 0 END) AS permission_denied_count
FROM collection_log
WHERE server_id = $1
AND collection_time >= $2
Expand All @@ -56,7 +57,8 @@ GROUP BY collector_name
LastSuccessTime = reader.IsDBNull(5) ? null : reader.GetDateTime(5),
LastRunTime = reader.IsDBNull(6) ? null : reader.GetDateTime(6),
LastError = reader.IsDBNull(7) ? null : reader.GetString(7),
LastErrorTime = reader.IsDBNull(8) ? null : reader.GetDateTime(8)
LastErrorTime = reader.IsDBNull(8) ? null : reader.GetDateTime(8),
PermissionDeniedCount = reader.IsDBNull(9) ? 0 : ToInt64(reader.GetValue(9))
});
}

Expand Down Expand Up @@ -147,6 +149,7 @@ public class CollectorHealthRow
public DateTime? LastRunTime { get; set; }
public string? LastError { get; set; }
public DateTime? LastErrorTime { get; set; }
public long PermissionDeniedCount { get; set; }

public double FailureRatePercent => TotalRuns > 0 ? (double)ErrorCount / TotalRuns * 100 : 0;
public double HoursSinceLastSuccess => LastSuccessTime.HasValue
Expand All @@ -158,6 +161,7 @@ public string HealthStatus
get
{
if (TotalRuns == 0) return "NEVER_RUN";
if (PermissionDeniedCount > 0 && ErrorCount == 0 && SuccessCount == 0) return "NO_PERMISSIONS";
if (HoursSinceLastSuccess > 24) return "FAILING";
if (HoursSinceLastSuccess > 4) return "STALE";
if (FailureRatePercent > 20) return "WARNING";
Expand Down
15 changes: 12 additions & 3 deletions Lite/Services/RemoteCollectorService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ public CollectorHealthSummary GetHealthSummary(int? serverId = null)
/// <summary>
/// Records a collector execution result for health tracking.
/// </summary>
private void RecordCollectorResult(int serverId, string collectorName, bool success, string? errorMessage = null)
private void RecordCollectorResult(int serverId, string collectorName, string status, string? errorMessage = null)
{
lock (_healthLock)
{
Expand All @@ -164,12 +164,20 @@ private void RecordCollectorResult(int serverId, string collectorName, bool succ
_collectorHealth[key] = entry;
}

if (success)
if (status == "SUCCESS")
{
entry.LastSuccessTime = DateTime.UtcNow;
entry.ConsecutiveErrors = 0;
entry.TotalSuccesses++;
}
else if (status == "PERMISSIONS")
{
/* Permission errors are not transient — don't count as failures
(which would show FAILING) but don't count as success either.
Record the error message so the user can see what's wrong. */
entry.LastErrorTime = DateTime.UtcNow;
entry.LastErrorMessage = errorMessage;
}
else
{
entry.LastErrorTime = DateTime.UtcNow;
Expand Down Expand Up @@ -337,6 +345,7 @@ public async Task RunCollectorAsync(ServerConnection server, string collectorNam
}
else if (ex.Number == 229 || ex.Number == 297 || ex.Number == 300)
{
status = "PERMISSIONS";
_logger?.LogWarning("Collector '{Collector}' permission denied for server '{Server}': {Message}",
collectorName, server.DisplayName, ex.Message);
}
Expand Down Expand Up @@ -369,7 +378,7 @@ public async Task RunCollectorAsync(ServerConnection server, string collectorNam
}

// Track collector health
RecordCollectorResult(GetServerId(server), collectorName, status == "SUCCESS", errorMessage);
RecordCollectorResult(GetServerId(server), collectorName, status, errorMessage);

// Log the collection attempt
await LogCollectionAsync(GetServerId(server), server.DisplayName, collectorName, startTime, status, errorMessage, rowsCollected, _lastSqlMs, _lastDuckDbMs);
Expand Down
Loading