feat: wire all health metrics and add instance counts to dashboard

Wired ISiteHealthCollector calls for script errors (ScriptExecutionActor),
alarm eval errors (AlarmActor), dead letters (DeadLetterMonitorActor), and
S&F buffer depth placeholder. Added instance count tracking (deployed/
enabled/disabled) to SiteHealthReport via DeploymentManagerActor. Updated
Health Dashboard UI to show instance counts per site. All metrics flow
through the existing health report pipeline via ClusterClient.
This commit is contained in:
Joseph Doherty
2026-03-18 00:57:49 -04:00
parent 88b5f6cb54
commit f165ca2774
18 changed files with 151 additions and 28 deletions

View File

@@ -15,6 +15,8 @@ public class SiteHealthCollector : ISiteHealthCollector
// Dead-letter tally. NOTE(review): presumably an interval counter that is reset when a report is collected — confirm.
private int _deadLetterCount;
// Latest health per connection (presumably keyed by connection name — confirm); copied into each report.
private readonly ConcurrentDictionary<string, ConnectionHealth> _connectionStatuses = new();
// Latest tag resolution totals (total subscribed vs. successfully resolved), keyed by connection name.
private readonly ConcurrentDictionary<string, TagResolutionStatus> _tagResolutionCounts = new();
// Most recent store-and-forward buffer depths as supplied via SetStoreAndForwardDepths; starts out empty.
private IReadOnlyDictionary<string, int> _sfBufferDepths = new Dictionary<string, int>();
// Instance totals written by SetInstanceCounts and read when a health report is built.
private int _deployedInstanceCount, _enabledInstanceCount, _disabledInstanceCount;
/// <summary>
/// Increment the script error counter. Covers unhandled exceptions,
@@ -68,6 +70,26 @@ public class SiteHealthCollector : ISiteHealthCollector
_tagResolutionCounts[connectionName] = new TagResolutionStatus(totalSubscribed, successfullyResolved);
}
/// <summary>
/// Replace the current store-and-forward buffer depths snapshot.
/// Called before report collection with data from the S&amp;F service.
/// </summary>
/// <remarks>
/// The supplied dictionary is copied rather than stored by reference, so a caller that keeps
/// mutating its own dictionary cannot race with report collection enumerating the snapshot.
/// </remarks>
/// <param name="depths">Buffer depth per string key; must not be null.</param>
/// <exception cref="System.ArgumentNullException">
/// Thrown by the dictionary copy when <paramref name="depths"/> is null, so a bad argument
/// fails here instead of later during report collection.
/// </exception>
public void SetStoreAndForwardDepths(IReadOnlyDictionary<string, int> depths)
{
    // Defensive copy: isolates the stored snapshot from the caller's instance and rejects null up front.
    IReadOnlyDictionary<string, int> snapshot = new Dictionary<string, int>(depths);

    // Publish the new snapshot atomically, matching how the other setters publish their state.
    Interlocked.Exchange(ref _sfBufferDepths, snapshot);
}
/// <summary>
/// Record the latest deployed / enabled / disabled instance totals.
/// Invoked by the Deployment Manager after instance state changes.
/// </summary>
/// <remarks>
/// Each total is published with its own atomic exchange; the three writes are not atomic as a
/// group, so a concurrent reader may briefly observe values from two different calls.
/// </remarks>
/// <param name="deployed">New value for the deployed instance count.</param>
/// <param name="enabled">New value for the enabled instance count.</param>
/// <param name="disabled">New value for the disabled instance count.</param>
public void SetInstanceCounts(int deployed, int enabled, int disabled)
{
    // The previous values returned by Exchange are intentionally discarded.
    _ = Interlocked.Exchange(ref _deployedInstanceCount, deployed);
    _ = Interlocked.Exchange(ref _enabledInstanceCount, enabled);
    _ = Interlocked.Exchange(ref _disabledInstanceCount, disabled);
}
/// <summary>
/// Collect the current health report for the site and reset interval counters.
/// Connection statuses and tag resolution counts are NOT reset (they reflect current state).
@@ -84,8 +106,8 @@ public class SiteHealthCollector : ISiteHealthCollector
var connectionStatuses = new Dictionary<string, ConnectionHealth>(_connectionStatuses);
var tagResolution = new Dictionary<string, TagResolutionStatus>(_tagResolutionCounts);
// S&F buffer depth: placeholder (Phase 3C)
var sfBufferDepths = new Dictionary<string, int>();
// Snapshot current S&F buffer depths
var sfBufferDepths = new Dictionary<string, int>(_sfBufferDepths);
return new SiteHealthReport(
SiteId: siteId,
@@ -96,6 +118,9 @@ public class SiteHealthCollector : ISiteHealthCollector
ScriptErrorCount: scriptErrors,
AlarmEvaluationErrorCount: alarmErrors,
StoreAndForwardBufferDepths: sfBufferDepths,
DeadLetterCount: deadLetters);
DeadLetterCount: deadLetters,
DeployedInstanceCount: _deployedInstanceCount,
EnabledInstanceCount: _enabledInstanceCount,
DisabledInstanceCount: _disabledInstanceCount);
}
}