feat: wire all health metrics and add instance counts to dashboard

Wired ISiteHealthCollector calls for script errors (ScriptExecutionActor),
alarm evaluation errors (AlarmActor), and dead letters
(DeadLetterMonitorActor), plus a placeholder for store-and-forward (S&F)
buffer depth. Added instance count tracking (deployed/enabled/disabled) to
SiteHealthReport via DeploymentManagerActor. Updated the Health Dashboard UI
to show instance counts per site. All metrics flow through the existing
health report pipeline via ClusterClient.
This commit is contained in:
Joseph Doherty
2026-03-18 00:57:49 -04:00
parent 88b5f6cb54
commit f165ca2774
18 changed files with 151 additions and 28 deletions

View File

@@ -53,7 +53,10 @@ public class HealthAggregationTests
{
["ext-system"] = i * 2
},
DeadLetterCount: 0);
DeadLetterCount: 0,
DeployedInstanceCount: 0,
EnabledInstanceCount: 0,
DisabledInstanceCount: 0);
_aggregator.ProcessReport(report);
}
@@ -83,7 +86,10 @@ public class HealthAggregationTests
ScriptErrorCount: seq % 5 == 0 ? 1 : 0,
AlarmEvaluationErrorCount: 0,
StoreAndForwardBufferDepths: new Dictionary<string, int>(),
DeadLetterCount: 0);
DeadLetterCount: 0,
DeployedInstanceCount: 0,
EnabledInstanceCount: 0,
DisabledInstanceCount: 0);
_aggregator.ProcessReport(report);
}
@@ -111,14 +117,14 @@ public class HealthAggregationTests
siteId, 10, DateTimeOffset.UtcNow,
new Dictionary<string, ConnectionHealth>(),
new Dictionary<string, TagResolutionStatus>(),
5, 0, new Dictionary<string, int>(), 0));
5, 0, new Dictionary<string, int>(), 0, 0, 0, 0));
// Send stale report with seq 5 — should be rejected
_aggregator.ProcessReport(new SiteHealthReport(
siteId, 5, DateTimeOffset.UtcNow,
new Dictionary<string, ConnectionHealth>(),
new Dictionary<string, TagResolutionStatus>(),
99, 0, new Dictionary<string, int>(), 0));
99, 0, new Dictionary<string, int>(), 0, 0, 0, 0));
var state = _aggregator.GetSiteState(siteId);
Assert.NotNull(state);