feat(kpi): K5 — Host central wiring + KpiHistoryRecorder cluster singleton + appsettings (not readiness-gated)

Wire the M6 KPI History recorder into the central composition path:
- Program.cs: call services.AddKpiHistory(configuration) on the central-only
  branch alongside AddNotificationOutbox/AddAuditLog/AddSiteCallAudit.
- AkkaHostedService.cs: register KpiHistoryRecorderActor as a central,
  non-role-scoped ClusterSingletonManager + ClusterSingletonProxy + a
  PhaseClusterLeave CoordinatedShutdown graceful-stop drain (singleton name
  'kpi-history-recorder'), copied/adapted from the audit-log-purge block.
- appsettings.Central.json (Host + docker + docker-env2 central nodes): add a
  ScadaBridge:KpiHistory section (SampleInterval 00:01:00, RetentionDays 90,
  PurgeInterval 1.00:00:00, DefaultMaxSeriesPoints 200).

KPI history is observability/best-effort and MUST NOT gate readiness: the
recorder is deliberately NOT added to RequiredSingletonsHealthCheck or any
other readiness gate.
This commit is contained in:
Joseph Doherty
2026-06-17 20:20:34 -04:00
parent 601cc6f594
commit e14433cd64
7 changed files with 93 additions and 0 deletions
@@ -699,6 +699,63 @@ akka {{
_actorSystem.ActorOf(auditReconProxyProps, "site-audit-reconciliation-proxy");
_logger.LogInformation("SiteAuditReconciliationActor singleton created");
// KPI History (#26, M6) — central singleton that periodically samples the
// Notification Outbox / Site Call Audit point-in-time KPIs into the
// KpiHistorySamples table and runs the daily retention purge. Mirrors the
// audit-log-purge singleton pattern above: a ClusterSingletonManager pins
// the recorder to the active central node, a ClusterSingletonProxy gives a
// stable address, and a PhaseClusterLeave graceful-stop task drains the
// in-flight tick before handover. The recorder's sample + purge timers
// self-schedule in PreStart. Options come from AddKpiHistory (central
// composition root only). The actor takes the root IServiceProvider and
// opens its own per-tick DI scope (the KPI repository is a scoped EF Core
// service), so the 3 ctor args (IServiceProvider, KpiHistoryOptions,
// ILogger) are resolved here from DI exactly like the other singletons.
// NOT readiness-gated by design: KPI history is observability/best-effort
// (it must never gate /health/ready), so kpi-history-recorder is
// deliberately absent from RequiredSingletonsHealthCheck.

// Step 1: resolve the recorder's constructor inputs. The options value and a
// typed logger come from the root provider; the provider itself is passed
// through unchanged as the first constructor argument below.
var kpiHistoryOptions = _serviceProvider
.GetRequiredService<IOptions<ZB.MOM.WW.ScadaBridge.KpiHistory.KpiHistoryOptions>>().Value;
var kpiHistoryLogger = _serviceProvider.GetRequiredService<ILoggerFactory>()
.CreateLogger<ZB.MOM.WW.ScadaBridge.KpiHistory.KpiHistoryRecorderActor>();
// Step 2: build the singleton manager props. The recorder runs under the
// singleton name "kpi-history-recorder" and is terminated with PoisonPill.
// The same singleton name is repeated in the proxy settings further down,
// so the two literals must be changed together.
var kpiHistorySingletonProps = ClusterSingletonManager.Props(
singletonProps: Props.Create(() => new ZB.MOM.WW.ScadaBridge.KpiHistory.KpiHistoryRecorderActor(
_serviceProvider,
kpiHistoryOptions,
kpiHistoryLogger)),
terminationMessage: PoisonPill.Instance,
settings: ClusterSingletonManagerSettings.Create(_actorSystem!)
.WithSingletonName("kpi-history-recorder"));
// Step 3: start the manager as the top-level actor
// "kpi-history-recorder-singleton". The proxy's singletonManagerPath below
// ("/user/kpi-history-recorder-singleton") is derived from this name. The
// reference is kept because the shutdown task needs it for GracefulStop.
var kpiHistorySingletonManager =
_actorSystem!.ActorOf(kpiHistorySingletonProps, "kpi-history-recorder-singleton");
// Step 4: register a task in the cluster-leave shutdown phase that gives the
// manager up to 10 seconds to stop gracefully.
var kpiHistoryShutdown = Akka.Actor.CoordinatedShutdown.Get(_actorSystem);
kpiHistoryShutdown.AddTask(
Akka.Actor.CoordinatedShutdown.PhaseClusterLeave,
"drain-kpi-history-recorder-singleton",
async () =>
{
try
{
await kpiHistorySingletonManager.GracefulStop(TimeSpan.FromSeconds(10));
}
catch (Exception ex)
{
// Best-effort drain: any failure (including a timeout) is only logged
// as a warning and is never rethrown out of the shutdown task.
_logger.LogWarning(ex,
"KpiHistoryRecorder singleton did not drain within the graceful-stop "
+ "timeout; falling through to PoisonPill handover");
}
// Done is returned on both the success and the failure path, so this task
// always reports completion to the shutdown phase.
return Akka.Done.Instance;
});
// Step 5: create the proxy actor "kpi-history-recorder-proxy". Its manager
// path and singleton name must match the values used for the manager above.
var kpiHistoryProxyProps = ClusterSingletonProxy.Props(
singletonManagerPath: "/user/kpi-history-recorder-singleton",
settings: ClusterSingletonProxySettings.Create(_actorSystem)
.WithSingletonName("kpi-history-recorder"));
_actorSystem.ActorOf(kpiHistoryProxyProps, "kpi-history-recorder-proxy");
_logger.LogInformation("KpiHistoryRecorderActor singleton created (not readiness-gated)");
_logger.LogInformation("Central actors registered. CentralCommunicationActor created.");
}
@@ -16,6 +16,7 @@ using ZB.MOM.WW.ScadaBridge.Host.Actors;
using ZB.MOM.WW.ScadaBridge.Host.Health;
using ZB.MOM.WW.ScadaBridge.InboundAPI;
using ZB.MOM.WW.ScadaBridge.InboundAPI.Middleware;
using ZB.MOM.WW.ScadaBridge.KpiHistory;
using ZB.MOM.WW.ScadaBridge.ManagementService;
using ZB.MOM.WW.ScadaBridge.NotificationOutbox;
using ZB.MOM.WW.ScadaBridge.NotificationService;
@@ -110,6 +111,11 @@ try
// but the call is here for symmetry with the other audit composition
// roots so future per-actor DI lands without touching Program.cs.
builder.Services.AddSiteCallAudit();
// KPI History (#26, M6) — central-only. Binds KpiHistoryOptions from
// ScadaBridge:KpiHistory and registers the validated options consumed by
// the KpiHistoryRecorderActor cluster singleton (started in
// AkkaHostedService). Observability/best-effort: NOT readiness-gated.
builder.Services.AddKpiHistory(builder.Configuration);
builder.Services.AddTemplateEngine();
builder.Services.AddDeploymentManager();
// Host is the composition root and owns config-coupled wiring: register the
@@ -64,6 +64,12 @@
"PurgeInterval": "1.00:00:00",
"DeliveredKpiWindow": "00:01:00"
},
"KpiHistory": {
"SampleInterval": "00:01:00",
"RetentionDays": 90,
"PurgeInterval": "1.00:00:00",
"DefaultMaxSeriesPoints": 200
},
"Transport": {
"BundleSessionTtlMinutes": 30,
"MaxBundleSizeMb": 100,