feat: adopt shared ZB.MOM.WW.Health probes; add /healthz; canonical writer

This commit is contained in:
Joseph Doherty
2026-06-01 13:46:49 -04:00
parent 2a7ff03718
commit bbff1d19b5
5 changed files with 124 additions and 272 deletions
+32 -23
View File
@@ -1,5 +1,6 @@
using HealthChecks.UI.Client;
using Microsoft.AspNetCore.Diagnostics.HealthChecks;
using ZB.MOM.WW.Health;
using ZB.MOM.WW.Health.Akka;
using ZB.MOM.WW.Health.EntityFrameworkCore;
using ZB.MOM.WW.ScadaBridge.AuditLog;
using ZB.MOM.WW.ScadaBridge.CentralUI;
using ZB.MOM.WW.ScadaBridge.ClusterInfrastructure;
@@ -110,11 +111,25 @@ try
?? throw new InvalidOperationException("ScadaBridge:Database:ConfigurationDb connection string is required for Central role.");
builder.Services.AddConfigurationDatabase(configDbConnectionString);
// WP-12: Health checks for readiness gating
// WP-12: Health checks for readiness gating — shared ZB.MOM.WW.Health probes.
// Check names and the ready/active tier split are preserved: database + akka-cluster
// carry the Ready tag (/health/ready), active-node carries the Active tag (/health/active).
// The Akka checks resolve ActorSystem from DI via the transient bridge registered below;
// the DatabaseHealthCheck<TContext> resolves a scoped ScadaBridgeDbContext (no factory).
// failureStatus: null keeps the framework default, so a failing check reports Unhealthy.
builder.Services.AddHealthChecks()
.AddCheck<DatabaseHealthCheck>("database")
.AddCheck<AkkaClusterHealthCheck>("akka-cluster")
.AddCheck<ActiveNodeHealthCheck>("active-node");
.AddTypeActivatedCheck<DatabaseHealthCheck<ScadaBridgeDbContext>>(
"database",
failureStatus: null,
tags: new[] { ZbHealthTags.Ready })
.AddTypeActivatedCheck<AkkaClusterHealthCheck>(
"akka-cluster",
failureStatus: null,
tags: new[] { ZbHealthTags.Ready },
args: AkkaClusterStatusPolicy.Default)
.AddTypeActivatedCheck<ActiveNodeHealthCheck>(
"active-node",
failureStatus: null,
tags: new[] { ZbHealthTags.Active });
// WP-13: Akka.NET bootstrap via hosted service
builder.Services.AddSingleton<AkkaHostedService>();
@@ -221,23 +236,17 @@ try
&& HttpMethods.IsPost(ctx.Request.Method),
branch => branch.UseAuditWriteMiddleware());
// WP-12: Map readiness endpoint — returns 503 until ready, 200 when ready.
// REQ-HOST-4a defines readiness as cluster membership + DB connectivity,
// explicitly NOT cluster leadership. The leader-only "active-node" check is
// excluded here so a fully operational standby central node reports ready;
// leadership is reported separately on /health/active.
app.MapHealthChecks("/health/ready", new HealthCheckOptions
{
Predicate = check => check.Name != "active-node",
ResponseWriter = UIResponseWriter.WriteHealthCheckUIResponse
});
// Active node endpoint — returns 200 only on the cluster leader; used by Traefik for routing
app.MapHealthChecks("/health/active", new HealthCheckOptions
{
Predicate = check => check.Name == "active-node",
ResponseWriter = UIResponseWriter.WriteHealthCheckUIResponse
});
// WP-12: Map the canonical three-tier health endpoints in one call:
// /health/ready — Ready-tagged checks (database + akka-cluster). REQ-HOST-4a defines
// readiness as cluster membership + DB connectivity, explicitly NOT
// cluster leadership, so the leader-only active-node check is excluded
// (a fully operational standby central node still reports ready).
// /health/active — Active-tagged check (active-node); returns 200 only on the cluster
// leader; used by Traefik for routing.
// /healthz — bare process liveness; runs no checks (always 200 while the process
// is up). New tier added by adopting the shared library.
// All three are anonymous and use the canonical ZbHealthWriter JSON output.
app.MapZbHealth();
app.MapStaticAssets();
app.MapCentralUI<ZB.MOM.WW.ScadaBridge.Host.Components.App>();