diff --git a/src/ZB.MOM.WW.ScadaBridge.Host/Health/ActiveNodeHealthCheck.cs b/src/ZB.MOM.WW.ScadaBridge.Host/Health/ActiveNodeHealthCheck.cs
deleted file mode 100644
index 6e887e3c..00000000
--- a/src/ZB.MOM.WW.ScadaBridge.Host/Health/ActiveNodeHealthCheck.cs
+++ /dev/null
@@ -1,45 +0,0 @@
-using Akka.Cluster;
-using Microsoft.Extensions.Diagnostics.HealthChecks;
-using ZB.MOM.WW.ScadaBridge.Host.Actors;
-
-namespace ZB.MOM.WW.ScadaBridge.Host.Health;
-
-///
-/// Health check that returns healthy only if this node is the active (leader) node
-/// in the Akka.NET cluster. Used by Traefik to route traffic to the active node.
-///
-public class ActiveNodeHealthCheck : IHealthCheck
-{
- private readonly AkkaHostedService _akkaService;
-
- /// Initializes a new with the given Akka hosted service.
- /// The Akka hosted service providing access to the actor system and cluster state.
- public ActiveNodeHealthCheck(AkkaHostedService akkaService)
- {
- _akkaService = akkaService;
- }
-
- /// Returns healthy if this node is the cluster leader (active node); otherwise returns unhealthy.
- /// Health check context providing registration details.
- /// Cancellation token.
- public Task CheckHealthAsync(
- HealthCheckContext context,
- CancellationToken cancellationToken = default)
- {
- var system = _akkaService.ActorSystem;
- if (system == null)
- return Task.FromResult(HealthCheckResult.Unhealthy("ActorSystem not yet available."));
-
- var cluster = Cluster.Get(system);
- var self = cluster.SelfMember;
-
- if (self.Status != MemberStatus.Up)
- return Task.FromResult(HealthCheckResult.Unhealthy($"Node not Up (status: {self.Status})."));
-
- var leader = cluster.State.Leader;
- if (leader != null && leader == self.Address)
- return Task.FromResult(HealthCheckResult.Healthy("Active node (cluster leader)."));
-
- return Task.FromResult(HealthCheckResult.Unhealthy("Standby node (not cluster leader)."));
- }
-}
diff --git a/src/ZB.MOM.WW.ScadaBridge.Host/Health/AkkaClusterHealthCheck.cs b/src/ZB.MOM.WW.ScadaBridge.Host/Health/AkkaClusterHealthCheck.cs
deleted file mode 100644
index 4356c7dd..00000000
--- a/src/ZB.MOM.WW.ScadaBridge.Host/Health/AkkaClusterHealthCheck.cs
+++ /dev/null
@@ -1,52 +0,0 @@
-using Akka.Cluster;
-using Microsoft.Extensions.Diagnostics.HealthChecks;
-using ZB.MOM.WW.ScadaBridge.Host.Actors;
-
-namespace ZB.MOM.WW.ScadaBridge.Host.Health;
-
-///
-/// Health check that verifies this node is an active member of the Akka.NET cluster.
-/// Returns healthy only if the node's self-member status is Up or Joining.
-///
-public class AkkaClusterHealthCheck : IHealthCheck
-{
- private readonly AkkaHostedService _akkaService;
-
- ///
- /// Initializes the health check with the Akka hosted service.
- ///
- /// The hosted service providing access to the Akka actor system.
- public AkkaClusterHealthCheck(AkkaHostedService akkaService)
- {
- _akkaService = akkaService;
- }
-
- ///
- /// Checks that this node is an active member of the Akka.NET cluster.
- ///
- /// Health check context.
- /// Cancellation token.
- public Task CheckHealthAsync(
- HealthCheckContext context,
- CancellationToken cancellationToken = default)
- {
- var system = _akkaService.ActorSystem;
- if (system == null)
- return Task.FromResult(HealthCheckResult.Degraded("ActorSystem not yet available."));
-
- var cluster = Cluster.Get(system);
- var status = cluster.SelfMember.Status;
-
- var result = status switch
- {
- MemberStatus.Up or MemberStatus.Joining =>
- HealthCheckResult.Healthy($"Akka cluster member status: {status}"),
- MemberStatus.Leaving or MemberStatus.Exiting =>
- HealthCheckResult.Degraded($"Akka cluster member status: {status}"),
- _ =>
- HealthCheckResult.Unhealthy($"Akka cluster member status: {status}")
- };
-
- return Task.FromResult(result);
- }
-}
diff --git a/src/ZB.MOM.WW.ScadaBridge.Host/Health/DatabaseHealthCheck.cs b/src/ZB.MOM.WW.ScadaBridge.Host/Health/DatabaseHealthCheck.cs
deleted file mode 100644
index beb3c3cb..00000000
--- a/src/ZB.MOM.WW.ScadaBridge.Host/Health/DatabaseHealthCheck.cs
+++ /dev/null
@@ -1,43 +0,0 @@
-using Microsoft.Extensions.Diagnostics.HealthChecks;
-using ZB.MOM.WW.ScadaBridge.ConfigurationDatabase;
-
-namespace ZB.MOM.WW.ScadaBridge.Host.Health;
-
-///
-/// Health check that verifies database connectivity for Central nodes.
-///
-public class DatabaseHealthCheck : IHealthCheck
-{
- private readonly ScadaBridgeDbContext _dbContext;
-
- ///
- /// Initializes a new .
- ///
- /// The EF Core database context used to test connectivity.
- public DatabaseHealthCheck(ScadaBridgeDbContext dbContext)
- {
- _dbContext = dbContext;
- }
-
- ///
- /// Checks database connectivity by attempting to open a connection.
- ///
- /// Health check context providing failure status information.
- /// Cancellation token for the check.
- public async Task CheckHealthAsync(
- HealthCheckContext context,
- CancellationToken cancellationToken = default)
- {
- try
- {
- var canConnect = await _dbContext.Database.CanConnectAsync(cancellationToken);
- return canConnect
- ? HealthCheckResult.Healthy("Database connection is available.")
- : HealthCheckResult.Unhealthy("Database connection failed.");
- }
- catch (Exception ex)
- {
- return HealthCheckResult.Unhealthy("Database connection failed.", ex);
- }
- }
-}
diff --git a/src/ZB.MOM.WW.ScadaBridge.Host/Program.cs b/src/ZB.MOM.WW.ScadaBridge.Host/Program.cs
index b1146c5f..fddb5f1d 100644
--- a/src/ZB.MOM.WW.ScadaBridge.Host/Program.cs
+++ b/src/ZB.MOM.WW.ScadaBridge.Host/Program.cs
@@ -1,5 +1,6 @@
-using HealthChecks.UI.Client;
-using Microsoft.AspNetCore.Diagnostics.HealthChecks;
+using ZB.MOM.WW.Health;
+using ZB.MOM.WW.Health.Akka;
+using ZB.MOM.WW.Health.EntityFrameworkCore;
using ZB.MOM.WW.ScadaBridge.AuditLog;
using ZB.MOM.WW.ScadaBridge.CentralUI;
using ZB.MOM.WW.ScadaBridge.ClusterInfrastructure;
@@ -110,11 +111,25 @@ try
?? throw new InvalidOperationException("ScadaBridge:Database:ConfigurationDb connection string is required for Central role.");
builder.Services.AddConfigurationDatabase(configDbConnectionString);
- // WP-12: Health checks for readiness gating
+ // WP-12: Health checks for readiness gating — shared ZB.MOM.WW.Health probes.
+ // Check names and the ready/active tier split are preserved: database + akka-cluster
+ // carry the Ready tag (/health/ready), active-node carries the Active tag (/health/active).
+ // The Akka checks resolve ActorSystem from DI via the transient bridge registered below;
+ // the DatabaseHealthCheck resolves a scoped ScadaBridgeDbContext (no factory).
builder.Services.AddHealthChecks()
- .AddCheck("database")
- .AddCheck("akka-cluster")
- .AddCheck("active-node");
+ .AddTypeActivatedCheck>(
+ "database",
+ failureStatus: null,
+ tags: new[] { ZbHealthTags.Ready })
+ .AddTypeActivatedCheck(
+ "akka-cluster",
+ failureStatus: null,
+ tags: new[] { ZbHealthTags.Ready },
+ args: AkkaClusterStatusPolicy.Default)
+ .AddTypeActivatedCheck(
+ "active-node",
+ failureStatus: null,
+ tags: new[] { ZbHealthTags.Active });
// WP-13: Akka.NET bootstrap via hosted service
builder.Services.AddSingleton();
@@ -221,23 +236,17 @@ try
&& HttpMethods.IsPost(ctx.Request.Method),
branch => branch.UseAuditWriteMiddleware());
- // WP-12: Map readiness endpoint — returns 503 until ready, 200 when ready.
- // REQ-HOST-4a defines readiness as cluster membership + DB connectivity,
- // explicitly NOT cluster leadership. The leader-only "active-node" check is
- // excluded here so a fully operational standby central node reports ready;
- // leadership is reported separately on /health/active.
- app.MapHealthChecks("/health/ready", new HealthCheckOptions
- {
- Predicate = check => check.Name != "active-node",
- ResponseWriter = UIResponseWriter.WriteHealthCheckUIResponse
- });
-
- // Active node endpoint — returns 200 only on the cluster leader; used by Traefik for routing
- app.MapHealthChecks("/health/active", new HealthCheckOptions
- {
- Predicate = check => check.Name == "active-node",
- ResponseWriter = UIResponseWriter.WriteHealthCheckUIResponse
- });
+ // WP-12: Map the canonical three-tier health endpoints in one call:
+ // /health/ready — Ready-tagged checks (database + akka-cluster). REQ-HOST-4a defines
+ // readiness as cluster membership + DB connectivity, explicitly NOT
+ // cluster leadership, so the leader-only active-node check is excluded
+ // (a fully operational standby central node still reports ready).
+ // /health/active — Active-tagged check (active-node); returns 200 only on the cluster
+ // leader; used by Traefik for routing.
+ // /healthz — bare process liveness; runs no checks (always 200 while the process
+ // is up). New tier added by adopting the shared library.
+ // All three are anonymous and use the canonical ZbHealthWriter JSON output.
+ app.MapZbHealth();
app.MapStaticAssets();
app.MapCentralUI();
diff --git a/tests/ZB.MOM.WW.ScadaBridge.Host.Tests/HealthCheckTests.cs b/tests/ZB.MOM.WW.ScadaBridge.Host.Tests/HealthCheckTests.cs
index 7f0cec38..49754337 100644
--- a/tests/ZB.MOM.WW.ScadaBridge.Host.Tests/HealthCheckTests.cs
+++ b/tests/ZB.MOM.WW.ScadaBridge.Host.Tests/HealthCheckTests.cs
@@ -1,11 +1,20 @@
+using System.Linq;
using Microsoft.AspNetCore.Mvc.Testing;
using Microsoft.Extensions.Configuration;
-using ZB.MOM.WW.ScadaBridge.Host.Health;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Diagnostics.HealthChecks;
+using Microsoft.Extensions.Options;
+using ZB.MOM.WW.Health;
namespace ZB.MOM.WW.ScadaBridge.Host.Tests;
///
-/// WP-12: Tests for /health/ready and /health/active endpoints.
+/// WP-12: Tests for the three-tier health endpoints after adopting the shared
+/// ZB.MOM.WW.Health probes. Verifies that /health/ready, /health/active and the new
+/// /healthz tier are mapped, and that the readiness/active tier split is now carried by
+/// the canonical <see cref="ZbHealthTags"/> (Ready for database + akka-cluster, Active for
+/// active-node) rather than by check-name predicates. These are pure route/tag assertions
+/// — they require no database, LDAP, or formed Akka cluster.
///
public class HealthCheckTests : IDisposable
{
@@ -25,41 +34,49 @@ public class HealthCheckTests : IDisposable
}
}
+ private WebApplicationFactory CreateCentralFactory()
+ {
+ var factory = new WebApplicationFactory()
+ .WithWebHostBuilder(builder =>
+ {
+ builder.ConfigureAppConfiguration((context, config) =>
+ {
+ config.AddInMemoryCollection(new Dictionary
+ {
+ ["ScadaBridge:Node:NodeHostname"] = "localhost",
+ ["ScadaBridge:Node:RemotingPort"] = "0",
+ ["ScadaBridge:Cluster:SeedNodes:0"] = "akka.tcp://scadabridge@localhost:2551",
+ ["ScadaBridge:Cluster:SeedNodes:1"] = "akka.tcp://scadabridge@localhost:2552",
+ ["ScadaBridge:Database:SkipMigrations"] = "true",
+ });
+ });
+ builder.UseSetting("ScadaBridge:Node:Role", "Central");
+ builder.UseSetting("ScadaBridge:Database:SkipMigrations", "true");
+ });
+ _disposables.Add(factory);
+ return factory;
+ }
+
+ private static IEnumerable Registrations(WebApplicationFactory factory) =>
+ factory.Services.GetRequiredService>().Value.Registrations;
+
[Fact]
- public async Task HealthReady_Endpoint_ReturnsResponse()
+ public async Task HealthReady_Endpoint_IsMapped()
{
var previousEnv = Environment.GetEnvironmentVariable("DOTNET_ENVIRONMENT");
try
{
Environment.SetEnvironmentVariable("DOTNET_ENVIRONMENT", "Central");
-
- var factory = new WebApplicationFactory()
- .WithWebHostBuilder(builder =>
- {
- builder.ConfigureAppConfiguration((context, config) =>
- {
- config.AddInMemoryCollection(new Dictionary
- {
- ["ScadaBridge:Node:NodeHostname"] = "localhost",
- ["ScadaBridge:Node:RemotingPort"] = "0",
- ["ScadaBridge:Cluster:SeedNodes:0"] = "akka.tcp://scadabridge@localhost:2551",
- ["ScadaBridge:Cluster:SeedNodes:1"] = "akka.tcp://scadabridge@localhost:2552",
- ["ScadaBridge:Database:SkipMigrations"] = "true",
- });
- });
- builder.UseSetting("ScadaBridge:Node:Role", "Central");
- builder.UseSetting("ScadaBridge:Database:SkipMigrations", "true");
- });
- _disposables.Add(factory);
-
+ var factory = CreateCentralFactory();
var client = factory.CreateClient();
_disposables.Add(client);
var response = await client.GetAsync("/health/ready");
- // The endpoint exists and returns a status code.
- // With test infrastructure (no real DB), the database check may fail,
- // so we accept either 200 (Healthy) or 503 (Unhealthy).
+ // The endpoint exists and returns a status code. With test infrastructure
+ // (no real DB / cluster) the readiness checks may report Unhealthy, so we
+ // accept either 200 (Healthy/Degraded) or 503 (Unhealthy) — never 404.
+ Assert.NotEqual(System.Net.HttpStatusCode.NotFound, response.StatusCode);
Assert.True(
response.StatusCode == System.Net.HttpStatusCode.OK ||
response.StatusCode == System.Net.HttpStatusCode.ServiceUnavailable,
@@ -72,39 +89,19 @@ public class HealthCheckTests : IDisposable
}
[Fact]
- public async Task HealthActive_Endpoint_ReturnsResponse()
+ public async Task HealthActive_Endpoint_IsMapped()
{
var previousEnv = Environment.GetEnvironmentVariable("DOTNET_ENVIRONMENT");
try
{
Environment.SetEnvironmentVariable("DOTNET_ENVIRONMENT", "Central");
-
- var factory = new WebApplicationFactory()
- .WithWebHostBuilder(builder =>
- {
- builder.ConfigureAppConfiguration((context, config) =>
- {
- config.AddInMemoryCollection(new Dictionary
- {
- ["ScadaBridge:Node:NodeHostname"] = "localhost",
- ["ScadaBridge:Node:RemotingPort"] = "0",
- ["ScadaBridge:Cluster:SeedNodes:0"] = "akka.tcp://scadabridge@localhost:2551",
- ["ScadaBridge:Cluster:SeedNodes:1"] = "akka.tcp://scadabridge@localhost:2552",
- ["ScadaBridge:Database:SkipMigrations"] = "true",
- });
- });
- builder.UseSetting("ScadaBridge:Node:Role", "Central");
- builder.UseSetting("ScadaBridge:Database:SkipMigrations", "true");
- });
- _disposables.Add(factory);
-
+ var factory = CreateCentralFactory();
var client = factory.CreateClient();
_disposables.Add(client);
var response = await client.GetAsync("/health/active");
- // In test mode, the ActorSystem may not be fully available,
- // so the active-node check returns 503 (Unhealthy).
+ Assert.NotEqual(System.Net.HttpStatusCode.NotFound, response.StatusCode);
Assert.True(
response.StatusCode == System.Net.HttpStatusCode.OK ||
response.StatusCode == System.Net.HttpStatusCode.ServiceUnavailable,
@@ -117,46 +114,21 @@ public class HealthCheckTests : IDisposable
}
[Fact]
- public async Task HealthReady_Endpoint_ExcludesActiveNodeCheck()
+ public async Task Healthz_LivenessEndpoint_IsMappedAndReturns200()
{
- // Host-001 regression: /health/ready must reflect cluster membership + DB
- // connectivity only (REQ-HOST-4a), NOT cluster leadership. The leader-only
- // "active-node" check belongs solely to /health/active. If /health/ready
- // included "active-node", a fully operational standby central node would
- // permanently report 503, breaking load-balancer failover readiness.
+ // New tier added by adopting the shared library: /healthz runs no checks, so it
+ // returns 200 as long as the process is up — independent of DB / cluster state.
var previousEnv = Environment.GetEnvironmentVariable("DOTNET_ENVIRONMENT");
try
{
Environment.SetEnvironmentVariable("DOTNET_ENVIRONMENT", "Central");
-
- var factory = new WebApplicationFactory()
- .WithWebHostBuilder(builder =>
- {
- builder.ConfigureAppConfiguration((context, config) =>
- {
- config.AddInMemoryCollection(new Dictionary
- {
- ["ScadaBridge:Node:NodeHostname"] = "localhost",
- ["ScadaBridge:Node:RemotingPort"] = "0",
- ["ScadaBridge:Cluster:SeedNodes:0"] = "akka.tcp://scadabridge@localhost:2551",
- ["ScadaBridge:Cluster:SeedNodes:1"] = "akka.tcp://scadabridge@localhost:2552",
- ["ScadaBridge:Database:SkipMigrations"] = "true",
- });
- });
- builder.UseSetting("ScadaBridge:Node:Role", "Central");
- builder.UseSetting("ScadaBridge:Database:SkipMigrations", "true");
- });
- _disposables.Add(factory);
-
+ var factory = CreateCentralFactory();
var client = factory.CreateClient();
_disposables.Add(client);
- var response = await client.GetAsync("/health/ready");
- var body = await response.Content.ReadAsStringAsync();
+ var response = await client.GetAsync("/healthz");
- // The readiness body lists each executed check by name in its entries map.
- // The leader-only "active-node" check must not be among them.
- Assert.DoesNotContain("active-node", body);
+ Assert.Equal(System.Net.HttpStatusCode.OK, response.StatusCode);
}
finally
{
@@ -165,43 +137,54 @@ public class HealthCheckTests : IDisposable
}
[Fact]
- public async Task ActiveNodeHealthCheck_SystemNotStarted_ReturnsUnhealthy()
+ public void ReadyTier_Carries_Database_And_AkkaCluster()
{
- // AkkaHostedService before StartAsync has ActorSystem == null.
- // The integration test (HealthActive_Endpoint_ReturnsResponse) validates the full
- // endpoint wiring. This test validates the null-system path via WebApplicationFactory
- // where the ActorSystem may not be available.
+ // Host-001 regression guard: readiness reflects cluster membership + DB connectivity
+ // only (REQ-HOST-4a), NOT cluster leadership. The split is now carried by the Ready tag
+ // rather than a check-name predicate: database + akka-cluster are Ready-tagged, and the
+ // leader-only active-node check is NOT — so a fully operational standby central node
+ // still reports ready on /health/ready.
var previousEnv = Environment.GetEnvironmentVariable("DOTNET_ENVIRONMENT");
try
{
Environment.SetEnvironmentVariable("DOTNET_ENVIRONMENT", "Central");
- var factory = new WebApplicationFactory()
- .WithWebHostBuilder(builder =>
- {
- builder.ConfigureAppConfiguration((context, config) =>
- {
- config.AddInMemoryCollection(new Dictionary
- {
- ["ScadaBridge:Node:NodeHostname"] = "localhost",
- ["ScadaBridge:Node:RemotingPort"] = "0",
- ["ScadaBridge:Cluster:SeedNodes:0"] = "akka.tcp://scadabridge@localhost:2551",
- ["ScadaBridge:Database:SkipMigrations"] = "true",
- });
- });
- builder.UseSetting("ScadaBridge:Node:Role", "Central");
- builder.UseSetting("ScadaBridge:Database:SkipMigrations", "true");
- });
- _disposables.Add(factory);
+ var factory = CreateCentralFactory();
- var client = factory.CreateClient();
- _disposables.Add(client);
+ var registrations = Registrations(factory).ToDictionary(r => r.Name);
- var response = await client.GetAsync("/health/active");
- var body = await response.Content.ReadAsStringAsync();
+ Assert.True(registrations.ContainsKey("database"), "Expected a 'database' health check.");
+ Assert.True(registrations.ContainsKey("akka-cluster"), "Expected an 'akka-cluster' health check.");
- // Active-node check returns 503 when ActorSystem is not yet available or not leader
- Assert.Equal(System.Net.HttpStatusCode.ServiceUnavailable, response.StatusCode);
- Assert.Contains("active-node", body);
+ Assert.Contains(ZbHealthTags.Ready, registrations["database"].Tags);
+ Assert.Contains(ZbHealthTags.Ready, registrations["akka-cluster"].Tags);
+
+ // The leader-only active-node check must NOT be on the readiness tier.
+ Assert.DoesNotContain(ZbHealthTags.Ready, registrations["active-node"].Tags);
+ }
+ finally
+ {
+ Environment.SetEnvironmentVariable("DOTNET_ENVIRONMENT", previousEnv);
+ }
+ }
+
+ [Fact]
+ public void ActiveTier_Carries_Only_ActiveNode()
+ {
+ // The active-node leader check carries the Active tag (→ /health/active); the readiness
+ // checks do not, so /health/active reports leadership alone.
+ var previousEnv = Environment.GetEnvironmentVariable("DOTNET_ENVIRONMENT");
+ try
+ {
+ Environment.SetEnvironmentVariable("DOTNET_ENVIRONMENT", "Central");
+ var factory = CreateCentralFactory();
+
+ var registrations = Registrations(factory).ToDictionary(r => r.Name);
+
+ Assert.True(registrations.ContainsKey("active-node"), "Expected an 'active-node' health check.");
+ Assert.Contains(ZbHealthTags.Active, registrations["active-node"].Tags);
+
+ Assert.DoesNotContain(ZbHealthTags.Active, registrations["database"].Tags);
+ Assert.DoesNotContain(ZbHealthTags.Active, registrations["akka-cluster"].Tags);
}
finally
{