feat: adopt shared ZB.MOM.WW.Health probes; add /healthz; canonical writer

This commit is contained in:
Joseph Doherty
2026-06-01 13:46:49 -04:00
parent 2a7ff03718
commit bbff1d19b5
5 changed files with 124 additions and 272 deletions
@@ -1,45 +0,0 @@
using Akka.Cluster;
using Microsoft.Extensions.Diagnostics.HealthChecks;
using ZB.MOM.WW.ScadaBridge.Host.Actors;
namespace ZB.MOM.WW.ScadaBridge.Host.Health;
/// <summary>
/// Health check that returns healthy only if this node is the active (leader) node
/// in the Akka.NET cluster. Used by Traefik to route traffic to the active node.
/// </summary>
public class ActiveNodeHealthCheck : IHealthCheck
{
private readonly AkkaHostedService _akkaService;
/// <summary>Initializes a new <see cref="ActiveNodeHealthCheck"/> with the given Akka hosted service.</summary>
/// <param name="akkaService">The Akka hosted service providing access to the actor system and cluster state.</param>
public ActiveNodeHealthCheck(AkkaHostedService akkaService)
{
_akkaService = akkaService;
}
/// <summary>Returns healthy if this node is the cluster leader (active node); otherwise returns unhealthy.</summary>
/// <param name="context">Health check context providing registration details.</param>
/// <param name="cancellationToken">Cancellation token.</param>
public Task<HealthCheckResult> CheckHealthAsync(
HealthCheckContext context,
CancellationToken cancellationToken = default)
{
var system = _akkaService.ActorSystem;
if (system == null)
return Task.FromResult(HealthCheckResult.Unhealthy("ActorSystem not yet available."));
var cluster = Cluster.Get(system);
var self = cluster.SelfMember;
if (self.Status != MemberStatus.Up)
return Task.FromResult(HealthCheckResult.Unhealthy($"Node not Up (status: {self.Status})."));
var leader = cluster.State.Leader;
if (leader != null && leader == self.Address)
return Task.FromResult(HealthCheckResult.Healthy("Active node (cluster leader)."));
return Task.FromResult(HealthCheckResult.Unhealthy("Standby node (not cluster leader)."));
}
}
@@ -1,52 +0,0 @@
using Akka.Cluster;
using Microsoft.Extensions.Diagnostics.HealthChecks;
using ZB.MOM.WW.ScadaBridge.Host.Actors;
namespace ZB.MOM.WW.ScadaBridge.Host.Health;
/// <summary>
/// Health check that verifies this node is an active member of the Akka.NET cluster.
/// Returns healthy only if the node's self-member status is Up or Joining.
/// </summary>
public class AkkaClusterHealthCheck : IHealthCheck
{
private readonly AkkaHostedService _akkaService;
/// <summary>
/// Initializes the health check with the Akka hosted service.
/// </summary>
/// <param name="akkaService">The hosted service providing access to the Akka actor system.</param>
public AkkaClusterHealthCheck(AkkaHostedService akkaService)
{
_akkaService = akkaService;
}
/// <summary>
/// Checks that this node is an active member of the Akka.NET cluster.
/// </summary>
/// <param name="context">Health check context.</param>
/// <param name="cancellationToken">Cancellation token.</param>
public Task<HealthCheckResult> CheckHealthAsync(
HealthCheckContext context,
CancellationToken cancellationToken = default)
{
var system = _akkaService.ActorSystem;
if (system == null)
return Task.FromResult(HealthCheckResult.Degraded("ActorSystem not yet available."));
var cluster = Cluster.Get(system);
var status = cluster.SelfMember.Status;
var result = status switch
{
MemberStatus.Up or MemberStatus.Joining =>
HealthCheckResult.Healthy($"Akka cluster member status: {status}"),
MemberStatus.Leaving or MemberStatus.Exiting =>
HealthCheckResult.Degraded($"Akka cluster member status: {status}"),
_ =>
HealthCheckResult.Unhealthy($"Akka cluster member status: {status}")
};
return Task.FromResult(result);
}
}
@@ -1,43 +0,0 @@
using Microsoft.Extensions.Diagnostics.HealthChecks;
using ZB.MOM.WW.ScadaBridge.ConfigurationDatabase;
namespace ZB.MOM.WW.ScadaBridge.Host.Health;
/// <summary>
/// Health check that verifies database connectivity for Central nodes.
/// </summary>
public class DatabaseHealthCheck : IHealthCheck
{
private readonly ScadaBridgeDbContext _dbContext;
/// <summary>
/// Initializes a new <see cref="DatabaseHealthCheck"/>.
/// </summary>
/// <param name="dbContext">The EF Core database context used to test connectivity.</param>
public DatabaseHealthCheck(ScadaBridgeDbContext dbContext)
{
_dbContext = dbContext;
}
/// <summary>
/// Checks database connectivity by attempting to open a connection.
/// </summary>
/// <param name="context">Health check context providing failure status information.</param>
/// <param name="cancellationToken">Cancellation token for the check.</param>
public async Task<HealthCheckResult> CheckHealthAsync(
HealthCheckContext context,
CancellationToken cancellationToken = default)
{
try
{
var canConnect = await _dbContext.Database.CanConnectAsync(cancellationToken);
return canConnect
? HealthCheckResult.Healthy("Database connection is available.")
: HealthCheckResult.Unhealthy("Database connection failed.");
}
catch (Exception ex)
{
return HealthCheckResult.Unhealthy("Database connection failed.", ex);
}
}
}