feat: adopt shared ZB.MOM.WW.Health probes; add /healthz; canonical writer
This commit is contained in:
@@ -1,45 +0,0 @@
|
||||
using Akka.Cluster;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.ScadaBridge.Host.Actors;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.Host.Health;
|
||||
|
||||
/// <summary>
/// Reports whether this node is currently the active node, i.e. the leader of the
/// Akka.NET cluster. Traefik consumes the result to route traffic to the active node.
/// </summary>
public class ActiveNodeHealthCheck : IHealthCheck
{
    private readonly AkkaHostedService _akkaService;

    /// <summary>Creates an <see cref="ActiveNodeHealthCheck"/> backed by the supplied Akka hosted service.</summary>
    /// <param name="akkaService">Hosted service exposing the actor system from which cluster state is read.</param>
    public ActiveNodeHealthCheck(AkkaHostedService akkaService) => _akkaService = akkaService;

    /// <summary>
    /// Reports healthy when this node is Up and is the cluster leader; reports unhealthy
    /// when the actor system is missing, the node is not Up, or another node leads.
    /// </summary>
    /// <param name="context">Health check context; not consulted by this check.</param>
    /// <param name="cancellationToken">Cancellation token; unused because the check completes synchronously.</param>
    /// <returns>An already-completed task carrying the evaluation result.</returns>
    public Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        return Task.FromResult(Evaluate());
    }

    /// <summary>Synchronously inspects the cluster state and maps it to a health result.</summary>
    private HealthCheckResult Evaluate()
    {
        var actorSystem = _akkaService.ActorSystem;
        if (actorSystem is null)
        {
            return HealthCheckResult.Unhealthy("ActorSystem not yet available.");
        }

        var cluster = Cluster.Get(actorSystem);
        var selfMember = cluster.SelfMember;
        if (selfMember.Status != MemberStatus.Up)
        {
            return HealthCheckResult.Unhealthy($"Node not Up (status: {selfMember.Status}).");
        }

        // A leader may not be elected yet, so a missing leader counts as standby.
        var leaderAddress = cluster.State.Leader;
        var isLeader = leaderAddress != null && leaderAddress == selfMember.Address;

        return isLeader
            ? HealthCheckResult.Healthy("Active node (cluster leader).")
            : HealthCheckResult.Unhealthy("Standby node (not cluster leader).");
    }
}
|
||||
@@ -1,52 +0,0 @@
|
||||
using Akka.Cluster;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.ScadaBridge.Host.Actors;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.Host.Health;
|
||||
|
||||
/// <summary>
/// Health check that maps this node's Akka.NET cluster membership status to a health result.
/// Up and Joining are healthy, Leaving and Exiting are degraded, and every other status is
/// unhealthy. A missing actor system is reported as degraded.
/// </summary>
public class AkkaClusterHealthCheck : IHealthCheck
{
    private readonly AkkaHostedService _akkaService;

    /// <summary>
    /// Creates the health check on top of the Akka hosted service.
    /// </summary>
    /// <param name="akkaService">Hosted service exposing the Akka actor system.</param>
    public AkkaClusterHealthCheck(AkkaHostedService akkaService) => _akkaService = akkaService;

    /// <summary>
    /// Reads the self-member status from the cluster and translates it into a health result.
    /// </summary>
    /// <param name="context">Health check context; not consulted by this check.</param>
    /// <param name="cancellationToken">Cancellation token; unused because the check completes synchronously.</param>
    /// <returns>An already-completed task carrying the evaluation result.</returns>
    public Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        return Task.FromResult(Evaluate());
    }

    /// <summary>Synchronously maps the current self-member status to a health result.</summary>
    private HealthCheckResult Evaluate()
    {
        var actorSystem = _akkaService.ActorSystem;
        if (actorSystem is null)
        {
            return HealthCheckResult.Degraded("ActorSystem not yet available.");
        }

        var memberStatus = Cluster.Get(actorSystem).SelfMember.Status;
        var description = $"Akka cluster member status: {memberStatus}";

        if (memberStatus is MemberStatus.Up or MemberStatus.Joining)
        {
            return HealthCheckResult.Healthy(description);
        }

        if (memberStatus is MemberStatus.Leaving or MemberStatus.Exiting)
        {
            return HealthCheckResult.Degraded(description);
        }

        return HealthCheckResult.Unhealthy(description);
    }
}
|
||||
@@ -1,43 +0,0 @@
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.ScadaBridge.ConfigurationDatabase;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.Host.Health;
|
||||
|
||||
/// <summary>
/// Health check that verifies database connectivity for Central nodes.
/// </summary>
public class DatabaseHealthCheck : IHealthCheck
{
    private const string FailureDescription = "Database connection failed.";

    private readonly ScadaBridgeDbContext _dbContext;

    /// <summary>
    /// Initializes a new <see cref="DatabaseHealthCheck"/>.
    /// </summary>
    /// <param name="dbContext">The EF Core database context used to test connectivity.</param>
    public DatabaseHealthCheck(ScadaBridgeDbContext dbContext)
    {
        _dbContext = dbContext;
    }

    /// <summary>
    /// Checks database connectivity via <c>Database.CanConnectAsync</c>.
    /// </summary>
    /// <param name="context">
    /// Health check context. The registration's failure status is reported when the database
    /// is unreachable; <see cref="HealthStatus.Unhealthy"/> is used when none is available.
    /// </param>
    /// <param name="cancellationToken">Cancellation token for the check.</param>
    /// <returns>
    /// Healthy when a connection can be established; otherwise a result carrying the
    /// registration's failure status and, when one was thrown, the exception.
    /// </returns>
    /// <exception cref="OperationCanceledException">
    /// May propagate when <paramref name="cancellationToken"/> has been cancelled, so that a
    /// cancelled probe is not misreported as a database failure.
    /// </exception>
    public async Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        // Honor the status chosen at registration time. The null-safe fallback keeps the
        // previous behavior (Unhealthy) for callers that pass a bare or missing context.
        var failureStatus = context?.Registration?.FailureStatus ?? HealthStatus.Unhealthy;

        try
        {
            var canConnect = await _dbContext.Database.CanConnectAsync(cancellationToken);
            return canConnect
                ? HealthCheckResult.Healthy("Database connection is available.")
                : new HealthCheckResult(failureStatus, FailureDescription);
        }
        catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
        {
            // Only genuine failures are converted to a result; exceptions raised after the
            // caller cancelled are left to propagate.
            return new HealthCheckResult(failureStatus, FailureDescription, ex);
        }
    }
}
|
||||
@@ -1,5 +1,6 @@
|
||||
using HealthChecks.UI.Client;
|
||||
using Microsoft.AspNetCore.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.Health;
|
||||
using ZB.MOM.WW.Health.Akka;
|
||||
using ZB.MOM.WW.Health.EntityFrameworkCore;
|
||||
using ZB.MOM.WW.ScadaBridge.AuditLog;
|
||||
using ZB.MOM.WW.ScadaBridge.CentralUI;
|
||||
using ZB.MOM.WW.ScadaBridge.ClusterInfrastructure;
|
||||
@@ -110,11 +111,25 @@ try
|
||||
?? throw new InvalidOperationException("ScadaBridge:Database:ConfigurationDb connection string is required for Central role.");
|
||||
builder.Services.AddConfigurationDatabase(configDbConnectionString);
|
||||
|
||||
// WP-12: Health checks for readiness gating
|
||||
// WP-12: Health checks for readiness gating — shared ZB.MOM.WW.Health probes.
|
||||
// Check names and the ready/active tier split are preserved: database + akka-cluster
|
||||
// carry the Ready tag (/health/ready), active-node carries the Active tag (/health/active).
|
||||
// The Akka checks resolve ActorSystem from DI via the transient bridge registered below;
|
||||
// the DatabaseHealthCheck<TContext> resolves a scoped ScadaBridgeDbContext (no factory).
|
||||
builder.Services.AddHealthChecks()
|
||||
.AddCheck<DatabaseHealthCheck>("database")
|
||||
.AddCheck<AkkaClusterHealthCheck>("akka-cluster")
|
||||
.AddCheck<ActiveNodeHealthCheck>("active-node");
|
||||
.AddTypeActivatedCheck<DatabaseHealthCheck<ScadaBridgeDbContext>>(
|
||||
"database",
|
||||
failureStatus: null,
|
||||
tags: new[] { ZbHealthTags.Ready })
|
||||
.AddTypeActivatedCheck<AkkaClusterHealthCheck>(
|
||||
"akka-cluster",
|
||||
failureStatus: null,
|
||||
tags: new[] { ZbHealthTags.Ready },
|
||||
args: AkkaClusterStatusPolicy.Default)
|
||||
.AddTypeActivatedCheck<ActiveNodeHealthCheck>(
|
||||
"active-node",
|
||||
failureStatus: null,
|
||||
tags: new[] { ZbHealthTags.Active });
|
||||
|
||||
// WP-13: Akka.NET bootstrap via hosted service
|
||||
builder.Services.AddSingleton<AkkaHostedService>();
|
||||
@@ -221,23 +236,17 @@ try
|
||||
&& HttpMethods.IsPost(ctx.Request.Method),
|
||||
branch => branch.UseAuditWriteMiddleware());
|
||||
|
||||
// WP-12: Map readiness endpoint — returns 503 until ready, 200 when ready.
|
||||
// REQ-HOST-4a defines readiness as cluster membership + DB connectivity,
|
||||
// explicitly NOT cluster leadership. The leader-only "active-node" check is
|
||||
// excluded here so a fully operational standby central node reports ready;
|
||||
// leadership is reported separately on /health/active.
|
||||
app.MapHealthChecks("/health/ready", new HealthCheckOptions
|
||||
{
|
||||
Predicate = check => check.Name != "active-node",
|
||||
ResponseWriter = UIResponseWriter.WriteHealthCheckUIResponse
|
||||
});
|
||||
|
||||
// Active node endpoint — returns 200 only on the cluster leader; used by Traefik for routing
|
||||
app.MapHealthChecks("/health/active", new HealthCheckOptions
|
||||
{
|
||||
Predicate = check => check.Name == "active-node",
|
||||
ResponseWriter = UIResponseWriter.WriteHealthCheckUIResponse
|
||||
});
|
||||
// WP-12: Map the canonical three-tier health endpoints in one call:
|
||||
// /health/ready — Ready-tagged checks (database + akka-cluster). REQ-HOST-4a defines
|
||||
// readiness as cluster membership + DB connectivity, explicitly NOT
|
||||
// cluster leadership, so the leader-only active-node check is excluded
|
||||
// (a fully operational standby central node still reports ready).
|
||||
// /health/active — Active-tagged check (active-node); returns 200 only on the cluster
|
||||
// leader; used by Traefik for routing.
|
||||
// /healthz — bare process liveness; runs no checks (always 200 while the process
|
||||
// is up). New tier added by adopting the shared library.
|
||||
// All three are anonymous and use the canonical ZbHealthWriter JSON output.
|
||||
app.MapZbHealth();
|
||||
|
||||
app.MapStaticAssets();
|
||||
app.MapCentralUI<ZB.MOM.WW.ScadaBridge.Host.Components.App>();
|
||||
|
||||
Reference in New Issue
Block a user