feat: adopt shared ZB.MOM.WW.Health probes (preserve tiers + OtOpcUaCompat policy)
This commit is contained in:
@@ -1,39 +0,0 @@
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Interfaces;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.Health;
|
||||
|
||||
/// <summary>
/// Health probe describing this node's standing in the <c>admin</c> cluster role. The result is
/// Healthy when the node does not carry the role at all or when it is the current role leader,
/// and Degraded when it carries the role but is not the leader. Used by the
/// <c>/health/active</c> endpoint so external load balancers can route admin-singleton
/// traffic to the current leader (cookie sessions still work on either node — DataProtection
/// keys are shared).
/// </summary>
public sealed class AdminRoleLeaderHealthCheck : IHealthCheck
{
    /// <summary>Name of the cluster role this probe inspects.</summary>
    private const string AdminRole = "admin";

    private readonly IClusterRoleInfo _roleInfo;

    /// <summary>Creates the probe around the supplied cluster role information provider.</summary>
    /// <param name="roleInfo">Source of role membership, role leader and local node identity.</param>
    public AdminRoleLeaderHealthCheck(IClusterRoleInfo roleInfo) => _roleInfo = roleInfo;

    /// <summary>Evaluates the local node's admin-role standing.</summary>
    /// <param name="context">The health check context (not consulted).</param>
    /// <param name="cancellationToken">The cancellation token (not consulted; the check completes synchronously).</param>
    /// <returns>An already-completed task carrying the Healthy or Degraded result.</returns>
    public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
    {
        return Task.FromResult(Evaluate());
    }

    /// <summary>Computes the probe result from the current role information.</summary>
    private HealthCheckResult Evaluate()
    {
        // Nodes outside the admin role have nothing to lead, so they never degrade this probe.
        if (!_roleInfo.HasRole(AdminRole))
        {
            return HealthCheckResult.Healthy("Node does not carry admin role");
        }

        var currentLeader = _roleInfo.RoleLeader(AdminRole);
        if (currentLeader is not null && currentLeader.Value.Equals(_roleInfo.LocalNode))
        {
            return HealthCheckResult.Healthy($"Admin leader ({_roleInfo.LocalNode})");
        }

        // Admin member without leadership (or no leader known yet): report Degraded, not Unhealthy.
        return HealthCheckResult.Degraded($"Admin member but not leader (leader={currentLeader?.Value ?? "<unknown>"})");
    }
}
|
||||
@@ -1,35 +0,0 @@
|
||||
using Akka.Actor;
|
||||
using Akka.Cluster;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.Health;
|
||||
|
||||
/// <summary>
/// Health probe that reports whether the local node is listed with status <c>Up</c> in the
/// Akka cluster membership: Healthy when it is, Degraded otherwise.
/// </summary>
public sealed class AkkaClusterHealthCheck : IHealthCheck
{
    private readonly ActorSystem _system;

    /// <summary>
    /// Initializes a new instance of the AkkaClusterHealthCheck class.
    /// </summary>
    /// <param name="system">The Akka actor system to check cluster health for.</param>
    public AkkaClusterHealthCheck(ActorSystem system)
    {
        _system = system;
    }

    /// <summary>
    /// Checks the health of the Akka cluster asynchronously.
    /// </summary>
    /// <param name="context">The health check context (not consulted).</param>
    /// <param name="cancellationToken">Cancellation token (not consulted; the check completes synchronously).</param>
    /// <returns>
    /// An already-completed task: Healthy (with the member count) when the local address is a
    /// member with status <c>Up</c>, otherwise Degraded.
    /// </returns>
    public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
    {
        var cluster = Akka.Cluster.Cluster.Get(_system);

        // Capture the state once so the Up check and the reported member count are taken
        // from the same membership view rather than from two separate reads.
        var state = cluster.State;
        var selfAddress = cluster.SelfAddress;

        var selfUp = state.Members.Any(m =>
            m.Address == selfAddress && m.Status == MemberStatus.Up);

        return Task.FromResult(selfUp
            ? HealthCheckResult.Healthy($"Self Up; {state.Members.Count} member(s)")
            : HealthCheckResult.Degraded("Self not yet Up in cluster"));
    }
}
|
||||
@@ -1,38 +0,0 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.Health;
|
||||
|
||||
/// <summary>
/// Health probe for the configuration database: Healthy when a minimal query against
/// <c>Deployments</c> succeeds, Unhealthy (with the causing exception) when it fails.
/// </summary>
public sealed class DatabaseHealthCheck : IHealthCheck
{
    private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbFactory;

    /// <summary>
    /// Initializes a new instance of the <see cref="DatabaseHealthCheck"/> class.
    /// </summary>
    /// <param name="dbFactory">The database context factory for the config database.</param>
    public DatabaseHealthCheck(IDbContextFactory<OtOpcUaConfigDbContext> dbFactory)
    {
        _dbFactory = dbFactory;
    }

    /// <summary>
    /// Checks the health of the configuration database.
    /// </summary>
    /// <param name="context">The health check context (not consulted).</param>
    /// <param name="cancellationToken">Cancellation token, flowed into context creation and the probe query.</param>
    /// <returns>Healthy when the probe query completes; Unhealthy carrying the exception otherwise.</returns>
    /// <exception cref="OperationCanceledException">
    /// The probe was cancelled through <paramref name="cancellationToken"/>.
    /// </exception>
    public async Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
    {
        try
        {
            await using var db = await _dbFactory.CreateDbContextAsync(cancellationToken);

            // Fetch at most one row without change tracking: enough to prove connectivity.
            await db.Deployments.AsNoTracking().Take(1).ToListAsync(cancellationToken);
            return HealthCheckResult.Healthy("ConfigDb reachable");
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // The caller abandoned the probe; that says nothing about the database, so let the
            // cancellation propagate instead of reporting the database as unreachable.
            throw;
        }
        catch (Exception ex)
        {
            return HealthCheckResult.Unhealthy("ConfigDb unreachable", ex);
        }
    }
}
|
||||
@@ -1,25 +1,40 @@
|
||||
using Microsoft.AspNetCore.Builder;
|
||||
using Microsoft.AspNetCore.Diagnostics.HealthChecks;
|
||||
using Microsoft.AspNetCore.Routing;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.Health;
|
||||
using ZB.MOM.WW.Health.Akka;
|
||||
using ZB.MOM.WW.Health.EntityFrameworkCore;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.Health;
|
||||
|
||||
public static class HealthEndpoints
|
||||
{
|
||||
/// <summary>
/// Registers the standard ASP.NET Core health-check infrastructure plus the shared ZB.MOM.WW
/// health probes. Mirrors ScadaLink's three-tier pattern: <c>ready</c> = boot ok; <c>active</c> =
/// fully serving traffic; <c>healthz</c> = bare process liveness.
/// Tier semantics preserved: configdb + akka on ready+active; admin-leader on active only.
/// </summary>
/// <param name="services">The service collection to register health checks with.</param>
/// <returns>The same <paramref name="services"/> instance, so calls can be chained.</returns>
public static IServiceCollection AddOtOpcUaHealth(this IServiceCollection services)
{
    // One registration chain only: each probe name ("configdb", "akka", "admin-leader") is
    // registered exactly once, using the shared probe types.
    // failureStatus: null leaves the framework default failure status in effect.
    services.AddHealthChecks()
        .AddTypeActivatedCheck<DatabaseHealthCheck<OtOpcUaConfigDbContext>>(
            "configdb",
            failureStatus: null,
            tags: new[] { ZbHealthTags.Ready, ZbHealthTags.Active },
            args: new DatabaseHealthCheckOptions<OtOpcUaConfigDbContext>
            {
                // Fetch at most one Deployments row, untracked.
                ProbeQuery = static (db, ct) => db.Deployments.AsNoTracking().Take(1).ToListAsync(ct),
            })
        .AddTypeActivatedCheck<AkkaClusterHealthCheck>(
            "akka",
            failureStatus: null,
            tags: new[] { ZbHealthTags.Ready, ZbHealthTags.Active },
            args: AkkaClusterStatusPolicy.OtOpcUaCompat)
        .AddTypeActivatedCheck<ActiveNodeHealthCheck>(
            "admin-leader",
            failureStatus: null,
            tags: new[] { ZbHealthTags.Active },
            args: "admin");

    return services;
}
|
||||
|
||||
@@ -27,21 +42,7 @@ public static class HealthEndpoints
|
||||
/// <param name="app">The endpoint route builder.</param>
|
||||
public static IEndpointRouteBuilder MapOtOpcUaHealth(this IEndpointRouteBuilder app)
{
    // Endpoint mapping is delegated to the shared ZB.MOM.WW.Health helper, which supersedes the
    // hand-rolled /health/ready, /health/active and /healthz mappings (presumably it maps the
    // same three tiers — verify against the shared library).
    // NOTE(review): the superseded mappings were AllowAnonymous on all three because Traefik /
    // k8s liveness probes / load-balancers hit them without credentials; without it the
    // AddOtOpcUaAuth fallback policy 401s every probe and Traefik marks every backend
    // unhealthy. Confirm MapZbHealth() keeps the endpoints anonymous.
    app.MapZbHealth();

    return app;
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user