feat(health.akka): cluster health check with configurable status policy

This commit is contained in:
Joseph Doherty
2026-06-01 06:47:29 -04:00
parent 1ab2f32e8e
commit 25dd328280
4 changed files with 315 additions and 0 deletions
@@ -0,0 +1,51 @@
using Akka.Actor;
using Akka.Cluster;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Diagnostics.HealthChecks;
namespace ZB.MOM.WW.Health.Akka;
/// <summary>
/// Reports the local node's Akka cluster membership as a <see cref="HealthStatus"/>, leaving the
/// status-to-health decision to an injected <see cref="AkkaClusterStatusPolicy"/>.
/// Intended for the <see cref="ZbHealthTags.Ready"/> tag (recommended <c>[ready, active]</c>).
/// </summary>
/// <remarks>
/// <para>
/// The <see cref="ActorSystem"/> is looked up from the service provider on every check instead of
/// being captured at construction. When the provider has no <see cref="ActorSystem"/> to hand out —
/// e.g. during startup before Akka is initialised — the check answers
/// <see cref="HealthStatus.Degraded"/> instead of throwing, so it can be registered before Akka is up.
/// </para>
/// <para>
/// Exceptions raised while reading the cluster extension or evaluating the policy are not caught
/// here; they propagate to the caller.
/// </para>
/// </remarks>
public sealed class AkkaClusterHealthCheck : IHealthCheck
{
    private readonly IServiceProvider _services;
    private readonly AkkaClusterStatusPolicy _statusPolicy;

    /// <summary>Creates a health check bound to the given provider and policy.</summary>
    /// <param name="serviceProvider">
    /// The application service provider used to look up the <see cref="ActorSystem"/> each time the
    /// check runs. A missing <see cref="ActorSystem"/> yields a Degraded result rather than an error.
    /// </param>
    /// <param name="policy">The policy that converts the member status into a health status.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="serviceProvider"/> or <paramref name="policy"/> is <see langword="null"/>.
    /// </exception>
    public AkkaClusterHealthCheck(IServiceProvider serviceProvider, AkkaClusterStatusPolicy policy)
    {
        if (serviceProvider is null)
        {
            throw new ArgumentNullException(nameof(serviceProvider));
        }

        if (policy is null)
        {
            throw new ArgumentNullException(nameof(policy));
        }

        _services = serviceProvider;
        _statusPolicy = policy;
    }

    /// <inheritdoc />
    public Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        HealthCheckResult result;

        if (_services.GetService<ActorSystem>() is { } actorSystem)
        {
            // Read the status once so the mapped health and the description describe the same snapshot.
            var memberStatus = Cluster.Get(actorSystem).SelfMember.Status;
            result = new HealthCheckResult(
                _statusPolicy.Evaluate(memberStatus),
                $"Akka cluster member status: {memberStatus}");
        }
        else
        {
            // No ActorSystem resolvable yet: report Degraded rather than failing the check.
            result = HealthCheckResult.Degraded("ActorSystem not yet available.");
        }

        return Task.FromResult(result);
    }
}
@@ -0,0 +1,56 @@
using Akka.Cluster;
using Microsoft.Extensions.Diagnostics.HealthChecks;
namespace ZB.MOM.WW.Health.Akka;
/// <summary>
/// Translates an Akka <see cref="MemberStatus"/> into a <see cref="HealthStatus"/>.
/// </summary>
/// <remarks>
/// <para>
/// The policy is a thin wrapper over a <see cref="Func{T, TResult}"/> from
/// <see cref="MemberStatus"/> to <see cref="HealthStatus"/>, which keeps the decision logic
/// table-testable on its own; <see cref="AkkaClusterHealthCheck"/> contributes only the live
/// cluster status. <see cref="Default"/> and <see cref="OtOpcUaCompat"/> are named presets that
/// reconcile the divergence between the existing ScadaBridge and OtOpcUa implementations. For
/// project-specific overrides, construct an instance with a custom mapping.
/// </para>
/// </remarks>
public sealed class AkkaClusterStatusPolicy
{
    private readonly Func<MemberStatus, HealthStatus> _mapping;

    /// <summary>Creates a policy backed by the supplied mapping function.</summary>
    /// <param name="evaluate">The function that maps a member status to a health status.</param>
    /// <exception cref="ArgumentNullException"><paramref name="evaluate"/> is <see langword="null"/>.</exception>
    public AkkaClusterStatusPolicy(Func<MemberStatus, HealthStatus> evaluate)
    {
        if (evaluate is null)
        {
            throw new ArgumentNullException(nameof(evaluate));
        }

        _mapping = evaluate;
    }

    /// <summary>Runs the wrapped mapping against the given member status.</summary>
    /// <param name="status">The local node's Akka cluster member status.</param>
    /// <returns>The <see cref="HealthStatus"/> produced by the mapping function.</returns>
    public HealthStatus Evaluate(MemberStatus status)
    {
        return _mapping(status);
    }

    /// <summary>
    /// ScadaBridge origin: <c>Up</c>/<c>Joining</c> → Healthy, <c>Leaving</c>/<c>Exiting</c> →
    /// Degraded, every other status → Unhealthy. The convergence target for all projects.
    /// </summary>
    public static AkkaClusterStatusPolicy Default { get; } = new(MapDefault);

    /// <summary>
    /// OtOpcUa origin: <c>Up</c> → Healthy, any non-<c>Up</c> state (including
    /// <c>Leaving</c>/<c>Exiting</c>/<c>Down</c>) → Degraded. Provided for backward compatibility
    /// during OtOpcUa's migration.
    /// </summary>
    /// <remarks>
    /// The original OtOpcUa check scanned the reachable member set for self with
    /// <c>Status == Up</c>; any other state caused the scan to miss self and collapse to Degraded.
    /// This preset reproduces that outcome: only <see cref="MemberStatus.Up"/> is Healthy.
    /// </remarks>
    public static AkkaClusterStatusPolicy OtOpcUaCompat { get; } = new(MapOtOpcUaCompat);

    // Mapping behind Default: two healthy states, two degraded states, Unhealthy for the rest.
    private static HealthStatus MapDefault(MemberStatus status)
    {
        switch (status)
        {
            case MemberStatus.Up:
            case MemberStatus.Joining:
                return HealthStatus.Healthy;

            case MemberStatus.Leaving:
            case MemberStatus.Exiting:
                return HealthStatus.Degraded;

            default:
                return HealthStatus.Unhealthy;
        }
    }

    // Mapping behind OtOpcUaCompat: anything other than Up is Degraded, never Unhealthy.
    private static HealthStatus MapOtOpcUaCompat(MemberStatus status)
    {
        if (status != MemberStatus.Up)
        {
            return HealthStatus.Degraded;
        }

        return HealthStatus.Healthy;
    }
}