feat(health.akka): cluster health check with configurable status policy
This commit is contained in:
@@ -0,0 +1,51 @@
|
||||
using Akka.Actor;
|
||||
using Akka.Cluster;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
|
||||
namespace ZB.MOM.WW.Health.Akka;
|
||||
|
||||
/// <summary>
/// Reports the health of the local Akka cluster node: reads the node's own membership status and
/// converts it to a <see cref="HealthStatus"/> using the supplied
/// <see cref="AkkaClusterStatusPolicy"/>.
/// Intended for registration under the <see cref="ZbHealthTags.Ready"/> tag
/// (recommended <c>[ready, active]</c>).
/// </summary>
/// <remarks>
/// The <see cref="ActorSystem"/> is looked up from the service provider on every check instead of
/// being captured at construction time. When the provider has no <see cref="ActorSystem"/> to
/// hand out — for example while the host is still starting and Akka has not been initialised —
/// the check answers <see cref="HealthStatus.Degraded"/> instead of throwing, which makes it safe
/// to register ahead of Akka start-up.
/// </remarks>
public sealed class AkkaClusterHealthCheck : IHealthCheck
{
    private readonly IServiceProvider _serviceProvider;
    private readonly AkkaClusterStatusPolicy _policy;

    /// <summary>Creates a new <see cref="AkkaClusterHealthCheck"/>.</summary>
    /// <param name="serviceProvider">
    /// The application service provider from which the <see cref="ActorSystem"/> is resolved at
    /// check time. If none is registered yet, the check result is Degraded.
    /// </param>
    /// <param name="policy">The mapping from cluster member status to health status.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="serviceProvider"/> or <paramref name="policy"/> is <see langword="null"/>.
    /// </exception>
    public AkkaClusterHealthCheck(IServiceProvider serviceProvider, AkkaClusterStatusPolicy policy)
    {
        if (serviceProvider is null)
        {
            throw new ArgumentNullException(nameof(serviceProvider));
        }

        if (policy is null)
        {
            throw new ArgumentNullException(nameof(policy));
        }

        _serviceProvider = serviceProvider;
        _policy = policy;
    }

    /// <inheritdoc />
    public Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        // The probe is fully synchronous, so the result is wrapped in an already-completed task.
        return Task.FromResult(Probe());
    }

    /// <summary>
    /// Resolves the actor system, reads the local member status and applies the policy.
    /// </summary>
    private HealthCheckResult Probe()
    {
        if (_serviceProvider.GetService<ActorSystem>() is not { } actorSystem)
        {
            // No ActorSystem resolved: report Degraded instead of failing the check.
            return HealthCheckResult.Degraded("ActorSystem not yet available.");
        }

        MemberStatus memberStatus = Cluster.Get(actorSystem).SelfMember.Status;
        HealthStatus mapped = _policy.Evaluate(memberStatus);

        return new HealthCheckResult(mapped, $"Akka cluster member status: {memberStatus}");
    }
}
|
||||
@@ -0,0 +1,56 @@
|
||||
using Akka.Cluster;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
|
||||
namespace ZB.MOM.WW.Health.Akka;
|
||||
|
||||
/// <summary>
/// Translates an Akka <see cref="MemberStatus"/> into a <see cref="HealthStatus"/>.
/// </summary>
/// <remarks>
/// <para>
/// The policy is a thin wrapper around a <see cref="Func{T, TResult}"/> from
/// <see cref="MemberStatus"/> to <see cref="HealthStatus"/>, which keeps the decision logic a
/// deterministic function that can be covered with table-driven tests;
/// <see cref="AkkaClusterHealthCheck"/> contributes only the live cluster status. Two named
/// presets cover the differing behaviors of the existing ScadaBridge and OtOpcUa
/// implementations. For project-specific rules, construct an instance with a custom function.
/// </para>
/// </remarks>
public sealed class AkkaClusterStatusPolicy
{
    private readonly Func<MemberStatus, HealthStatus> _mapping;

    /// <summary>Creates a new <see cref="AkkaClusterStatusPolicy"/>.</summary>
    /// <param name="evaluate">The function that maps a member status to a health status.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="evaluate"/> is <see langword="null"/>.
    /// </exception>
    public AkkaClusterStatusPolicy(Func<MemberStatus, HealthStatus> evaluate)
    {
        if (evaluate is null)
        {
            throw new ArgumentNullException(nameof(evaluate));
        }

        _mapping = evaluate;
    }

    /// <summary>Runs the policy against a member status.</summary>
    /// <param name="status">The Akka cluster member status of the local node.</param>
    /// <returns>The <see cref="HealthStatus"/> the policy assigns to that status.</returns>
    public HealthStatus Evaluate(MemberStatus status)
    {
        return _mapping(status);
    }

    /// <summary>
    /// ScadaBridge origin: <c>Up</c>/<c>Joining</c> → Healthy, <c>Leaving</c>/<c>Exiting</c> →
    /// Degraded, any other status → Unhealthy. This is the convergence target for all projects.
    /// </summary>
    public static AkkaClusterStatusPolicy Default { get; } = new(MapDefault);

    /// <summary>
    /// OtOpcUa origin: <c>Up</c> → Healthy, every non-<c>Up</c> status (including
    /// <c>Leaving</c>/<c>Exiting</c>/<c>Down</c>) → Degraded. Kept for backward compatibility
    /// while OtOpcUa migrates.
    /// </summary>
    /// <remarks>
    /// The original OtOpcUa check searched the reachable member set for the local node with
    /// <c>Status == Up</c>; in any other state the search missed the local node and the result
    /// collapsed to Degraded. This preset reproduces that outcome: only
    /// <see cref="MemberStatus.Up"/> is Healthy.
    /// </remarks>
    public static AkkaClusterStatusPolicy OtOpcUaCompat { get; } = new(MapOtOpcUaCompat);

    /// <summary>Mapping function behind <see cref="Default"/>.</summary>
    private static HealthStatus MapDefault(MemberStatus status)
    {
        switch (status)
        {
            case MemberStatus.Up:
            case MemberStatus.Joining:
                return HealthStatus.Healthy;

            case MemberStatus.Leaving:
            case MemberStatus.Exiting:
                return HealthStatus.Degraded;

            default:
                return HealthStatus.Unhealthy;
        }
    }

    /// <summary>Mapping function behind <see cref="OtOpcUaCompat"/>.</summary>
    private static HealthStatus MapOtOpcUaCompat(MemberStatus status)
    {
        if (status == MemberStatus.Up)
        {
            return HealthStatus.Healthy;
        }

        return HealthStatus.Degraded;
    }
}
|
||||
Reference in New Issue
Block a user