feat(health.akka): active/leader check with role filter + IActiveNodeGate impl

This commit is contained in:
Joseph Doherty
2026-06-01 06:55:46 -04:00
parent 2dbedce0ac
commit cf277eb7df
3 changed files with 279 additions and 0 deletions
@@ -0,0 +1,138 @@
using Akka.Actor;
using Akka.Cluster;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Diagnostics.HealthChecks;
namespace ZB.MOM.WW.Health.Akka;
/// <summary>
/// Side-effect-free mapping from already-resolved cluster facts to a health status for the
/// active / leader probe. It lives apart from <see cref="ActiveNodeHealthCheck"/> so that both the
/// role-less and the role-filtered truth tables can be table-tested without forming a real cluster.
/// </summary>
public static class ActiveNodeDecision
{
    /// <summary>
    /// Translates the resolved cluster facts into a <see cref="HealthStatus"/>.
    /// </summary>
    /// <param name="selfUp">
    /// Whether the local node's member status is <c>Up</c>. Only consulted in role-less mode.
    /// </param>
    /// <param name="isLeader">
    /// Whether the local node is the leader: the cluster leader in role-less mode, or the
    /// role-singleton leader in role-filtered mode.
    /// </param>
    /// <param name="hasRole">
    /// Whether the local node carries <paramref name="requiredRole"/>. Ignored when
    /// <paramref name="requiredRole"/> is <c>null</c>.
    /// </param>
    /// <param name="requiredRole">
    /// The role the check is scoped to, or <c>null</c> for the role-less (whole-cluster-leader) mode.
    /// </param>
    /// <returns>
    /// Role-less: Healthy iff the node is Up and the cluster leader, otherwise Unhealthy.
    /// Role-filtered: Degraded when the node carries the role but is not the leader; Healthy in
    /// every other case (the node lacks the role, so the probe is irrelevant, or it carries the
    /// role and is the role-singleton leader).
    /// </returns>
    public static HealthStatus Evaluate(bool selfUp, bool isLeader, bool hasRole, string? requiredRole)
    {
        // Arms are evaluated top to bottom, so the two role-less rows (requiredRole == null) are
        // settled before any role-filtered row is considered.
        return (requiredRole, selfUp, isLeader, hasRole) switch
        {
            // Role-less: only an Up leader is the active node.
            (null, true, true, _) => HealthStatus.Healthy,
            (null, _, _, _) => HealthStatus.Unhealthy,

            // Role-filtered: a role member that is not the leader is a standby.
            (_, _, false, true) => HealthStatus.Degraded,

            // Role-filtered: role leader, or not a role member at all.
            _ => HealthStatus.Healthy,
        };
    }
}
/// <summary>
/// Health check that reports whether this node is the designated active / leader node.
/// An optional role scopes the check to nodes carrying that role. Register to the
/// <see cref="ZbHealthTags.Active"/> tag.
/// </summary>
/// <remarks>
/// The <see cref="ActorSystem"/> is resolved from the service provider on every check rather than
/// at construction time. If it cannot be resolved — e.g. during startup before Akka is
/// initialised — the check returns <see cref="HealthStatus.Degraded"/> rather than throwing, so it
/// is startup-safe. Once an <see cref="ActorSystem"/> is available the outcome is decided solely by
/// <see cref="ActiveNodeDecision.Evaluate"/>.
/// </remarks>
public sealed class ActiveNodeHealthCheck : IHealthCheck
{
    private readonly IServiceProvider _serviceProvider;

    // null selects role-less (whole-cluster-leader) mode; any other value selects role-filtered mode.
    private readonly string? _role;

    /// <summary>
    /// Role-less constructor: Healthy when the node is <c>Up</c> and the cluster leader
    /// (ScadaBridge ActiveNode pattern); Unhealthy otherwise. Degraded when no
    /// <see cref="ActorSystem"/> can be resolved yet.
    /// </summary>
    /// <param name="serviceProvider">
    /// The application service provider. The <see cref="ActorSystem"/> is resolved lazily so the
    /// check is startup-safe: if no <see cref="ActorSystem"/> is registered yet the result is Degraded.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="serviceProvider"/> is <c>null</c>.
    /// </exception>
    public ActiveNodeHealthCheck(IServiceProvider serviceProvider)
    {
        _serviceProvider = serviceProvider ?? throw new ArgumentNullException(nameof(serviceProvider));
        _role = null;
    }

    /// <summary>
    /// Role-filtered constructor: Healthy when the node lacks <paramref name="role"/> or carries it
    /// and is the role-singleton leader; Degraded when it carries the role but is not the leader
    /// (OtOpcUa AdminRoleLeader pattern). Degraded when no <see cref="ActorSystem"/> can be
    /// resolved yet.
    /// </summary>
    /// <param name="serviceProvider">
    /// The application service provider. The <see cref="ActorSystem"/> is resolved lazily so the
    /// check is startup-safe: if no <see cref="ActorSystem"/> is registered yet the result is Degraded.
    /// </param>
    /// <param name="role">The Akka cluster role to scope the check to. Must not be blank.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="serviceProvider"/> or <paramref name="role"/> is <c>null</c>.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="role"/> is empty or consists only of whitespace.
    /// </exception>
    public ActiveNodeHealthCheck(IServiceProvider serviceProvider, string role)
    {
        _serviceProvider = serviceProvider ?? throw new ArgumentNullException(nameof(serviceProvider));

        if (role is null)
            throw new ArgumentNullException(nameof(role));

        // A blank role is presumably never carried by any member; the check would then take the
        // "node lacks the role" branch and report Healthy, hiding the misconfiguration.
        // Fail fast at registration time instead.
        if (string.IsNullOrWhiteSpace(role))
            throw new ArgumentException("Role must not be empty or whitespace.", nameof(role));

        _role = role;
    }

    /// <summary>
    /// Resolves the current cluster facts for the local node and maps them to a health result.
    /// </summary>
    /// <param name="context">The health check context supplied by the host; not used.</param>
    /// <param name="cancellationToken">Not observed: the check completes synchronously.</param>
    /// <returns>
    /// An already-completed task carrying Degraded when no <see cref="ActorSystem"/> can be resolved,
    /// otherwise the status chosen by <see cref="ActiveNodeDecision.Evaluate"/> together with a
    /// human-readable description.
    /// </returns>
    public Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        // GetService (not GetRequiredService) so a missing registration yields null, not an exception.
        var system = _serviceProvider.GetService<ActorSystem>();
        if (system is null)
            return Task.FromResult(HealthCheckResult.Degraded("ActorSystem not yet available."));

        var cluster = Cluster.Get(system);
        var self = cluster.SelfMember;
        var selfUp = self.Status == MemberStatus.Up;

        bool hasRole;
        bool isLeader;
        if (_role is null)
        {
            // Role-less mode: hasRole is ignored by the decision function, so any value works.
            hasRole = false;
            var leader = cluster.State.Leader;
            isLeader = leader is not null && leader == self.Address;
        }
        else
        {
            hasRole = self.HasRole(_role);
            var roleLeader = cluster.State.RoleLeader(_role);
            isLeader = roleLeader is not null && roleLeader == self.Address;
        }

        var health = ActiveNodeDecision.Evaluate(selfUp, isLeader, hasRole, _role);
        return Task.FromResult(new HealthCheckResult(health, DescribeResult(health, self.Status)));
    }

    /// <summary>
    /// Builds the description attached to the health result.
    /// </summary>
    /// <param name="health">The status chosen by <see cref="ActiveNodeDecision.Evaluate"/>.</param>
    /// <param name="status">The local member status; only reported for a role-less standby.</param>
    /// <returns>A short human-readable explanation of <paramref name="health"/>.</returns>
    private string DescribeResult(HealthStatus health, MemberStatus status)
    {
        if (_role is null)
            return health == HealthStatus.Healthy
                ? "Active node (cluster leader)."
                : $"Standby node (status: {status}).";

        return health switch
        {
            // Healthy covers both the role leader and a node that does not carry the role.
            HealthStatus.Healthy => $"Active for role '{_role}' (or not a role member).",
            _ => $"Role '{_role}' member but not leader.",
        };
    }
}
@@ -0,0 +1,50 @@
using Akka.Actor;
using Akka.Cluster;
using Microsoft.Extensions.DependencyInjection;
namespace ZB.MOM.WW.Health.Akka;
/// <summary>
/// <see cref="IActiveNodeGate"/> implementation backed by the Akka cluster state:
/// <see cref="IsActiveNode"/> is <c>true</c> only while the local member is <c>Up</c> and the
/// local node is the cluster leader. Register as a singleton.
/// </summary>
/// <remarks>
/// The <see cref="ActorSystem"/> is looked up in the service provider each time
/// <see cref="IsActiveNode"/> is read. While it cannot be resolved — e.g. during startup before
/// Akka is initialised — <see cref="IsActiveNode"/> returns <c>false</c> (the safe default during
/// startup). The gate reads the cluster state directly and does not resolve
/// <see cref="ActiveNodeHealthCheck"/> from DI.
/// </remarks>
public sealed class AkkaActiveNodeGate : IActiveNodeGate
{
    private readonly IServiceProvider _serviceProvider;

    /// <summary>Creates a new <see cref="AkkaActiveNodeGate"/>.</summary>
    /// <param name="serviceProvider">
    /// The application service provider. The <see cref="ActorSystem"/> is resolved lazily; while it
    /// is not available <see cref="IsActiveNode"/> returns <c>false</c>.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="serviceProvider"/> is <c>null</c>.
    /// </exception>
    public AkkaActiveNodeGate(IServiceProvider serviceProvider)
    {
        if (serviceProvider is null)
        {
            throw new ArgumentNullException(nameof(serviceProvider));
        }

        _serviceProvider = serviceProvider;
    }

    /// <inheritdoc />
    public bool IsActiveNode
    {
        get
        {
            // No ActorSystem registered yet: not active.
            if (_serviceProvider.GetService<ActorSystem>() is not { } actorSystem)
            {
                return false;
            }

            var akkaCluster = Cluster.Get(actorSystem);
            var localMember = akkaCluster.SelfMember;

            // Short-circuit order matters: the leader is only read once the node is known to be Up.
            return localMember.Status == MemberStatus.Up
                && akkaCluster.State.Leader is { } clusterLeader
                && clusterLeader == localMember.Address;
        }
    }
}