feat(health.akka): active/leader check with role filter + IActiveNodeGate impl
This commit is contained in:
@@ -0,0 +1,138 @@
|
|||||||
|
using Akka.Actor;
|
||||||
|
using Akka.Cluster;
|
||||||
|
using Microsoft.Extensions.DependencyInjection;
|
||||||
|
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||||
|
|
||||||
|
namespace ZB.MOM.WW.Health.Akka;
|
||||||
|
|
||||||
|
/// <summary>
/// Side-effect-free mapping from already-resolved cluster facts to a <see cref="HealthStatus"/>.
/// It is kept separate from <see cref="ActiveNodeHealthCheck"/> so that both the role-less and the
/// role-filtered truth tables can be covered by table-driven tests without forming a real cluster.
/// </summary>
public static class ActiveNodeDecision
{
    /// <summary>
    /// Decides the health status of the active / leader probe from the supplied cluster facts.
    /// </summary>
    /// <param name="selfUp">
    /// Whether the local node's member status is <c>Up</c>. Only consulted in role-less mode.
    /// </param>
    /// <param name="isLeader">
    /// Whether the local node is the leader: the cluster leader in role-less mode, or the
    /// role-singleton leader in role-filtered mode.
    /// </param>
    /// <param name="hasRole">
    /// Whether the local node carries <paramref name="requiredRole"/>. Ignored when
    /// <paramref name="requiredRole"/> is <c>null</c>.
    /// </param>
    /// <param name="requiredRole">
    /// The role to scope the check to, or <c>null</c> for the role-less (whole-cluster-leader) mode.
    /// Only its null-ness is inspected here; the role name itself is not used.
    /// </param>
    /// <returns>
    /// Role-less: <see cref="HealthStatus.Healthy"/> iff the node is Up and the cluster leader,
    /// otherwise <see cref="HealthStatus.Unhealthy"/>.
    /// Role-filtered: <see cref="HealthStatus.Healthy"/> when the node lacks the role (probe
    /// irrelevant) or carries the role and is the role-singleton leader;
    /// <see cref="HealthStatus.Degraded"/> when it carries the role but is not the leader.
    /// </returns>
    public static HealthStatus Evaluate(bool selfUp, bool isLeader, bool hasRole, string? requiredRole)
    {
        var roleLessMode = requiredRole is null;
        if (roleLessMode)
        {
            var isActive = selfUp && isLeader;
            return isActive ? HealthStatus.Healthy : HealthStatus.Unhealthy;
        }

        // Role-filtered mode: the only non-healthy outcome is a role member that is not the
        // role leader. Nodes without the role are not expected to lead, so they stay Healthy.
        if (hasRole && !isLeader)
        {
            return HealthStatus.Degraded;
        }

        return HealthStatus.Healthy;
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Health check that reports whether this node is the designated active / leader node.
/// An optional role scopes the check to nodes carrying that role. Register to the
/// <see cref="ZbHealthTags.Active"/> tag.
/// </summary>
/// <remarks>
/// <para>
/// The <see cref="ActorSystem"/> is resolved from the service provider on every probe rather than
/// at construction time. While none is registered — e.g. during startup before Akka is
/// initialised — the check returns <see cref="HealthStatus.Degraded"/> rather than throwing.
/// </para>
/// <para>
/// Once an <see cref="ActorSystem"/> is available, the status is whatever
/// <see cref="ActiveNodeDecision.Evaluate"/> yields for the current cluster view. In particular a
/// node that is not (yet) <c>Up</c> is reported <see cref="HealthStatus.Unhealthy"/> in role-less
/// mode, exactly like any other non-active node.
/// </para>
/// </remarks>
public sealed class ActiveNodeHealthCheck : IHealthCheck
{
    private readonly IServiceProvider _serviceProvider;

    // null selects role-less (whole-cluster leader) mode; otherwise the role to scope the check to.
    private readonly string? _role;

    /// <summary>
    /// Role-less constructor: Healthy when the node is <c>Up</c> and the cluster leader
    /// (ScadaBridge ActiveNode pattern); Unhealthy otherwise. Degraded while no
    /// <see cref="ActorSystem"/> is registered.
    /// </summary>
    /// <param name="serviceProvider">
    /// The application service provider. The <see cref="ActorSystem"/> is resolved lazily so the
    /// check is startup-safe: if no <see cref="ActorSystem"/> is registered yet the result is Degraded.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="serviceProvider"/> is <c>null</c>.</exception>
    public ActiveNodeHealthCheck(IServiceProvider serviceProvider)
    {
        _serviceProvider = serviceProvider ?? throw new ArgumentNullException(nameof(serviceProvider));
        _role = null;
    }

    /// <summary>
    /// Role-filtered constructor: Healthy when the node lacks <paramref name="role"/> or carries it
    /// and is the role-singleton leader; Degraded when it carries the role but is not the leader
    /// (OtOpcUa AdminRoleLeader pattern). Degraded while no <see cref="ActorSystem"/> is registered.
    /// </summary>
    /// <param name="serviceProvider">
    /// The application service provider. The <see cref="ActorSystem"/> is resolved lazily so the
    /// check is startup-safe: if no <see cref="ActorSystem"/> is registered yet the result is Degraded.
    /// </param>
    /// <param name="role">The Akka cluster role to scope the check to. Must not be blank.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="serviceProvider"/> or <paramref name="role"/> is <c>null</c>.
    /// </exception>
    /// <exception cref="ArgumentException"><paramref name="role"/> is empty or whitespace.</exception>
    public ActiveNodeHealthCheck(IServiceProvider serviceProvider, string role)
    {
        _serviceProvider = serviceProvider ?? throw new ArgumentNullException(nameof(serviceProvider));
        _role = role ?? throw new ArgumentNullException(nameof(role));

        // A blank role can never match a member role, which would make the check permanently
        // Healthy ("node lacks the role") and hide the misconfiguration. Fail fast instead.
        if (string.IsNullOrWhiteSpace(role))
        {
            throw new ArgumentException("Role must not be empty or whitespace.", nameof(role));
        }
    }

    /// <inheritdoc />
    public Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        var system = _serviceProvider.GetService<ActorSystem>();
        if (system is null)
        {
            return Task.FromResult(HealthCheckResult.Degraded("ActorSystem not yet available."));
        }

        var cluster = Cluster.Get(system);
        var self = cluster.SelfMember;
        var selfUp = self.Status == MemberStatus.Up;

        // Role-less mode compares against the cluster leader and never looks at roles;
        // role-filtered mode compares against the leader of the configured role.
        var state = cluster.State;
        var hasRole = _role is not null && self.HasRole(_role);
        var leaderAddress = _role is null ? state.Leader : state.RoleLeader(_role);
        var isLeader = leaderAddress is not null && leaderAddress == self.Address;

        var health = ActiveNodeDecision.Evaluate(selfUp, isLeader, hasRole, _role);
        return Task.FromResult(new HealthCheckResult(health, DescribeResult(health, self.Status, hasRole)));
    }

    /// <summary>
    /// Builds the human-readable description attached to the health result.
    /// </summary>
    /// <param name="health">The status produced by <see cref="ActiveNodeDecision.Evaluate"/>.</param>
    /// <param name="status">The local member status, reported for standby nodes in role-less mode.</param>
    /// <param name="hasRole">
    /// Whether the local node carries the configured role; used to tell "role leader" apart from
    /// "not a role member", both of which map to <see cref="HealthStatus.Healthy"/>.
    /// </param>
    private string DescribeResult(HealthStatus health, MemberStatus status, bool hasRole)
    {
        if (_role is null)
        {
            return health == HealthStatus.Healthy
                ? "Active node (cluster leader)."
                : $"Standby node (status: {status}).";
        }

        if (!hasRole)
        {
            return $"Not a member of role '{_role}'; leader check not applicable.";
        }

        return health == HealthStatus.Healthy
            ? $"Active for role '{_role}' (role leader)."
            : $"Role '{_role}' member but not leader.";
    }
}
|
||||||
@@ -0,0 +1,50 @@
|
|||||||
|
using Akka.Actor;
|
||||||
|
using Akka.Cluster;
|
||||||
|
using Microsoft.Extensions.DependencyInjection;
|
||||||
|
|
||||||
|
namespace ZB.MOM.WW.Health.Akka;
|
||||||
|
|
||||||
|
/// <summary>
/// <see cref="IActiveNodeGate"/> backed by the Akka cluster: <see cref="IsActiveNode"/> is
/// <c>true</c> only while the local member is <c>Up</c> and its address is the cluster leader's.
/// Register as a singleton.
/// </summary>
/// <remarks>
/// The <see cref="ActorSystem"/> is looked up in the service provider on each read; when none is
/// registered — e.g. during startup before Akka is initialised — <see cref="IsActiveNode"/> is
/// <c>false</c> (the safe default during startup). The gate inspects the cluster state itself and
/// does not resolve <see cref="ActiveNodeHealthCheck"/> from DI.
/// </remarks>
public sealed class AkkaActiveNodeGate : IActiveNodeGate
{
    private readonly IServiceProvider _serviceProvider;

    /// <summary>Initializes a new <see cref="AkkaActiveNodeGate"/>.</summary>
    /// <param name="serviceProvider">
    /// The application service provider. The <see cref="ActorSystem"/> is resolved lazily; if it is
    /// not yet available <see cref="IsActiveNode"/> returns <c>false</c>.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="serviceProvider"/> is <c>null</c>.</exception>
    public AkkaActiveNodeGate(IServiceProvider serviceProvider)
    {
        if (serviceProvider is null)
        {
            throw new ArgumentNullException(nameof(serviceProvider));
        }

        _serviceProvider = serviceProvider;
    }

    /// <inheritdoc />
    public bool IsActiveNode
    {
        get
        {
            if (_serviceProvider.GetService<ActorSystem>() is not { } actorSystem)
            {
                return false;
            }

            var akkaCluster = Cluster.Get(actorSystem);
            var localMember = akkaCluster.SelfMember;

            // Short-circuits in the same order as a guard-clause version: the leader is only
            // read once the local member is known to be Up.
            return localMember.Status == MemberStatus.Up
                && akkaCluster.State.Leader is { } clusterLeader
                && clusterLeader == localMember.Address;
        }
    }
}
|
||||||
@@ -0,0 +1,91 @@
|
|||||||
|
using Akka.Actor;
|
||||||
|
using Microsoft.Extensions.DependencyInjection;
|
||||||
|
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||||
|
using ZB.MOM.WW.Health.Akka;
|
||||||
|
|
||||||
|
namespace ZB.MOM.WW.Health.Akka.Tests;
|
||||||
|
|
||||||
|
/// <summary>
/// Table-driven tests for the pure <see cref="ActiveNodeDecision.Evaluate"/> helper covering both
/// the role-less (ScadaBridge ActiveNode) and role-filtered (OtOpcUa AdminRoleLeader) matrices,
/// plus the startup-safety null-guards on <see cref="ActiveNodeHealthCheck"/> and
/// <see cref="AkkaActiveNodeGate"/> when no <see cref="ActorSystem"/> is registered.
/// </summary>
public sealed class ActiveNodeDecisionTests
{
    // Role-less: requiredRole == null. Healthy iff (selfUp && isLeader), else Unhealthy.
    // Every (selfUp, isLeader) pair is listed with both hasRole values to pin that hasRole is ignored.
    public static IEnumerable<object[]> RoleLessCases() => new[]
    {
        new object[] { true,  true,  false, (string?)null, HealthStatus.Healthy },
        new object[] { true,  false, false, (string?)null, HealthStatus.Unhealthy },
        new object[] { false, true,  false, (string?)null, HealthStatus.Unhealthy },
        new object[] { false, false, false, (string?)null, HealthStatus.Unhealthy },
        new object[] { true,  true,  true,  (string?)null, HealthStatus.Healthy },
        new object[] { true,  false, true,  (string?)null, HealthStatus.Unhealthy },
        new object[] { false, true,  true,  (string?)null, HealthStatus.Unhealthy },
        new object[] { false, false, true,  (string?)null, HealthStatus.Unhealthy },
    };

    [Theory]
    [MemberData(nameof(RoleLessCases))]
    public void Evaluate_RoleLess(bool selfUp, bool isLeader, bool hasRole, string? requiredRole, HealthStatus expected)
    {
        Assert.Equal(expected, ActiveNodeDecision.Evaluate(selfUp, isLeader, hasRole, requiredRole));
    }

    // Role-filtered: requiredRole != null.
    //   lacks role               -> Healthy (probe irrelevant for this node)
    //   has role & is leader     -> Healthy
    //   has role & not leader    -> Degraded
    // All eight (selfUp, isLeader, hasRole) combinations are listed so the table is exhaustive.
    public static IEnumerable<object[]> RoleFilteredCases() => new[]
    {
        // node lacks the role -> Healthy regardless of selfUp / isLeader
        new object[] { true,  true,  false, "admin", HealthStatus.Healthy },
        new object[] { true,  false, false, "admin", HealthStatus.Healthy },
        new object[] { false, true,  false, "admin", HealthStatus.Healthy },
        new object[] { false, false, false, "admin", HealthStatus.Healthy },
        // node carries the role and is leader -> Healthy (selfUp is not consulted in this mode)
        new object[] { true,  true,  true,  "admin", HealthStatus.Healthy },
        new object[] { false, true,  true,  "admin", HealthStatus.Healthy },
        // node carries the role but is not leader -> Degraded
        new object[] { true,  false, true,  "admin", HealthStatus.Degraded },
        new object[] { false, false, true,  "admin", HealthStatus.Degraded },
    };

    [Theory]
    [MemberData(nameof(RoleFilteredCases))]
    public void Evaluate_RoleFiltered(bool selfUp, bool isLeader, bool hasRole, string? requiredRole, HealthStatus expected)
    {
        Assert.Equal(expected, ActiveNodeDecision.Evaluate(selfUp, isLeader, hasRole, requiredRole));
    }

    [Fact]
    public async Task HealthCheck_RoleLess_NoActorSystem_ReturnsDegraded()
    {
        // The built provider is disposable; dispose it so the test does not leak it.
        using var provider = new ServiceCollection().BuildServiceProvider();
        var check = new ActiveNodeHealthCheck(provider);

        var result = await check.CheckHealthAsync(NewContext(check));

        Assert.Equal(HealthStatus.Degraded, result.Status);
    }

    [Fact]
    public async Task HealthCheck_RoleFiltered_NoActorSystem_ReturnsDegraded()
    {
        using var provider = new ServiceCollection().BuildServiceProvider();
        var check = new ActiveNodeHealthCheck(provider, "admin");

        var result = await check.CheckHealthAsync(NewContext(check));

        Assert.Equal(HealthStatus.Degraded, result.Status);
    }

    [Fact]
    public void Gate_NoActorSystem_IsActiveNodeFalse()
    {
        using var provider = new ServiceCollection().BuildServiceProvider();
        var gate = new AkkaActiveNodeGate(provider);

        Assert.False(gate.IsActiveNode);
    }

    // Minimal context: the checks under test do not read the registration, but the
    // health-check contract expects one to be present.
    private static HealthCheckContext NewContext(IHealthCheck check) => new()
    {
        Registration = new HealthCheckRegistration("active-node", check, HealthStatus.Unhealthy, tags: null),
    };
}
|
||||||
Reference in New Issue
Block a user