using Microsoft.Extensions.Diagnostics.HealthChecks;
using ZB.MOM.WW.CBDDC.Core;
using ZB.MOM.WW.CBDDC.Core.Storage;
using ZB.MOM.WW.CBDDC.Hosting.Configuration;
using ZB.MOM.WW.CBDDC.Hosting.HealthChecks;
namespace ZB.MOM.WW.CBDDC.Hosting.Tests;
// NOTE(review): generic type arguments appear to be missing throughout this file
// (e.g. Substitute.For(), Arg.Any(), Task.FromResult>(...), ShouldBeOfType>(), Array.Empty(),
// Task.FromException(error)). They look like they were stripped by a tool that removes
// angle-bracketed text - TODO confirm against the original file and restore before compiling.
/// <summary>
/// Tests for <c>CBDDCHealthCheck</c>: each test stubs the store and the peer confirmation store,
/// runs <c>CheckHealthAsync</c>, and asserts on the reported status and data payload.
/// </summary>
public class CBDDCHealthCheckTests
{
/// <summary>
/// Verifies that health is reported as healthy when persistence is available and all peers are within lag thresholds.
/// </summary>
[Fact]
public async Task CheckHealthAsync_WhenPersistenceOkAndPeersWithinLagThreshold_ReturnsHealthyWithPayload()
{
var store = Substitute.For();
var confirmationStore = Substitute.For();
// Captured in locals so the payload assertions below can compare against the exact same values.
var peer1LastUpdate = DateTimeOffset.UtcNow.AddSeconds(-5);
var peer2LastUpdate = DateTimeOffset.UtcNow.AddSeconds(-2);
// Latest local timestamp is 1_000 (presumably the wall-clock component - TODO confirm HlcTimestamp argument order).
store.GetLatestTimestampAsync(Arg.Any()).Returns(new HlcTimestamp(1_000, 0, "node-1"));
confirmationStore.GetActiveTrackedPeersAsync(Arg.Any())
.Returns(Task.FromResult>(new[] { "peer-1", "peer-2" }));
// peer-1 confirmed at 995: 5 behind the latest timestamp of 1_000.
confirmationStore.GetConfirmationsForPeerAsync("peer-1", Arg.Any())
.Returns(Task.FromResult>(new[]
{
new PeerOplogConfirmation
{
PeerNodeId = "peer-1",
SourceNodeId = "source-1",
ConfirmedWall = 995,
ConfirmedLogic = 0,
LastConfirmedUtc = peer1LastUpdate,
IsActive = true
}
}));
// peer-2 confirmed at 990: 10 behind, which is the maxLagMs value asserted below.
confirmationStore.GetConfirmationsForPeerAsync("peer-2", Arg.Any())
.Returns(Task.FromResult>(new[]
{
new PeerOplogConfirmation
{
PeerNodeId = "peer-2",
SourceNodeId = "source-1",
ConfirmedWall = 990,
ConfirmedLogic = 0,
LastConfirmedUtc = peer2LastUpdate,
IsActive = true
}
}));
// Thresholds of 20 / 50 sit above the expected worst lag of 10, so no peer should count as lagging.
var healthCheck = new CBDDCHealthCheck(
store,
confirmationStore,
CreateOptions(lagThresholdMs: 20, criticalLagThresholdMs: 50));
var result = await healthCheck.CheckHealthAsync(new HealthCheckContext());
result.Status.ShouldBe(HealthStatus.Healthy);
result.Data["trackedPeerCount"].ShouldBe(2);
result.Data["maxLagMs"].ShouldBe(10L);
result.Data["laggingPeers"].ShouldBeOfType>().Count.ShouldBe(0);
result.Data["peersWithNoConfirmation"].ShouldBeOfType>().Count.ShouldBe(0);
var lastUpdates = result.Data["lastSuccessfulConfirmationUpdateByPeer"]
.ShouldBeOfType>();
lastUpdates["peer-1"].ShouldBe(peer1LastUpdate);
lastUpdates["peer-2"].ShouldBe(peer2LastUpdate);
}
/// <summary>
/// Verifies that health is reported as degraded when at least one peer is lagging or has no confirmation.
/// </summary>
[Fact]
public async Task CheckHealthAsync_WhenPeersLaggingOrUnconfirmed_ReturnsDegradedWithPayload()
{
var store = Substitute.For();
var confirmationStore = Substitute.For();
var peer1LastUpdate = DateTimeOffset.UtcNow.AddSeconds(-10);
store.GetLatestTimestampAsync(Arg.Any()).Returns(new HlcTimestamp(1_000, 0, "node-1"));
confirmationStore.GetActiveTrackedPeersAsync(Arg.Any())
.Returns(Task.FromResult>(new[] { "peer-1", "peer-2", "peer-3" }));
// peer-1 confirmed at 960: 40 behind 1_000, above the lag threshold (30) but below the critical one (100).
confirmationStore.GetConfirmationsForPeerAsync("peer-1", Arg.Any())
.Returns(Task.FromResult>(new[]
{
new PeerOplogConfirmation
{
PeerNodeId = "peer-1",
SourceNodeId = "source-1",
ConfirmedWall = 960,
ConfirmedLogic = 0,
LastConfirmedUtc = peer1LastUpdate,
IsActive = true
}
}));
// peer-2 has no confirmations at all; the test expects it under "peersWithNoConfirmation".
confirmationStore.GetConfirmationsForPeerAsync("peer-2", Arg.Any())
.Returns(Task.FromResult>(Array.Empty()));
// peer-3 confirmed at 995: 5 behind, within the lag threshold.
confirmationStore.GetConfirmationsForPeerAsync("peer-3", Arg.Any())
.Returns(Task.FromResult>(new[]
{
new PeerOplogConfirmation
{
PeerNodeId = "peer-3",
SourceNodeId = "source-1",
ConfirmedWall = 995,
ConfirmedLogic = 0,
LastConfirmedUtc = DateTimeOffset.UtcNow.AddSeconds(-4),
IsActive = true
}
}));
var healthCheck = new CBDDCHealthCheck(
store,
confirmationStore,
CreateOptions(lagThresholdMs: 30, criticalLagThresholdMs: 100));
var result = await healthCheck.CheckHealthAsync(new HealthCheckContext());
result.Status.ShouldBe(HealthStatus.Degraded);
result.Data["trackedPeerCount"].ShouldBe(3);
result.Data["maxLagMs"].ShouldBe(40L);
result.Data["laggingPeers"].ShouldBeOfType>().ShouldContain("peer-1");
result.Data["peersWithNoConfirmation"].ShouldBeOfType>().ShouldContain("peer-2");
var lastUpdates = result.Data["lastSuccessfulConfirmationUpdateByPeer"]
.ShouldBeOfType>();
lastUpdates["peer-1"].ShouldBe(peer1LastUpdate);
// A peer without any confirmation is expected to map to a null last-update entry.
lastUpdates["peer-2"].ShouldBeNull();
}
/// <summary>
/// Verifies that health is reported as unhealthy when critical lag threshold is exceeded.
/// </summary>
[Fact]
public async Task CheckHealthAsync_WhenCriticalLagBreached_ReturnsUnhealthyWithPayload()
{
var store = Substitute.For();
var confirmationStore = Substitute.For();
store.GetLatestTimestampAsync(Arg.Any()).Returns(new HlcTimestamp(1_000, 0, "node-1"));
confirmationStore.GetActiveTrackedPeersAsync(Arg.Any())
.Returns(Task.FromResult>(new[] { "peer-critical" }));
// Confirmed at 850: 150 behind 1_000, which exceeds the critical threshold of 80 configured below.
confirmationStore.GetConfirmationsForPeerAsync("peer-critical", Arg.Any())
.Returns(Task.FromResult>(new[]
{
new PeerOplogConfirmation
{
PeerNodeId = "peer-critical",
SourceNodeId = "source-1",
ConfirmedWall = 850,
ConfirmedLogic = 0,
LastConfirmedUtc = DateTimeOffset.UtcNow.AddMinutes(-1),
IsActive = true
}
}));
var healthCheck = new CBDDCHealthCheck(
store,
confirmationStore,
CreateOptions(lagThresholdMs: 30, criticalLagThresholdMs: 80));
var result = await healthCheck.CheckHealthAsync(new HealthCheckContext());
result.Status.ShouldBe(HealthStatus.Unhealthy);
result.Data["maxLagMs"].ShouldBe(150L);
result.Data["laggingPeers"].ShouldBeOfType>().ShouldContain("peer-critical");
}
/// <summary>
/// Verifies that worst-case lag is used when a peer has multiple source confirmations.
/// </summary>
[Fact]
public async Task CheckHealthAsync_WhenPeerHasMultipleSourceConfirmations_UsesWorstCaseLag()
{
var store = Substitute.For();
var confirmationStore = Substitute.For();
store.GetLatestTimestampAsync(Arg.Any()).Returns(new HlcTimestamp(1_000, 0, "node-1"));
confirmationStore.GetActiveTrackedPeersAsync(Arg.Any())
.Returns(Task.FromResult>(new[] { "peer-1" }));
// Two confirmations for the same peer: "source-fast" is 5 behind, "source-slow" is 100 behind.
// The test expects the larger value (100) to be reported as maxLagMs.
confirmationStore.GetConfirmationsForPeerAsync("peer-1", Arg.Any())
.Returns(Task.FromResult>(new[]
{
new PeerOplogConfirmation
{
PeerNodeId = "peer-1",
SourceNodeId = "source-fast",
ConfirmedWall = 995,
ConfirmedLogic = 0,
LastConfirmedUtc = DateTimeOffset.UtcNow.AddSeconds(-1),
IsActive = true
},
new PeerOplogConfirmation
{
PeerNodeId = "peer-1",
SourceNodeId = "source-slow",
ConfirmedWall = 900,
ConfirmedLogic = 0,
LastConfirmedUtc = DateTimeOffset.UtcNow.AddSeconds(-10),
IsActive = true
}
}));
// 100 is above the lag threshold (80) but below the critical threshold (150), hence Degraded.
var healthCheck = new CBDDCHealthCheck(
store,
confirmationStore,
CreateOptions(lagThresholdMs: 80, criticalLagThresholdMs: 150));
var result = await healthCheck.CheckHealthAsync(new HealthCheckContext());
result.Status.ShouldBe(HealthStatus.Degraded);
result.Data["maxLagMs"].ShouldBe(100L);
result.Data["laggingPeers"].ShouldBeOfType>().ShouldContain("peer-1");
}
/// <summary>
/// Verifies that health is reported as unhealthy when the persistence store throws.
/// </summary>
[Fact]
public async Task CheckHealthAsync_WhenStoreThrows_ReturnsUnhealthy()
{
var store = Substitute.For();
var confirmationStore = Substitute.For();
var error = new InvalidOperationException("store unavailable");
// The stubbed call returns a faulted task, so the failure surfaces when the result is awaited.
store.GetLatestTimestampAsync(Arg.Any())
.Returns(Task.FromException(error));
var healthCheck = new CBDDCHealthCheck(
store,
confirmationStore,
CreateOptions());
var result = await healthCheck.CheckHealthAsync(new HealthCheckContext());
result.Status.ShouldBe(HealthStatus.Unhealthy);
// The exact exception instance thrown by the store is expected on the result.
result.Exception.ShouldBe(error);
result.Description.ShouldNotBeNull();
result.Description.ShouldContain("persistence layer is unavailable");
}
/// <summary>
/// Builds a <c>CBDDCHostingOptions</c> whose <c>Cluster</c> section carries the given peer confirmation lag thresholds.
/// </summary>
/// <param name="lagThresholdMs">Value assigned to <c>PeerConfirmationLagThresholdMs</c>; defaults to 30_000.</param>
/// <param name="criticalLagThresholdMs">Value assigned to <c>PeerConfirmationCriticalLagThresholdMs</c>; defaults to 120_000.</param>
/// <returns>A new options instance with only the <c>Cluster</c> thresholds set explicitly.</returns>
private static CBDDCHostingOptions CreateOptions(
long lagThresholdMs = 30_000,
long criticalLagThresholdMs = 120_000)
{
return new CBDDCHostingOptions
{
Cluster = new ClusterOptions
{
PeerConfirmationLagThresholdMs = lagThresholdMs,
PeerConfirmationCriticalLagThresholdMs = criticalLagThresholdMs
}
};
}
}