using Microsoft.Extensions.Diagnostics.HealthChecks;
using ZB.MOM.WW.CBDDC.Core;
using ZB.MOM.WW.CBDDC.Core.Storage;
using ZB.MOM.WW.CBDDC.Hosting.Configuration;
using ZB.MOM.WW.CBDDC.Hosting.HealthChecks;

namespace ZB.MOM.WW.CBDDC.Hosting.Tests;

/// <summary>
/// Tests for <see cref="CBDDCHealthCheck" />: each test stubs the latest oplog timestamp and the
/// per-peer confirmations, runs the check, and asserts the reported status and data payload.
/// </summary>
public class CBDDCHealthCheckTests
{
    // NOTE(review): the generic type arguments in this file were missing from the reviewed source
    // and have been reconstructed. HlcTimestamp and PeerOplogConfirmation are taken from the
    // surrounding code; IOplogStore, IPeerOplogConfirmationStore, CancellationToken, List<string>
    // and Dictionary<string, DateTimeOffset?> are assumptions - confirm them against the
    // CBDDCHealthCheck constructor and the payload it builds.
    // Xunit, NSubstitute, Shouldly and the System.* namespaces are presumably supplied by
    // global/implicit usings, as the file has no using directives for them.

    /// <summary>
    /// Verifies that health is reported as healthy when persistence is available and all peers are within lag thresholds.
    /// </summary>
    [Fact]
    public async Task CheckHealthAsync_WhenPersistenceOkAndPeersWithinLagThreshold_ReturnsHealthyWithPayload()
    {
        var store = Substitute.For<IOplogStore>();
        var confirmationStore = Substitute.For<IPeerOplogConfirmationStore>();
        var peer1LastUpdate = DateTimeOffset.UtcNow.AddSeconds(-5);
        var peer2LastUpdate = DateTimeOffset.UtcNow.AddSeconds(-2);

        store.GetLatestTimestampAsync(Arg.Any<CancellationToken>())
            .Returns(new HlcTimestamp(1_000, 0, "node-1"));
        confirmationStore.GetActiveTrackedPeersAsync(Arg.Any<CancellationToken>())
            .Returns(new[] { "peer-1", "peer-2" });
        confirmationStore.GetConfirmationsForPeerAsync("peer-1", Arg.Any<CancellationToken>())
            .Returns(new[]
            {
                new PeerOplogConfirmation
                {
                    PeerNodeId = "peer-1",
                    SourceNodeId = "source-1",
                    ConfirmedWall = 995,
                    ConfirmedLogic = 0,
                    LastConfirmedUtc = peer1LastUpdate,
                    IsActive = true
                }
            });
        confirmationStore.GetConfirmationsForPeerAsync("peer-2", Arg.Any<CancellationToken>())
            .Returns(new[]
            {
                new PeerOplogConfirmation
                {
                    PeerNodeId = "peer-2",
                    SourceNodeId = "source-1",
                    ConfirmedWall = 990,
                    ConfirmedLogic = 0,
                    LastConfirmedUtc = peer2LastUpdate,
                    IsActive = true
                }
            });

        var healthCheck = new CBDDCHealthCheck(
            store,
            confirmationStore,
            CreateOptions(lagThresholdMs: 20, criticalLagThresholdMs: 50));

        var result = await healthCheck.CheckHealthAsync(new HealthCheckContext());

        result.Status.ShouldBe(HealthStatus.Healthy);
        result.Data["trackedPeerCount"].ShouldBe(2);
        // Latest wall 1_000 minus the oldest confirmed wall (990, peer-2).
        result.Data["maxLagMs"].ShouldBe(10L);
        result.Data["laggingPeers"].ShouldBeOfType<List<string>>().Count.ShouldBe(0);
        result.Data["peersWithNoConfirmation"].ShouldBeOfType<List<string>>().Count.ShouldBe(0);

        var lastUpdates = result.Data["lastSuccessfulConfirmationUpdateByPeer"]
            .ShouldBeOfType<Dictionary<string, DateTimeOffset?>>();
        lastUpdates["peer-1"].ShouldBe(peer1LastUpdate);
        lastUpdates["peer-2"].ShouldBe(peer2LastUpdate);
    }

    /// <summary>
    /// Verifies that health is reported as degraded when at least one peer is lagging or has no confirmation.
    /// </summary>
    [Fact]
    public async Task CheckHealthAsync_WhenPeersLaggingOrUnconfirmed_ReturnsDegradedWithPayload()
    {
        var store = Substitute.For<IOplogStore>();
        var confirmationStore = Substitute.For<IPeerOplogConfirmationStore>();
        var peer1LastUpdate = DateTimeOffset.UtcNow.AddSeconds(-10);

        store.GetLatestTimestampAsync(Arg.Any<CancellationToken>())
            .Returns(new HlcTimestamp(1_000, 0, "node-1"));
        confirmationStore.GetActiveTrackedPeersAsync(Arg.Any<CancellationToken>())
            .Returns(new[] { "peer-1", "peer-2", "peer-3" });
        confirmationStore.GetConfirmationsForPeerAsync("peer-1", Arg.Any<CancellationToken>())
            .Returns(new[]
            {
                new PeerOplogConfirmation
                {
                    PeerNodeId = "peer-1",
                    SourceNodeId = "source-1",
                    ConfirmedWall = 960,
                    ConfirmedLogic = 0,
                    LastConfirmedUtc = peer1LastUpdate,
                    IsActive = true
                }
            });
        // peer-2 is tracked but has never confirmed anything.
        confirmationStore.GetConfirmationsForPeerAsync("peer-2", Arg.Any<CancellationToken>())
            .Returns(Array.Empty<PeerOplogConfirmation>());
        confirmationStore.GetConfirmationsForPeerAsync("peer-3", Arg.Any<CancellationToken>())
            .Returns(new[]
            {
                new PeerOplogConfirmation
                {
                    PeerNodeId = "peer-3",
                    SourceNodeId = "source-1",
                    ConfirmedWall = 995,
                    ConfirmedLogic = 0,
                    LastConfirmedUtc = DateTimeOffset.UtcNow.AddSeconds(-4),
                    IsActive = true
                }
            });

        var healthCheck = new CBDDCHealthCheck(
            store,
            confirmationStore,
            CreateOptions(lagThresholdMs: 30, criticalLagThresholdMs: 100));

        var result = await healthCheck.CheckHealthAsync(new HealthCheckContext());

        result.Status.ShouldBe(HealthStatus.Degraded);
        result.Data["trackedPeerCount"].ShouldBe(3);
        // Latest wall 1_000 minus peer-1's confirmed wall 960.
        result.Data["maxLagMs"].ShouldBe(40L);
        result.Data["laggingPeers"].ShouldBeOfType<List<string>>().ShouldContain("peer-1");
        result.Data["peersWithNoConfirmation"].ShouldBeOfType<List<string>>().ShouldContain("peer-2");

        var lastUpdates = result.Data["lastSuccessfulConfirmationUpdateByPeer"]
            .ShouldBeOfType<Dictionary<string, DateTimeOffset?>>();
        lastUpdates["peer-1"].ShouldBe(peer1LastUpdate);
        lastUpdates["peer-2"].ShouldBeNull();
    }

    /// <summary>
    /// Verifies that health is reported as unhealthy when critical lag threshold is exceeded.
    /// </summary>
    [Fact]
    public async Task CheckHealthAsync_WhenCriticalLagBreached_ReturnsUnhealthyWithPayload()
    {
        var store = Substitute.For<IOplogStore>();
        var confirmationStore = Substitute.For<IPeerOplogConfirmationStore>();

        store.GetLatestTimestampAsync(Arg.Any<CancellationToken>())
            .Returns(new HlcTimestamp(1_000, 0, "node-1"));
        confirmationStore.GetActiveTrackedPeersAsync(Arg.Any<CancellationToken>())
            .Returns(new[] { "peer-critical" });
        confirmationStore.GetConfirmationsForPeerAsync("peer-critical", Arg.Any<CancellationToken>())
            .Returns(new[]
            {
                new PeerOplogConfirmation
                {
                    PeerNodeId = "peer-critical",
                    SourceNodeId = "source-1",
                    ConfirmedWall = 850,
                    ConfirmedLogic = 0,
                    LastConfirmedUtc = DateTimeOffset.UtcNow.AddMinutes(-1),
                    IsActive = true
                }
            });

        var healthCheck = new CBDDCHealthCheck(
            store,
            confirmationStore,
            CreateOptions(lagThresholdMs: 30, criticalLagThresholdMs: 80));

        var result = await healthCheck.CheckHealthAsync(new HealthCheckContext());

        result.Status.ShouldBe(HealthStatus.Unhealthy);
        // Latest wall 1_000 minus the confirmed wall 850.
        result.Data["maxLagMs"].ShouldBe(150L);
        result.Data["laggingPeers"].ShouldBeOfType<List<string>>().ShouldContain("peer-critical");
    }

    /// <summary>
    /// Verifies that worst-case lag is used when a peer has multiple source confirmations.
    /// </summary>
    [Fact]
    public async Task CheckHealthAsync_WhenPeerHasMultipleSourceConfirmations_UsesWorstCaseLag()
    {
        var store = Substitute.For<IOplogStore>();
        var confirmationStore = Substitute.For<IPeerOplogConfirmationStore>();

        store.GetLatestTimestampAsync(Arg.Any<CancellationToken>())
            .Returns(new HlcTimestamp(1_000, 0, "node-1"));
        confirmationStore.GetActiveTrackedPeersAsync(Arg.Any<CancellationToken>())
            .Returns(new[] { "peer-1" });
        confirmationStore.GetConfirmationsForPeerAsync("peer-1", Arg.Any<CancellationToken>())
            .Returns(new[]
            {
                new PeerOplogConfirmation
                {
                    PeerNodeId = "peer-1",
                    SourceNodeId = "source-fast",
                    ConfirmedWall = 995,
                    ConfirmedLogic = 0,
                    LastConfirmedUtc = DateTimeOffset.UtcNow.AddSeconds(-1),
                    IsActive = true
                },
                new PeerOplogConfirmation
                {
                    PeerNodeId = "peer-1",
                    SourceNodeId = "source-slow",
                    ConfirmedWall = 900,
                    ConfirmedLogic = 0,
                    LastConfirmedUtc = DateTimeOffset.UtcNow.AddSeconds(-10),
                    IsActive = true
                }
            });

        var healthCheck = new CBDDCHealthCheck(
            store,
            confirmationStore,
            CreateOptions(lagThresholdMs: 80, criticalLagThresholdMs: 150));

        var result = await healthCheck.CheckHealthAsync(new HealthCheckContext());

        result.Status.ShouldBe(HealthStatus.Degraded);
        // The slow source (wall 900) decides the lag, not the fast one (wall 995).
        result.Data["maxLagMs"].ShouldBe(100L);
        result.Data["laggingPeers"].ShouldBeOfType<List<string>>().ShouldContain("peer-1");
    }

    /// <summary>
    /// Verifies that health is reported as unhealthy when the persistence store throws.
    /// </summary>
    [Fact]
    public async Task CheckHealthAsync_WhenStoreThrows_ReturnsUnhealthy()
    {
        var store = Substitute.For<IOplogStore>();
        var confirmationStore = Substitute.For<IPeerOplogConfirmationStore>();
        var error = new InvalidOperationException("store unavailable");

        // A faulted task, so the failure surfaces when the health check awaits the store.
        store.GetLatestTimestampAsync(Arg.Any<CancellationToken>())
            .Returns(Task.FromException<HlcTimestamp>(error));

        var healthCheck = new CBDDCHealthCheck(
            store,
            confirmationStore,
            CreateOptions());

        var result = await healthCheck.CheckHealthAsync(new HealthCheckContext());

        result.Status.ShouldBe(HealthStatus.Unhealthy);
        result.Exception.ShouldBe(error);
        result.Description.ShouldNotBeNull();
        result.Description.ShouldContain("persistence layer is unavailable");
    }

    /// <summary>
    /// Builds hosting options whose cluster section carries the given peer confirmation lag thresholds.
    /// </summary>
    /// <param name="lagThresholdMs">Value assigned to <c>PeerConfirmationLagThresholdMs</c>.</param>
    /// <param name="criticalLagThresholdMs">Value assigned to <c>PeerConfirmationCriticalLagThresholdMs</c>.</param>
    /// <returns>A new <see cref="CBDDCHostingOptions" /> instance with only the cluster thresholds set.</returns>
    private static CBDDCHostingOptions CreateOptions(
        long lagThresholdMs = 30_000,
        long criticalLagThresholdMs = 120_000)
    {
        return new CBDDCHostingOptions
        {
            Cluster = new ClusterOptions
            {
                PeerConfirmationLagThresholdMs = lagThresholdMs,
                PeerConfirmationCriticalLagThresholdMs = criticalLagThresholdMs
            }
        };
    }
}