Files
CBDDC/tests/ZB.MOM.WW.CBDDC.Hosting.Tests/CBDDCHealthCheckTests.cs

257 lines
11 KiB
C#

using Microsoft.Extensions.Diagnostics.HealthChecks;
using ZB.MOM.WW.CBDDC.Core;
using ZB.MOM.WW.CBDDC.Core.Storage;
using ZB.MOM.WW.CBDDC.Hosting.Configuration;
using ZB.MOM.WW.CBDDC.Hosting.HealthChecks;
namespace ZB.MOM.WW.CBDDC.Hosting.Tests;
/// <summary>
/// Exercises <see cref="CBDDCHealthCheck" /> against substituted oplog and peer-confirmation stores
/// and checks both the reported status and the diagnostic payload.
/// </summary>
public class CBDDCHealthCheckTests
{
    /// <summary>
    /// Two tracked peers that both stay under the lag threshold yield a healthy result with a complete payload.
    /// </summary>
    [Fact]
    public async Task CheckHealthAsync_WhenPersistenceOkAndPeersWithinLagThreshold_ReturnsHealthyWithPayload()
    {
        // Arrange
        var firstPeerConfirmedAt = DateTimeOffset.UtcNow.AddSeconds(-5);
        var secondPeerConfirmedAt = DateTimeOffset.UtcNow.AddSeconds(-2);
        var oplogStore = CreateOplogStoreWithLatestTimestamp();
        var peerStore = Substitute.For<IPeerOplogConfirmationStore>();
        SetupTrackedPeers(peerStore, "peer-1", "peer-2");
        SetupConfirmations(
            peerStore,
            "peer-1",
            new PeerOplogConfirmation
            {
                PeerNodeId = "peer-1",
                SourceNodeId = "source-1",
                ConfirmedWall = 995,
                ConfirmedLogic = 0,
                LastConfirmedUtc = firstPeerConfirmedAt,
                IsActive = true
            });
        SetupConfirmations(
            peerStore,
            "peer-2",
            new PeerOplogConfirmation
            {
                PeerNodeId = "peer-2",
                SourceNodeId = "source-1",
                ConfirmedWall = 990,
                ConfirmedLogic = 0,
                LastConfirmedUtc = secondPeerConfirmedAt,
                IsActive = true
            });
        var options = CreateOptions(lagThresholdMs: 20, criticalLagThresholdMs: 50);
        var check = new CBDDCHealthCheck(oplogStore, peerStore, options);

        // Act
        var health = await check.CheckHealthAsync(new HealthCheckContext());

        // Assert
        health.Status.ShouldBe(HealthStatus.Healthy);
        health.Data["trackedPeerCount"].ShouldBe(2);
        health.Data["maxLagMs"].ShouldBe(10L);

        var laggingPeers = health.Data["laggingPeers"].ShouldBeOfType<List<string>>();
        laggingPeers.Count.ShouldBe(0);

        var unconfirmedPeers = health.Data["peersWithNoConfirmation"].ShouldBeOfType<List<string>>();
        unconfirmedPeers.Count.ShouldBe(0);

        var confirmedAtByPeer = health.Data["lastSuccessfulConfirmationUpdateByPeer"]
            .ShouldBeOfType<Dictionary<string, DateTimeOffset?>>();
        confirmedAtByPeer["peer-1"].ShouldBe(firstPeerConfirmedAt);
        confirmedAtByPeer["peer-2"].ShouldBe(secondPeerConfirmedAt);
    }

    /// <summary>
    /// A lagging peer combined with a peer that has no confirmations yields a degraded result,
    /// and both peers are named in the payload.
    /// </summary>
    [Fact]
    public async Task CheckHealthAsync_WhenPeersLaggingOrUnconfirmed_ReturnsDegradedWithPayload()
    {
        // Arrange
        var laggingPeerConfirmedAt = DateTimeOffset.UtcNow.AddSeconds(-10);
        var oplogStore = CreateOplogStoreWithLatestTimestamp();
        var peerStore = Substitute.For<IPeerOplogConfirmationStore>();
        SetupTrackedPeers(peerStore, "peer-1", "peer-2", "peer-3");
        SetupConfirmations(
            peerStore,
            "peer-1",
            new PeerOplogConfirmation
            {
                PeerNodeId = "peer-1",
                SourceNodeId = "source-1",
                ConfirmedWall = 960,
                ConfirmedLogic = 0,
                LastConfirmedUtc = laggingPeerConfirmedAt,
                IsActive = true
            });

        // peer-2 is tracked but has never confirmed anything.
        SetupConfirmations(peerStore, "peer-2");
        SetupConfirmations(
            peerStore,
            "peer-3",
            new PeerOplogConfirmation
            {
                PeerNodeId = "peer-3",
                SourceNodeId = "source-1",
                ConfirmedWall = 995,
                ConfirmedLogic = 0,
                LastConfirmedUtc = DateTimeOffset.UtcNow.AddSeconds(-4),
                IsActive = true
            });
        var options = CreateOptions(lagThresholdMs: 30, criticalLagThresholdMs: 100);
        var check = new CBDDCHealthCheck(oplogStore, peerStore, options);

        // Act
        var health = await check.CheckHealthAsync(new HealthCheckContext());

        // Assert
        health.Status.ShouldBe(HealthStatus.Degraded);
        health.Data["trackedPeerCount"].ShouldBe(3);
        health.Data["maxLagMs"].ShouldBe(40L);

        var laggingPeers = health.Data["laggingPeers"].ShouldBeOfType<List<string>>();
        laggingPeers.ShouldContain("peer-1");

        var unconfirmedPeers = health.Data["peersWithNoConfirmation"].ShouldBeOfType<List<string>>();
        unconfirmedPeers.ShouldContain("peer-2");

        var confirmedAtByPeer = health.Data["lastSuccessfulConfirmationUpdateByPeer"]
            .ShouldBeOfType<Dictionary<string, DateTimeOffset?>>();
        confirmedAtByPeer["peer-1"].ShouldBe(laggingPeerConfirmedAt);
        confirmedAtByPeer["peer-2"].ShouldBeNull();
    }

    /// <summary>
    /// A single peer whose lag exceeds the critical threshold yields an unhealthy result.
    /// </summary>
    [Fact]
    public async Task CheckHealthAsync_WhenCriticalLagBreached_ReturnsUnhealthyWithPayload()
    {
        // Arrange
        var oplogStore = CreateOplogStoreWithLatestTimestamp();
        var peerStore = Substitute.For<IPeerOplogConfirmationStore>();
        SetupTrackedPeers(peerStore, "peer-critical");
        SetupConfirmations(
            peerStore,
            "peer-critical",
            new PeerOplogConfirmation
            {
                PeerNodeId = "peer-critical",
                SourceNodeId = "source-1",
                ConfirmedWall = 850,
                ConfirmedLogic = 0,
                LastConfirmedUtc = DateTimeOffset.UtcNow.AddMinutes(-1),
                IsActive = true
            });
        var options = CreateOptions(lagThresholdMs: 30, criticalLagThresholdMs: 80);
        var check = new CBDDCHealthCheck(oplogStore, peerStore, options);

        // Act
        var health = await check.CheckHealthAsync(new HealthCheckContext());

        // Assert
        health.Status.ShouldBe(HealthStatus.Unhealthy);
        health.Data["maxLagMs"].ShouldBe(150L);

        var laggingPeers = health.Data["laggingPeers"].ShouldBeOfType<List<string>>();
        laggingPeers.ShouldContain("peer-critical");
    }

    /// <summary>
    /// When one peer reports confirmations for several sources, the largest lag among them is the one reported.
    /// </summary>
    [Fact]
    public async Task CheckHealthAsync_WhenPeerHasMultipleSourceConfirmations_UsesWorstCaseLag()
    {
        // Arrange
        var oplogStore = CreateOplogStoreWithLatestTimestamp();
        var peerStore = Substitute.For<IPeerOplogConfirmationStore>();
        SetupTrackedPeers(peerStore, "peer-1");
        SetupConfirmations(
            peerStore,
            "peer-1",
            new PeerOplogConfirmation
            {
                PeerNodeId = "peer-1",
                SourceNodeId = "source-fast",
                ConfirmedWall = 995,
                ConfirmedLogic = 0,
                LastConfirmedUtc = DateTimeOffset.UtcNow.AddSeconds(-1),
                IsActive = true
            },
            new PeerOplogConfirmation
            {
                PeerNodeId = "peer-1",
                SourceNodeId = "source-slow",
                ConfirmedWall = 900,
                ConfirmedLogic = 0,
                LastConfirmedUtc = DateTimeOffset.UtcNow.AddSeconds(-10),
                IsActive = true
            });
        var options = CreateOptions(lagThresholdMs: 80, criticalLagThresholdMs: 150);
        var check = new CBDDCHealthCheck(oplogStore, peerStore, options);

        // Act
        var health = await check.CheckHealthAsync(new HealthCheckContext());

        // Assert
        health.Status.ShouldBe(HealthStatus.Degraded);
        health.Data["maxLagMs"].ShouldBe(100L);

        var laggingPeers = health.Data["laggingPeers"].ShouldBeOfType<List<string>>();
        laggingPeers.ShouldContain("peer-1");
    }

    /// <summary>
    /// A faulted latest-timestamp lookup yields an unhealthy result that carries the original exception.
    /// </summary>
    [Fact]
    public async Task CheckHealthAsync_WhenStoreThrows_ReturnsUnhealthy()
    {
        // Arrange
        var failure = new InvalidOperationException("store unavailable");
        var oplogStore = Substitute.For<IOplogStore>();
        var peerStore = Substitute.For<IPeerOplogConfirmationStore>();
        oplogStore
            .GetLatestTimestampAsync(Arg.Any<CancellationToken>())
            .Returns(Task.FromException<HlcTimestamp>(failure));
        var check = new CBDDCHealthCheck(oplogStore, peerStore, CreateOptions());

        // Act
        var health = await check.CheckHealthAsync(new HealthCheckContext());

        // Assert
        health.Status.ShouldBe(HealthStatus.Unhealthy);
        health.Exception.ShouldBe(failure);
        health.Description.ShouldNotBeNull();
        health.Description.ShouldContain("persistence layer is unavailable");
    }

    /// <summary>
    /// Creates an <see cref="IOplogStore" /> substitute whose latest timestamp is
    /// <c>new HlcTimestamp(1_000, 0, "node-1")</c>, the baseline shared by the lag tests.
    /// </summary>
    /// <returns>The configured substitute.</returns>
    private static IOplogStore CreateOplogStoreWithLatestTimestamp()
    {
        var oplogStore = Substitute.For<IOplogStore>();
        oplogStore
            .GetLatestTimestampAsync(Arg.Any<CancellationToken>())
            .Returns(new HlcTimestamp(1_000, 0, "node-1"));
        return oplogStore;
    }

    /// <summary>
    /// Configures the confirmation store substitute to report the given peers as actively tracked.
    /// </summary>
    /// <param name="confirmationStore">The substitute to configure.</param>
    /// <param name="peerNodeIds">The peer identifiers to return.</param>
    private static void SetupTrackedPeers(
        IPeerOplogConfirmationStore confirmationStore,
        params string[] peerNodeIds)
    {
        confirmationStore
            .GetActiveTrackedPeersAsync(Arg.Any<CancellationToken>())
            .Returns(Task.FromResult<IEnumerable<string>>(peerNodeIds));
    }

    /// <summary>
    /// Configures the confirmation store substitute to return the given confirmations for one peer.
    /// </summary>
    /// <param name="confirmationStore">The substitute to configure.</param>
    /// <param name="peerNodeId">The peer the confirmations are returned for.</param>
    /// <param name="confirmations">The confirmations to return; pass none for a peer without confirmations.</param>
    private static void SetupConfirmations(
        IPeerOplogConfirmationStore confirmationStore,
        string peerNodeId,
        params PeerOplogConfirmation[] confirmations)
    {
        confirmationStore
            .GetConfirmationsForPeerAsync(peerNodeId, Arg.Any<CancellationToken>())
            .Returns(Task.FromResult<IEnumerable<PeerOplogConfirmation>>(confirmations));
    }

    /// <summary>
    /// Builds hosting options carrying only the two peer-confirmation lag thresholds.
    /// </summary>
    /// <param name="lagThresholdMs">Value assigned to <c>PeerConfirmationLagThresholdMs</c>.</param>
    /// <param name="criticalLagThresholdMs">Value assigned to <c>PeerConfirmationCriticalLagThresholdMs</c>.</param>
    /// <returns>A new options instance with its <c>Cluster</c> section populated.</returns>
    private static CBDDCHostingOptions CreateOptions(
        long lagThresholdMs = 30_000,
        long criticalLagThresholdMs = 120_000)
    {
        var cluster = new ClusterOptions
        {
            PeerConfirmationLagThresholdMs = lagThresholdMs,
            PeerConfirmationCriticalLagThresholdMs = criticalLagThresholdMs
        };

        return new CBDDCHostingOptions { Cluster = cluster };
    }
}