Initial import of the CBDDC codebase with docs and tests. Add a .NET-focused gitignore to keep generated artifacts out of source control.
Some checks failed
CI / verify (push) Has been cancelled
Some checks failed
CI / verify (push) Has been cancelled
This commit is contained in:
256
tests/ZB.MOM.WW.CBDDC.Hosting.Tests/CBDDCHealthCheckTests.cs
Normal file
256
tests/ZB.MOM.WW.CBDDC.Hosting.Tests/CBDDCHealthCheckTests.cs
Normal file
@@ -0,0 +1,256 @@
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.CBDDC.Core;
|
||||
using ZB.MOM.WW.CBDDC.Core.Storage;
|
||||
using ZB.MOM.WW.CBDDC.Hosting.Configuration;
|
||||
using ZB.MOM.WW.CBDDC.Hosting.HealthChecks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Hosting.Tests;
|
||||
|
||||
/// <summary>
/// Tests for <see cref="CBDDCHealthCheck"/>: each case stubs the oplog store and the peer confirmation
/// store, runs the check once and asserts on the reported status and the diagnostic data payload.
/// </summary>
public class CBDDCHealthCheckTests
{
    /// <summary>
    /// Two tracked peers confirmed at walls 995 and 990 against a latest wall of 1000, with thresholds 20/50:
    /// expects Healthy, a max lag of 10, empty problem lists and the per-peer last-update timestamps.
    /// </summary>
    [Fact]
    public async Task CheckHealthAsync_WhenPersistenceOkAndPeersWithinLagThreshold_ReturnsHealthyWithPayload()
    {
        // Arrange
        var peer1LastUpdate = DateTimeOffset.UtcNow.AddSeconds(-5);
        var peer2LastUpdate = DateTimeOffset.UtcNow.AddSeconds(-2);

        var oplogStore = CreateOplogStoreWithLatestTimestamp();
        var peerConfirmations = CreateConfirmationStoreTracking("peer-1", "peer-2");
        StubConfirmations(
            peerConfirmations,
            "peer-1",
            new PeerOplogConfirmation
            {
                PeerNodeId = "peer-1",
                SourceNodeId = "source-1",
                ConfirmedWall = 995,
                ConfirmedLogic = 0,
                LastConfirmedUtc = peer1LastUpdate,
                IsActive = true
            });
        StubConfirmations(
            peerConfirmations,
            "peer-2",
            new PeerOplogConfirmation
            {
                PeerNodeId = "peer-2",
                SourceNodeId = "source-1",
                ConfirmedWall = 990,
                ConfirmedLogic = 0,
                LastConfirmedUtc = peer2LastUpdate,
                IsActive = true
            });

        var options = CreateOptions(lagThresholdMs: 20, criticalLagThresholdMs: 50);
        var sut = new CBDDCHealthCheck(oplogStore, peerConfirmations, options);

        // Act
        var report = await sut.CheckHealthAsync(new HealthCheckContext());

        // Assert
        report.Status.ShouldBe(HealthStatus.Healthy);
        report.Data["trackedPeerCount"].ShouldBe(2);
        report.Data["maxLagMs"].ShouldBe(10L);

        var laggingPeers = report.Data["laggingPeers"].ShouldBeOfType<List<string>>();
        laggingPeers.Count.ShouldBe(0);

        var unconfirmedPeers = report.Data["peersWithNoConfirmation"].ShouldBeOfType<List<string>>();
        unconfirmedPeers.Count.ShouldBe(0);

        var lastUpdateByPeer = report.Data["lastSuccessfulConfirmationUpdateByPeer"]
            .ShouldBeOfType<Dictionary<string, DateTimeOffset?>>();
        lastUpdateByPeer["peer-1"].ShouldBe(peer1LastUpdate);
        lastUpdateByPeer["peer-2"].ShouldBe(peer2LastUpdate);
    }

    /// <summary>
    /// Three tracked peers with thresholds 30/100: peer-1 is confirmed at wall 960, peer-2 has no
    /// confirmations and peer-3 is confirmed at wall 995. Expects Degraded, a max lag of 40,
    /// peer-1 listed as lagging and peer-2 listed as unconfirmed with a null last update.
    /// </summary>
    [Fact]
    public async Task CheckHealthAsync_WhenPeersLaggingOrUnconfirmed_ReturnsDegradedWithPayload()
    {
        // Arrange
        var peer1LastUpdate = DateTimeOffset.UtcNow.AddSeconds(-10);

        var oplogStore = CreateOplogStoreWithLatestTimestamp();
        var peerConfirmations = CreateConfirmationStoreTracking("peer-1", "peer-2", "peer-3");
        StubConfirmations(
            peerConfirmations,
            "peer-1",
            new PeerOplogConfirmation
            {
                PeerNodeId = "peer-1",
                SourceNodeId = "source-1",
                ConfirmedWall = 960,
                ConfirmedLogic = 0,
                LastConfirmedUtc = peer1LastUpdate,
                IsActive = true
            });

        // peer-2 is tracked but has never confirmed anything.
        StubConfirmations(peerConfirmations, "peer-2");
        StubConfirmations(
            peerConfirmations,
            "peer-3",
            new PeerOplogConfirmation
            {
                PeerNodeId = "peer-3",
                SourceNodeId = "source-1",
                ConfirmedWall = 995,
                ConfirmedLogic = 0,
                LastConfirmedUtc = DateTimeOffset.UtcNow.AddSeconds(-4),
                IsActive = true
            });

        var options = CreateOptions(lagThresholdMs: 30, criticalLagThresholdMs: 100);
        var sut = new CBDDCHealthCheck(oplogStore, peerConfirmations, options);

        // Act
        var report = await sut.CheckHealthAsync(new HealthCheckContext());

        // Assert
        report.Status.ShouldBe(HealthStatus.Degraded);
        report.Data["trackedPeerCount"].ShouldBe(3);
        report.Data["maxLagMs"].ShouldBe(40L);

        var laggingPeers = report.Data["laggingPeers"].ShouldBeOfType<List<string>>();
        laggingPeers.ShouldContain("peer-1");

        var unconfirmedPeers = report.Data["peersWithNoConfirmation"].ShouldBeOfType<List<string>>();
        unconfirmedPeers.ShouldContain("peer-2");

        var lastUpdateByPeer = report.Data["lastSuccessfulConfirmationUpdateByPeer"]
            .ShouldBeOfType<Dictionary<string, DateTimeOffset?>>();
        lastUpdateByPeer["peer-1"].ShouldBe(peer1LastUpdate);
        lastUpdateByPeer["peer-2"].ShouldBeNull();
    }

    /// <summary>
    /// A single peer confirmed at wall 850 against a latest wall of 1000, with thresholds 30/80:
    /// expects Unhealthy, a max lag of 150 and the peer listed as lagging.
    /// </summary>
    [Fact]
    public async Task CheckHealthAsync_WhenCriticalLagBreached_ReturnsUnhealthyWithPayload()
    {
        // Arrange
        var oplogStore = CreateOplogStoreWithLatestTimestamp();
        var peerConfirmations = CreateConfirmationStoreTracking("peer-critical");
        StubConfirmations(
            peerConfirmations,
            "peer-critical",
            new PeerOplogConfirmation
            {
                PeerNodeId = "peer-critical",
                SourceNodeId = "source-1",
                ConfirmedWall = 850,
                ConfirmedLogic = 0,
                LastConfirmedUtc = DateTimeOffset.UtcNow.AddMinutes(-1),
                IsActive = true
            });

        var options = CreateOptions(lagThresholdMs: 30, criticalLagThresholdMs: 80);
        var sut = new CBDDCHealthCheck(oplogStore, peerConfirmations, options);

        // Act
        var report = await sut.CheckHealthAsync(new HealthCheckContext());

        // Assert
        report.Status.ShouldBe(HealthStatus.Unhealthy);
        report.Data["maxLagMs"].ShouldBe(150L);

        var laggingPeers = report.Data["laggingPeers"].ShouldBeOfType<List<string>>();
        laggingPeers.ShouldContain("peer-critical");
    }

    /// <summary>
    /// One peer with two source confirmations (walls 995 and 900) and thresholds 80/150:
    /// expects the larger lag of 100 to be reported, giving Degraded with the peer listed as lagging.
    /// </summary>
    [Fact]
    public async Task CheckHealthAsync_WhenPeerHasMultipleSourceConfirmations_UsesWorstCaseLag()
    {
        // Arrange
        var oplogStore = CreateOplogStoreWithLatestTimestamp();
        var peerConfirmations = CreateConfirmationStoreTracking("peer-1");
        StubConfirmations(
            peerConfirmations,
            "peer-1",
            new PeerOplogConfirmation
            {
                PeerNodeId = "peer-1",
                SourceNodeId = "source-fast",
                ConfirmedWall = 995,
                ConfirmedLogic = 0,
                LastConfirmedUtc = DateTimeOffset.UtcNow.AddSeconds(-1),
                IsActive = true
            },
            new PeerOplogConfirmation
            {
                PeerNodeId = "peer-1",
                SourceNodeId = "source-slow",
                ConfirmedWall = 900,
                ConfirmedLogic = 0,
                LastConfirmedUtc = DateTimeOffset.UtcNow.AddSeconds(-10),
                IsActive = true
            });

        var options = CreateOptions(lagThresholdMs: 80, criticalLagThresholdMs: 150);
        var sut = new CBDDCHealthCheck(oplogStore, peerConfirmations, options);

        // Act
        var report = await sut.CheckHealthAsync(new HealthCheckContext());

        // Assert
        report.Status.ShouldBe(HealthStatus.Degraded);
        report.Data["maxLagMs"].ShouldBe(100L);

        var laggingPeers = report.Data["laggingPeers"].ShouldBeOfType<List<string>>();
        laggingPeers.ShouldContain("peer-1");
    }

    /// <summary>
    /// The oplog store's latest-timestamp call returns a faulted task: expects Unhealthy, the same
    /// exception instance on the result and a description mentioning the unavailable persistence layer.
    /// </summary>
    [Fact]
    public async Task CheckHealthAsync_WhenStoreThrows_ReturnsUnhealthy()
    {
        // Arrange
        var failure = new InvalidOperationException("store unavailable");

        var oplogStore = Substitute.For<IOplogStore>();
        oplogStore.GetLatestTimestampAsync(Arg.Any<CancellationToken>())
            .Returns(Task.FromException<HlcTimestamp>(failure));

        // The confirmation store is left unconfigured in this scenario.
        var peerConfirmations = Substitute.For<IPeerOplogConfirmationStore>();

        var sut = new CBDDCHealthCheck(oplogStore, peerConfirmations, CreateOptions());

        // Act
        var report = await sut.CheckHealthAsync(new HealthCheckContext());

        // Assert
        report.Status.ShouldBe(HealthStatus.Unhealthy);
        report.Exception.ShouldBe(failure);
        report.Description.ShouldNotBeNull();
        report.Description.ShouldContain("persistence layer is unavailable");
    }

    /// <summary>
    /// Creates an oplog store substitute whose latest timestamp is wall 1000, logic 0, node "node-1".
    /// </summary>
    private static IOplogStore CreateOplogStoreWithLatestTimestamp()
    {
        var oplogStore = Substitute.For<IOplogStore>();
        oplogStore.GetLatestTimestampAsync(Arg.Any<CancellationToken>())
            .Returns(new HlcTimestamp(1_000, 0, "node-1"));
        return oplogStore;
    }

    /// <summary>
    /// Creates a confirmation store substitute that reports the given peers as the active tracked peers.
    /// </summary>
    /// <param name="trackedPeers">Peer node ids returned by <c>GetActiveTrackedPeersAsync</c>.</param>
    private static IPeerOplogConfirmationStore CreateConfirmationStoreTracking(params string[] trackedPeers)
    {
        var peerConfirmations = Substitute.For<IPeerOplogConfirmationStore>();
        peerConfirmations.GetActiveTrackedPeersAsync(Arg.Any<CancellationToken>())
            .Returns(Task.FromResult<IEnumerable<string>>(trackedPeers));
        return peerConfirmations;
    }

    /// <summary>
    /// Stubs <c>GetConfirmationsForPeerAsync</c> for one peer so that it returns the supplied confirmations.
    /// </summary>
    /// <param name="confirmationStore">The substitute to configure.</param>
    /// <param name="peerNodeId">The peer id the stub responds to.</param>
    /// <param name="confirmations">Confirmations to return; pass none for a peer without confirmations.</param>
    private static void StubConfirmations(
        IPeerOplogConfirmationStore confirmationStore,
        string peerNodeId,
        params PeerOplogConfirmation[] confirmations)
    {
        confirmationStore.GetConfirmationsForPeerAsync(peerNodeId, Arg.Any<CancellationToken>())
            .Returns(Task.FromResult<IEnumerable<PeerOplogConfirmation>>(confirmations));
    }

    /// <summary>
    /// Builds hosting options whose cluster section carries the given peer confirmation lag thresholds.
    /// </summary>
    /// <param name="lagThresholdMs">Value assigned to <c>PeerConfirmationLagThresholdMs</c>.</param>
    /// <param name="criticalLagThresholdMs">Value assigned to <c>PeerConfirmationCriticalLagThresholdMs</c>.</param>
    /// <returns>A new options instance with only the cluster thresholds set.</returns>
    private static CBDDCHostingOptions CreateOptions(
        long lagThresholdMs = 30_000,
        long criticalLagThresholdMs = 120_000) =>
        new CBDDCHostingOptions
        {
            Cluster = new ClusterOptions
            {
                PeerConfirmationLagThresholdMs = lagThresholdMs,
                PeerConfirmationCriticalLagThresholdMs = criticalLagThresholdMs
            }
        };
}
|
||||
Reference in New Issue
Block a user