Closes task #116 (GA hardening backlog).

Before this commit the RedundancyStatePublisher saw PeerReachability.Unknown for every peer because the tracker had no writers — every healthy peer got degraded to the Isolated-Primary band (230) even when fully reachable. Not release-blocking (safe default), but not the full non-transparent-redundancy UX either.

Two-layer probe model per docs/v2/implementation/phase-6-3-redundancy-runtime.md §Stream B:

- PeerHttpProbeLoop (Stream B.1) — fast-fail layer at a 2 s interval / 1 s timeout. Hits each peer's http://{Host}:{DashboardPort}/healthz via an injected IHttpClientFactory. Writes the HTTP bit of PeerReachability while preserving the UA bit from the last UA probe, so a transient HTTP blip doesn't clobber the authoritative UA reading.
- PeerUaProbeLoop (Stream B.2) — authoritative layer at a 10 s interval / 5 s timeout. Calls DiscoveryClient.GetEndpoints against opc.tcp://{Host}:{OpcUaPort} — cheap compared to a full Session.Create, no cert trust required. Short-circuits when the HTTP probe last reported the peer unhealthy (no wasted handshakes on a known-dead endpoint), clearing the stale UaHealthy bit in that case.

Both inherit from BackgroundService, follow the tick/delay/catch pattern that RedundancyPublisherHostedService + ResilienceStatusPublisherHostedService established, and expose TickAsync() as internal for test drive-through.

New PeerProbeOptions class carries the four intervals/timeouts so operators can tune cadence per site. Registered as singleton in Program.cs; HTTP client registered by name so the OtOpcUa handler chain (Serilog enrichers, potential future OpenTelemetry instrumentation) isn't bypassed.

Tests — 9 new unit tests across PeerHttpProbeLoopTests (5) and PeerUaProbeLoopTests (4). All pass. Server.Tests total 243 → 252. Full solution build clean.

Docs: v2-release-readiness.md Phase 6.3 follow-ups list marks the peer-probe bullet struck-through with a close-out note.
Still deferred in Phase 6.3:

- OPC UA variable-node binding (task #117 — ServiceLevel + ServerUriArray)
- sp_PublishGeneration lease wrap (task #118)
- Client interop matrix (task #119)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
147 lines
5.7 KiB
C#
147 lines
5.7 KiB
C#
using Microsoft.EntityFrameworkCore;
|
|
using Microsoft.Extensions.Logging.Abstractions;
|
|
using Shouldly;
|
|
using Xunit;
|
|
using ZB.MOM.WW.OtOpcUa.Configuration;
|
|
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
|
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
|
using ZB.MOM.WW.OtOpcUa.Server.Hosting;
|
|
using ZB.MOM.WW.OtOpcUa.Server.Redundancy;
|
|
|
|
namespace ZB.MOM.WW.OtOpcUa.Server.Tests;
|
|
|
|
/// <summary>
/// Unit tests for <see cref="PeerUaProbeLoop"/>. Every test seeds a two-node cluster into an
/// in-memory configuration database, pre-loads a <see cref="PeerReachabilityTracker"/> entry
/// for the peer node, and then calls <c>TickAsync</c> directly with a stubbed endpoint-probe
/// delegate, so no real OPC UA server is involved.
/// </summary>
[Trait("Category", "Unit")]
public sealed class PeerUaProbeLoopTests : IDisposable
{
    /// <summary>Node id the coordinator under test is initialized as.</summary>
    private const string SelfNodeId = "A";

    /// <summary>Node id of the peer whose reachability entry the tests inspect.</summary>
    private const string PeerNodeId = "B";

    /// <summary>Cluster id shared by the seeded cluster row, node rows and the coordinator.</summary>
    private const string TestClusterId = "c1";

    private readonly OtOpcUaConfigDbContext _db;
    private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbFactory;

    /// <summary>
    /// Builds the in-memory database options once and shares them between the seeding context
    /// and the factory handed to the coordinator.
    /// </summary>
    public PeerUaProbeLoopTests()
    {
        // A fresh GUID in the name gives this instance its own named in-memory database.
        var databaseName = $"peer-ua-{Guid.NewGuid():N}";
        var builder = new DbContextOptionsBuilder<OtOpcUaConfigDbContext>();
        builder.UseInMemoryDatabase(databaseName);
        var dbOptions = builder.Options;

        _db = new OtOpcUaConfigDbContext(dbOptions);
        _dbFactory = new DbContextFactory(dbOptions);
    }

    /// <summary>Releases the context used for seeding.</summary>
    public void Dispose()
    {
        _db.Dispose();
    }

    /// <summary>
    /// With the peer already recorded as HTTP-unhealthy, a tick must not invoke the probe
    /// delegate and must leave the peer with both bits false.
    /// </summary>
    [Fact]
    public async Task Tick_short_circuits_when_HttpHealthy_is_false()
    {
        var coordinator = await SeedPrimaryWithSecondaryPeerAsync();
        var tracker = TrackerWithPeer(httpHealthy: false, uaHealthy: true);
        var invocations = 0;
        var loop = new PeerUaProbeLoop(
            coordinator,
            tracker,
            NullLogger<PeerUaProbeLoop>.Instance,
            options: null,
            endpointProbe: (_, _, _) =>
            {
                invocations += 1;
                return Task.FromResult(true);
            });

        await loop.TickAsync(CancellationToken.None);

        invocations.ShouldBe(0, "UA probe must not run when HTTP reports the peer unhealthy");
        var peerState = tracker.Get(PeerNodeId);
        peerState.HttpHealthy.ShouldBeFalse();
        peerState.UaHealthy.ShouldBeFalse("stale UaHealthy=true must be cleared when HTTP says dead");
    }

    /// <summary>
    /// With the peer HTTP-healthy and the probe delegate returning true, a tick must call the
    /// delegate with an <c>opc.tcp://b:</c> endpoint and set the peer's UA bit.
    /// </summary>
    [Fact]
    public async Task Tick_marks_UaHealthy_true_when_probe_succeeds()
    {
        var coordinator = await SeedPrimaryWithSecondaryPeerAsync();
        var tracker = TrackerWithPeer(httpHealthy: true, uaHealthy: false);
        string? probedEndpoint = null;
        var loop = new PeerUaProbeLoop(
            coordinator,
            tracker,
            NullLogger<PeerUaProbeLoop>.Instance,
            options: null,
            endpointProbe: (endpoint, _, _) =>
            {
                probedEndpoint = endpoint;
                return Task.FromResult(true);
            });

        await loop.TickAsync(CancellationToken.None);

        probedEndpoint.ShouldNotBeNull();
        probedEndpoint!.ShouldStartWith("opc.tcp://b:");
        tracker.Get(PeerNodeId).UaHealthy.ShouldBeTrue();
    }

    /// <summary>
    /// With the peer HTTP-healthy and the probe delegate returning false, a tick must clear a
    /// previously-true UA bit.
    /// </summary>
    [Fact]
    public async Task Tick_marks_UaHealthy_false_when_probe_fails()
    {
        var coordinator = await SeedPrimaryWithSecondaryPeerAsync();
        var tracker = TrackerWithPeer(httpHealthy: true, uaHealthy: true);
        var loop = new PeerUaProbeLoop(
            coordinator,
            tracker,
            NullLogger<PeerUaProbeLoop>.Instance,
            options: null,
            endpointProbe: (_, _, _) => Task.FromResult(false));

        await loop.TickAsync(CancellationToken.None);

        var peerState = tracker.Get(PeerNodeId);
        peerState.UaHealthy.ShouldBeFalse();
    }

    /// <summary>
    /// A successful UA probe must set the UA bit while leaving the already-true HTTP bit intact.
    /// </summary>
    [Fact]
    public async Task Tick_preserves_HttpHealthy_bit_across_UA_update()
    {
        var coordinator = await SeedPrimaryWithSecondaryPeerAsync();
        var tracker = TrackerWithPeer(httpHealthy: true, uaHealthy: false);
        var loop = new PeerUaProbeLoop(
            coordinator,
            tracker,
            NullLogger<PeerUaProbeLoop>.Instance,
            options: null,
            endpointProbe: (_, _, _) => Task.FromResult(true));

        await loop.TickAsync(CancellationToken.None);

        var peerState = tracker.Get(PeerNodeId);
        peerState.HttpHealthy.ShouldBeTrue("HTTP bit must not be clobbered by the UA probe");
        peerState.UaHealthy.ShouldBeTrue();
    }

    // ---- fixture helpers ---------------------------------------------------

    /// <summary>
    /// Seeds the topology every test uses: node "A" as primary (and as self) plus node "B" as
    /// secondary, then returns the initialized coordinator.
    /// </summary>
    private Task<RedundancyCoordinator> SeedPrimaryWithSecondaryPeerAsync()
    {
        return SeedAndInitializeAsync(
            SelfNodeId,
            (SelfNodeId, RedundancyRole.Primary, "urn:A"),
            (PeerNodeId, RedundancyRole.Secondary, "urn:B"));
    }

    /// <summary>
    /// Creates a tracker whose only entry is the peer node with the given starting bits.
    /// </summary>
    /// <param name="httpHealthy">Initial HTTP bit recorded for the peer.</param>
    /// <param name="uaHealthy">Initial UA bit recorded for the peer.</param>
    private static PeerReachabilityTracker TrackerWithPeer(bool httpHealthy, bool uaHealthy)
    {
        var tracker = new PeerReachabilityTracker();
        var initialState = new PeerReachability(HttpHealthy: httpHealthy, UaHealthy: uaHealthy);
        tracker.Update(PeerNodeId, initialState);
        return tracker;
    }

    /// <summary>
    /// Writes one cluster row and one node row per entry in <paramref name="nodes"/>, saves
    /// them, and returns a coordinator initialized against the same in-memory database.
    /// </summary>
    /// <param name="selfNodeId">Node id passed to the coordinator as its own identity.</param>
    /// <param name="nodes">Node id, redundancy role and application URI for each node to seed.</param>
    /// <returns>The coordinator after <c>InitializeAsync</c> has completed.</returns>
    private async Task<RedundancyCoordinator> SeedAndInitializeAsync(string selfNodeId, params (string id, RedundancyRole role, string appUri)[] nodes)
    {
        // A single-node cluster is seeded without redundancy; anything else is seeded as Warm.
        var redundancyMode = nodes.Length switch
        {
            1 => RedundancyMode.None,
            _ => RedundancyMode.Warm,
        };

        var cluster = new ServerCluster
        {
            ClusterId = TestClusterId,
            Name = "Warsaw",
            Enterprise = "zb",
            Site = "warsaw",
            RedundancyMode = redundancyMode,
            CreatedBy = "test",
        };
        _db.ServerClusters.Add(cluster);

        foreach (var node in nodes)
        {
            var row = new ClusterNode
            {
                NodeId = node.id,
                ClusterId = TestClusterId,
                RedundancyRole = node.role,
                // Host is the lower-cased node id, which is why tests expect "opc.tcp://b:".
                Host = node.id.ToLowerInvariant(),
                ApplicationUri = node.appUri,
                CreatedBy = "test",
            };
            _db.ClusterNodes.Add(row);
        }

        await _db.SaveChangesAsync();

        var logger = NullLogger<RedundancyCoordinator>.Instance;
        var coordinator = new RedundancyCoordinator(_dbFactory, logger, selfNodeId, TestClusterId);
        await coordinator.InitializeAsync(CancellationToken.None);
        return coordinator;
    }

    /// <summary>
    /// Minimal <see cref="IDbContextFactory{TContext}"/> that hands out a new context over the
    /// supplied options on every call.
    /// </summary>
    private sealed class DbContextFactory : IDbContextFactory<OtOpcUaConfigDbContext>
    {
        private readonly DbContextOptions<OtOpcUaConfigDbContext> _options;

        public DbContextFactory(DbContextOptions<OtOpcUaConfigDbContext> options)
        {
            _options = options;
        }

        public OtOpcUaConfigDbContext CreateDbContext()
        {
            return new OtOpcUaConfigDbContext(_options);
        }
    }
}
|