Files
lmxopcua/tests/Server/ZB.MOM.WW.OtOpcUa.Server.Tests/GenerationRefreshHostedServiceTests.cs
Joseph Doherty f23e368a74 fix(server, admin): wire sp_RegisterNodeGenerationApplied + overlay heartbeat onto ClusterNode
dbo.sp_RegisterNodeGenerationApplied was defined by the initial
StoredProcedures migration but had zero callers in src/. The server
polled sp_GetCurrentGenerationForCluster every 5s but never reported
back, so dbo.ClusterNodeGenerationState stayed empty for every node
and both the Admin UI Fleet status page ("No node state recorded")
and the cluster-detail Redundancy LastSeenAt indicator ("never
STALE") showed broken liveness forever.

Server side (GenerationRefreshHostedService):
* New testable seam: Func<long, NodeApplyStatus, string?, CT, Task>?
  registerAppliedAsync constructor parameter, defaulting to a real
  sp_RegisterNodeGenerationApplied call against the central DB.
* TickAsync now calls the proc at two points: after every successful
  apply with NodeApplyStatus.Applied, and on every no-change tick as
  a heartbeat (also Applied) so LastSeenAt stays fresh.
* The apply path now wraps the lease + coordinator.RefreshAsync in a
  try/catch; on failure it reports NodeApplyStatus.Failed with the
  exception message, and it advances LastAppliedGenerationId regardless
  of outcome so we don't loop on the same broken apply every 5s.
* Register-call failures are best-effort (logged at Debug for heartbeats
  and at Warning for apply reports) — a transient DB outage during
  reporting must not crash the publisher or block the next apply.

Admin side (ClusterNodeService.ListByClusterAsync): the Redundancy tab
reads ClusterNode.LastSeenAt, but no current writer maintains that
column — the heartbeat goes to ClusterNodeGenerationState.LastSeenAt.
Overlay the GenerationState heartbeat onto the returned ClusterNode
rows when more recent, so IsStale + the Redundancy table column
reflect actual liveness without a schema change or new write path.

Tests: 3 new cases on GenerationRefreshHostedServiceTests verify
first-apply reports Applied, no-change ticks heartbeat with Applied,
and register-call failure does not roll back the cursor or block
subsequent ticks. All 8 GenerationRefresh tests pass.

Verified live on node-dev-a / cluster-dev: dbo.ClusterNodeGenerationState
now populated with CurrentGenerationId=1, LastAppliedStatus=Applied,
fresh LastSeenAt. Fleet status page shows the node (KPIs NODES 1 /
APPLIED 1 / STALE 0 / FAILED 0). Redundancy tab KPI STALE went 1→0 and
the row shows a real LAST SEEN timestamp. Bonus: FleetStatusHub
SignalR push now fires the cluster-page Live update banner on every
heartbeat because there are finally state changes to push.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-25 02:22:59 -04:00

215 lines
8.9 KiB
C#

using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging.Abstractions;
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
using ZB.MOM.WW.OtOpcUa.Server.Hosting;
using ZB.MOM.WW.OtOpcUa.Server.Redundancy;
namespace ZB.MOM.WW.OtOpcUa.Server.Tests;
/// <summary>
/// Unit-level coverage for <see cref="GenerationRefreshHostedService"/>. The generation
/// query is replaced by a stub delegate so the lease-around-refresh behaviour can be
/// driven one tick at a time; the real database path is covered end-to-end by the
/// Phase 6.3 compliance script rather than here.
/// </summary>
[Trait("Category", "Unit")]
public sealed class GenerationRefreshHostedServiceTests : IDisposable
{
    // Identity shared by the seeded rows, the coordinator and the node options.
    private const string TestNodeId = "A";
    private const string TestClusterId = "c1";

    private static readonly TimeSpan TickInterval = TimeSpan.FromSeconds(1);

    private readonly OtOpcUaConfigDbContext _db;
    private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbFactory;

    /// <summary>
    /// Builds an in-memory config database under a freshly generated name and a factory
    /// that hands out additional contexts over the same options.
    /// </summary>
    public GenerationRefreshHostedServiceTests()
    {
        var builder = new DbContextOptionsBuilder<OtOpcUaConfigDbContext>();
        builder.UseInMemoryDatabase($"gen-refresh-{Guid.NewGuid():N}");
        var options = builder.Options;

        _db = new OtOpcUaConfigDbContext(options);
        _dbFactory = new DbContextFactory(options);
    }

    /// <summary>Releases the context owned by this fixture.</summary>
    public void Dispose()
    {
        _db.Dispose();
    }

    /// <summary>
    /// The first tick picks up the reported generation, counts one refresh, and leaves no
    /// apply lease in progress afterwards.
    /// </summary>
    [Fact]
    public async Task First_tick_applies_current_generation_and_closes_the_lease()
    {
        var redundancy = await SeedCoordinatorAsync();
        var leaseRegistry = new ApplyLeaseRegistry();
        var sut = NewService(redundancy, leaseRegistry, currentGeneration: () => 42);
        leaseRegistry.IsApplyInProgress.ShouldBeFalse("no lease before first tick");

        await sut.TickAsync(CancellationToken.None);

        sut.LastAppliedGenerationId.ShouldBe(42);
        sut.TickCount.ShouldBe(1);
        sut.RefreshCount.ShouldBe(1);
        leaseRegistry.IsApplyInProgress.ShouldBeFalse("lease must be disposed after the apply window");
    }

    /// <summary>
    /// Ticking twice against an unchanged generation counts both ticks but only one refresh.
    /// </summary>
    [Fact]
    public async Task Subsequent_tick_with_same_generation_is_a_no_op()
    {
        var redundancy = await SeedCoordinatorAsync();
        var leaseRegistry = new ApplyLeaseRegistry();
        var sut = NewService(redundancy, leaseRegistry, currentGeneration: () => 42);

        await sut.TickAsync(CancellationToken.None);
        await sut.TickAsync(CancellationToken.None);

        sut.TickCount.ShouldBe(2);
        sut.RefreshCount.ShouldBe(1, "second identical tick must skip the refresh");
        leaseRegistry.IsApplyInProgress.ShouldBeFalse();
    }

    /// <summary>
    /// When the stubbed generation moves from 42 to 43 between ticks, a second refresh runs
    /// and the applied cursor follows.
    /// </summary>
    [Fact]
    public async Task Generation_change_triggers_new_refresh()
    {
        var redundancy = await SeedCoordinatorAsync();
        var leaseRegistry = new ApplyLeaseRegistry();
        var published = 42L;
        var sut = NewService(redundancy, leaseRegistry, currentGeneration: () => published);

        await sut.TickAsync(CancellationToken.None);
        published = 43L;
        await sut.TickAsync(CancellationToken.None);

        sut.LastAppliedGenerationId.ShouldBe(43);
        sut.RefreshCount.ShouldBe(2);
    }

    /// <summary>
    /// A null generation from the query still counts as a tick but performs no refresh and
    /// leaves the applied cursor unset.
    /// </summary>
    [Fact]
    public async Task Null_generation_means_no_published_config_yet_and_does_not_apply()
    {
        var redundancy = await SeedCoordinatorAsync();
        var leaseRegistry = new ApplyLeaseRegistry();
        var sut = NewService(redundancy, leaseRegistry, currentGeneration: () => null);

        await sut.TickAsync(CancellationToken.None);

        sut.LastAppliedGenerationId.ShouldBeNull();
        sut.RefreshCount.ShouldBe(0);
        sut.TickCount.ShouldBe(1);
    }

    /// <summary>
    /// Checks lease hygiene from the outside: once the tick has completed, no lease may
    /// remain open.
    /// </summary>
    [Fact]
    public async Task Lease_is_opened_during_the_refresh_window()
    {
        // The stubbed query delegate cannot be used to watch the lease mid-apply: it runs
        // before BeginApplyLease and would only ever see IsApplyInProgress=false. With stubs
        // alone, the cleanest signal is OpenLeaseCount after the tick — a mis-disposed
        // `await using` would leave one lease dangling.
        var redundancy = await SeedCoordinatorAsync();
        var leaseRegistry = new ApplyLeaseRegistry();
        var sut = NewService(redundancy, leaseRegistry, currentGeneration: () => 42);

        await sut.TickAsync(CancellationToken.None);

        leaseRegistry.OpenLeaseCount.ShouldBe(0, "IAsyncDisposable dispose must fire regardless of outcome");
    }

    // Bug #12 fix — the tests below pin the wiring that used to be missing: both applies and
    // heartbeats emit sp_RegisterNodeGenerationApplied, which is what lets the Admin UI Fleet
    // status page and the Redundancy LastSeenAt column show live state.

    /// <summary>
    /// A first apply produces exactly one register call carrying the generation, the
    /// Applied status and no error text.
    /// </summary>
    [Fact]
    public async Task First_apply_reports_Applied_status_to_central_db()
    {
        var redundancy = await SeedCoordinatorAsync();
        var leaseRegistry = new ApplyLeaseRegistry();
        List<(long Gen, NodeApplyStatus Status, string? Error)> reports = [];
        var sut = NewService(redundancy, leaseRegistry, currentGeneration: () => 42, registerCalls: reports);

        await sut.TickAsync(CancellationToken.None);

        reports.Count.ShouldBe(1, "exactly one register call per apply window");
        var reported = reports[0];
        reported.Gen.ShouldBe(42);
        reported.Status.ShouldBe(NodeApplyStatus.Applied);
        reported.Error.ShouldBeNull();
    }

    /// <summary>
    /// One applying tick followed by two unchanged ticks yields three register calls, all
    /// reporting generation 42 as Applied with no error.
    /// </summary>
    [Fact]
    public async Task No_change_tick_heartbeats_with_Applied_status()
    {
        var redundancy = await SeedCoordinatorAsync();
        var leaseRegistry = new ApplyLeaseRegistry();
        List<(long Gen, NodeApplyStatus Status, string? Error)> reports = [];
        var sut = NewService(redundancy, leaseRegistry, currentGeneration: () => 42, registerCalls: reports);

        // Tick 1 is the initial apply; ticks 2 and 3 are no-change heartbeats.
        for (var tick = 0; tick < 3; tick++)
        {
            await sut.TickAsync(CancellationToken.None);
        }

        reports.Count.ShouldBe(3, "one apply call + two heartbeat calls");
        reports.ShouldAllBe(r => r.Gen == 42 && r.Status == NodeApplyStatus.Applied && r.Error == null);
    }

    /// <summary>
    /// A register delegate that always throws is still invoked on both ticks, and the
    /// applied cursor stays at the generation that was applied.
    /// </summary>
    [Fact]
    public async Task Register_call_failure_does_not_break_apply_or_block_subsequent_ticks()
    {
        var redundancy = await SeedCoordinatorAsync();
        var leaseRegistry = new ApplyLeaseRegistry();
        var attempts = 0;
        var sut = new GenerationRefreshHostedService(
            NewNodeOptions(),
            leaseRegistry,
            redundancy,
            NullLogger<GenerationRefreshHostedService>.Instance,
            tickInterval: TickInterval,
            currentGenerationQuery: _ => Task.FromResult<long?>(42),
            registerAppliedAsync: (_, _, _, _) =>
            {
                attempts++;
                throw new InvalidOperationException("simulated DB outage during register");
            });

        await sut.TickAsync(CancellationToken.None); // apply succeeds, register throws
        await sut.TickAsync(CancellationToken.None); // heartbeat throws

        attempts.ShouldBe(2, "both register attempts must run");
        sut.LastAppliedGenerationId.ShouldBe(42, "register failure must not roll back the cursor");
    }

    // ---- fixture helpers ---------------------------------------------------

    /// <summary>
    /// Seeds one cluster and one primary node into the in-memory database, then returns an
    /// initialized coordinator bound to that node and cluster.
    /// </summary>
    private async Task<RedundancyCoordinator> SeedCoordinatorAsync()
    {
        var cluster = new ServerCluster
        {
            ClusterId = TestClusterId,
            Name = "W",
            Enterprise = "zb",
            Site = "w",
            RedundancyMode = RedundancyMode.None,
            CreatedBy = "test",
        };
        var node = new ClusterNode
        {
            NodeId = TestNodeId,
            ClusterId = TestClusterId,
            RedundancyRole = RedundancyRole.Primary,
            Host = "a",
            ApplicationUri = "urn:A",
            CreatedBy = "test",
        };

        _db.ServerClusters.Add(cluster);
        _db.ClusterNodes.Add(node);
        await _db.SaveChangesAsync();

        var coordinator = new RedundancyCoordinator(
            _dbFactory,
            NullLogger<RedundancyCoordinator>.Instance,
            TestNodeId,
            TestClusterId);
        await coordinator.InitializeAsync(CancellationToken.None);
        return coordinator;
    }

    /// <summary>Node options used by every service under test; the connection string is never used.</summary>
    private static NodeOptions NewNodeOptions()
    {
        return new NodeOptions
        {
            NodeId = TestNodeId,
            ClusterId = TestClusterId,
            ConfigDbConnectionString = "unused",
        };
    }

    /// <summary>
    /// Creates a service whose generation query is answered by <paramref name="currentGeneration"/>.
    /// When <paramref name="registerCalls"/> is supplied, every register call is appended to
    /// it; otherwise register calls complete without recording anything.
    /// </summary>
    private static GenerationRefreshHostedService NewService(
        RedundancyCoordinator coordinator,
        ApplyLeaseRegistry leases,
        Func<long?> currentGeneration,
        List<(long Gen, NodeApplyStatus Status, string? Error)>? registerCalls = null)
    {
        return new GenerationRefreshHostedService(
            NewNodeOptions(),
            leases,
            coordinator,
            NullLogger<GenerationRefreshHostedService>.Instance,
            tickInterval: TickInterval,
            currentGenerationQuery: _ => Task.FromResult(currentGeneration()),
            registerAppliedAsync: (generation, status, error, _) =>
            {
                // Recording is optional; without a sink the call is a completed no-op.
                registerCalls?.Add((generation, status, error));
                return Task.CompletedTask;
            });
    }

    /// <summary>
    /// Minimal <see cref="IDbContextFactory{TContext}"/> that creates a new context over the
    /// options it was constructed with.
    /// </summary>
    private sealed class DbContextFactory : IDbContextFactory<OtOpcUaConfigDbContext>
    {
        private readonly DbContextOptions<OtOpcUaConfigDbContext> _options;

        public DbContextFactory(DbContextOptions<OtOpcUaConfigDbContext> options)
        {
            _options = options;
        }

        public OtOpcUaConfigDbContext CreateDbContext()
        {
            return new OtOpcUaConfigDbContext(_options);
        }
    }
}