fix(server, admin): wire sp_RegisterNodeGenerationApplied + overlay heartbeat onto ClusterNode
dbo.sp_RegisterNodeGenerationApplied was defined by the initial
StoredProcedures migration but had zero callers in src/. The server
polled sp_GetCurrentGenerationForCluster every 5s but never reported
back, so dbo.ClusterNodeGenerationState stayed empty for every node
and both the Admin UI Fleet status page ("No node state recorded")
and the cluster-detail Redundancy LastSeenAt indicator ("never
STALE") showed broken liveness forever.
Server side (GenerationRefreshHostedService):
* New testable seam: Func<long, NodeApplyStatus, string?, CT, Task>?
registerAppliedAsync constructor parameter, defaulting to a real
sp_RegisterNodeGenerationApplied call against the central DB.
* TickAsync now calls the proc at two points: after every successful
apply with NodeApplyStatus.Applied, and on every no-change tick as
a heartbeat (also Applied) so LastSeenAt stays fresh.
* The apply path now wraps the lease + coordinator.RefreshAsync in a
try/catch; on failure it reports NodeApplyStatus.Failed with the
exception message, and it advances LastAppliedGenerationId regardless
of outcome so we don't loop on the same broken apply every 5s.
* Register calls are best-effort: a failure is only logged (LogDebug
for a heartbeat, LogWarning for an apply report) — a transient DB
outage during reporting must not crash the publisher or block the
next apply.
Admin side (ClusterNodeService.ListByClusterAsync): the Redundancy tab
reads ClusterNode.LastSeenAt, but no current writer maintains that
column — the heartbeat goes to ClusterNodeGenerationState.LastSeenAt.
Overlay the GenerationState heartbeat onto the returned ClusterNode
rows when more recent, so IsStale + the Redundancy table column
reflect actual liveness without a schema change or new write path.
Tests: 3 new cases on GenerationRefreshHostedServiceTests verify
first-apply reports Applied, no-change ticks heartbeat with Applied,
and register-call failure does not roll back the cursor or block
subsequent ticks. All 8 GenerationRefresh tests pass.
Verified live on node-dev-a / cluster-dev: dbo.ClusterNodeGenerationState
now populated with CurrentGenerationId=1, LastAppliedStatus=Applied,
fresh LastSeenAt. Fleet status page shows the node (KPIs NODES 1 /
APPLIED 1 / STALE 0 / FAILED 0). Redundancy tab KPI STALE went 1→0 and
the row shows a real LAST SEEN timestamp. Bonus: FleetStatusHub
SignalR push now fires the cluster-page Live update banner on every
heartbeat because there are finally state changes to push.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -110,6 +110,66 @@ public sealed class GenerationRefreshHostedServiceTests : IDisposable
|
||||
leases.OpenLeaseCount.ShouldBe(0, "IAsyncDisposable dispose must fire regardless of outcome");
|
||||
}
|
||||
|
||||
// Bug #12 fix — verifies the previously-missing wiring: applies and heartbeats both
|
||||
// emit sp_RegisterNodeGenerationApplied so Admin UI Fleet status + Redundancy LastSeenAt
|
||||
// surface live state.
|
||||
|
||||
[Fact]
public async Task First_apply_reports_Applied_status_to_central_db()
{
    // Arrange: a seeded coordinator, a fresh lease registry, and a list that captures
    // every (generation, status, error) triple handed to the register seam.
    var redundancy = await SeedCoordinatorAsync();
    var leaseRegistry = new ApplyLeaseRegistry();
    var recorded = new List<(long Gen, NodeApplyStatus Status, string? Error)>();
    var sut = NewService(redundancy, leaseRegistry, currentGeneration: () => 42, registerCalls: recorded);

    // Act: a single tick while the generation query answers 42.
    await sut.TickAsync(CancellationToken.None);

    // Assert: one register call, for generation 42, flagged Applied, carrying no error text.
    recorded.Count.ShouldBe(1, "exactly one register call per apply window");
    var (generation, status, error) = recorded[0];
    generation.ShouldBe(42);
    status.ShouldBe(NodeApplyStatus.Applied);
    error.ShouldBeNull();
}
|
||||
|
||||
[Fact]
public async Task No_change_tick_heartbeats_with_Applied_status()
{
    var redundancy = await SeedCoordinatorAsync();
    var leaseRegistry = new ApplyLeaseRegistry();
    var recorded = new List<(long Gen, NodeApplyStatus Status, string? Error)>();
    var sut = NewService(redundancy, leaseRegistry, currentGeneration: () => 42, registerCalls: recorded);

    // The generation query answers 42 on every tick: the first tick is the initial apply,
    // the remaining two are no-change ticks that should each produce a heartbeat.
    const int tickCount = 3;
    for (var tick = 0; tick < tickCount; tick++)
    {
        await sut.TickAsync(CancellationToken.None);
    }

    recorded.Count.ShouldBe(3, "one apply call + two heartbeat calls");
    recorded.ShouldAllBe(c => c.Gen == 42 && c.Status == NodeApplyStatus.Applied && c.Error == null);
}
|
||||
|
||||
[Fact]
public async Task Register_call_failure_does_not_break_apply_or_block_subsequent_ticks()
{
    var redundancy = await SeedCoordinatorAsync();
    var leaseRegistry = new ApplyLeaseRegistry();
    var attempts = 0;

    var nodeOptions = new NodeOptions { NodeId = "A", ClusterId = "c1", ConfigDbConnectionString = "unused" };
    var sut = new GenerationRefreshHostedService(
        nodeOptions,
        leaseRegistry,
        redundancy,
        NullLogger<GenerationRefreshHostedService>.Instance,
        tickInterval: TimeSpan.FromSeconds(1),
        currentGenerationQuery: _ => Task.FromResult<long?>(42),
        // Register seam that counts each invocation and then always throws.
        registerAppliedAsync: (_, _, _, _) =>
        {
            attempts++;
            throw new InvalidOperationException("simulated DB outage during register");
        });

    // First tick: the apply itself goes through, the register report throws.
    await sut.TickAsync(CancellationToken.None);
    // Second tick: same generation, so this is the heartbeat path, which throws as well.
    await sut.TickAsync(CancellationToken.None);

    attempts.ShouldBe(2, "both register attempts must run");
    sut.LastAppliedGenerationId.ShouldBe(42, "register failure must not roll back the cursor");
}
|
||||
|
||||
// ---- fixture helpers ---------------------------------------------------
|
||||
|
||||
private async Task<RedundancyCoordinator> SeedCoordinatorAsync()
|
||||
@@ -136,11 +196,15 @@ public sealed class GenerationRefreshHostedServiceTests : IDisposable
|
||||
private static GenerationRefreshHostedService NewService(
|
||||
RedundancyCoordinator coordinator,
|
||||
ApplyLeaseRegistry leases,
|
||||
Func<long?> currentGeneration) =>
|
||||
Func<long?> currentGeneration,
|
||||
List<(long Gen, NodeApplyStatus Status, string? Error)>? registerCalls = null) =>
|
||||
new(new NodeOptions { NodeId = "A", ClusterId = "c1", ConfigDbConnectionString = "unused" },
|
||||
leases, coordinator, NullLogger<GenerationRefreshHostedService>.Instance,
|
||||
tickInterval: TimeSpan.FromSeconds(1),
|
||||
currentGenerationQuery: _ => Task.FromResult(currentGeneration()));
|
||||
currentGenerationQuery: _ => Task.FromResult(currentGeneration()),
|
||||
registerAppliedAsync: registerCalls is null
|
||||
? (_, _, _, _) => Task.CompletedTask
|
||||
: (gen, status, err, _) => { registerCalls.Add((gen, status, err)); return Task.CompletedTask; });
|
||||
|
||||
private sealed class DbContextFactory(DbContextOptions<OtOpcUaConfigDbContext> options)
|
||||
: IDbContextFactory<OtOpcUaConfigDbContext>
|
||||
|
||||
Reference in New Issue
Block a user