Files
ScadaBridge/tests/ScadaLink.HealthMonitoring.Tests/CentralHealthReportLoopTests.cs
T
Joseph Doherty d190345ef0 test(coverage): close Theme 8 — 13 test-coverage findings, +35 tests
13 well-bounded test-coverage gaps closed across 11 test projects.
Net +35 regression tests and no production code changes: the
SiteEventLogger src reference is unchanged (W3 edited only test code).

Test additions:
- CLI-022: CommandTreeTests pinned-count assertion bumped 14→16 and
  3 InlineData rows added for the audit + bundle command groups.
- Commons-020: new TransportRecordsTests covers BundleManifest /
  ExportSelection / ImportPreview / ImportResolution / ImportResult —
  ctor + System.Text.Json round-trip + record-equality (14 tests).
- CD-024: SPLIT-RANGE failure-continuation now under
  EnsureLookahead_SecondSplitThrows_LoopAborts_FirstBoundaryStillCommitted
  (Skippable MS-SQL fixture); production-shape rowversion delete
  asserted by DeleteDeploymentRecord_CurrentRowVersion_StubAttachPath_DeleteSucceeds.
- CentralUI-033: new QueryStringDrillInTests with 4 bUnit cases for
  Transport + SiteCalls drill-in / query-string handling.
- DM-024: probe actors (ReconcileProbeActor, SerializationProbeActor,
  ArtifactProbeActor) refactored from static fields to per-test instances
  (Interlocked on counter) — all 31 callers updated; no production
  changes required.
- HM-022: real-time PeriodicTimer test flake fixed by replacing
  fixed-budget Task.Delay with a RunLoopUntil poll-until-condition
  helper (5s/25ms). Production loop untouched.
- InboundAPI-023: new EndpointExtensionsTests covers the
  POST /api/{methodName} composition wiring via TestServer (7 cases:
  happy path, missing key 401, unknown method 403, invalid JSON 400,
  missing param 400, script-throws 500 sanitised, AuditActorItemKey
  stash invariant).
- MgmtSvc-021: 6 new ManagementActorTests cover the Transport bundle
  handlers (role gate for Export/Preview/Import, unknown-name
  ManagementCommandException, blocker-rejection, dedupe last-write-wins).
- SCA-006: SiteCallQueryRequest_StuckOnly_CursorAtNonStuckBoundary_SkipsToNextStuckRow
  pins the missing boundary case.
- SEL-023: stress-test `bool stop` promoted to `volatile bool` for
  cross-thread visibility under release/JIT.

Verify-only resolutions:
- NS-024: closed by NS-019 (commit ac96b83 deletion of
  NotificationDeliveryService + its test file). No edits needed.
- NotifOutbox-008: FallbackMaxRetries/FallbackRetryDelay are private
  forward-compat constants returned only when no SMTP-config row exists
  (in which case EmailNotificationDeliveryAdapter returns Permanent,
  bypassing the values entirely). Marked Resolved with note.
- Transport-010: Overwrite child-collection sync covered by the T-001/
  T-002 tests added in commit e3ca9af; per-IP throttle by
  BundleUnlockRateLimiterTests; failed-session retention by
  BundleSessionStoreTests; T-009 closed structurally via AsyncLocal.
  Marked Resolved by reference.

Build clean; all 11 affected test suites green. README regenerated:
33 open (was 46).
2026-05-28 08:21:03 -04:00

283 lines
12 KiB
C#

using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Options;
using ScadaLink.Commons.Messages.Health;
namespace ScadaLink.HealthMonitoring.Tests;
/// <summary>
/// HealthMonitoring-009 regression: the central self-report loop had no test
/// coverage at all. These tests exercise leader-only gating (SelfIsPrimary),
/// self-report generation for siteId="central", and monotonic sequence
/// assignment.
/// </summary>
public class CentralHealthReportLoopTests
{
/// <summary>
/// Hand-rolled <see cref="IClusterNodeProvider"/> stub. Tests set
/// <see cref="SelfIsPrimary"/> (the flag behind the leader-only gating these
/// tests exercise) and <see cref="Nodes"/> directly;
/// <see cref="GetClusterNodes"/> returns whatever <see cref="Nodes"/>
/// currently holds.
/// </summary>
private sealed class FakeClusterNodeProvider : IClusterNodeProvider
{
    /// <summary>Primary/standby flag chosen by the test; <c>false</c> unless set.</summary>
    public bool SelfIsPrimary { get; set; }

    /// <summary>Node list served by <see cref="GetClusterNodes"/>; empty unless set.</summary>
    public IReadOnlyList<NodeStatus> Nodes { get; set; } = [];

    /// <summary>Returns the current <see cref="Nodes"/> reference unchanged.</summary>
    public IReadOnlyList<NodeStatus> GetClusterNodes()
    {
        return Nodes;
    }
}
/// <summary>
/// <see cref="ICentralHealthAggregator"/> test double that appends every
/// report handed to <see cref="ProcessReport"/> to <see cref="Processed"/>.
/// Heartbeats are ignored, and the state queries always answer "nothing"
/// (an empty dictionary / <c>null</c>).
/// </summary>
private sealed class RecordingAggregator : ICentralHealthAggregator
{
    /// <summary>Reports received so far, in arrival order.</summary>
    public List<SiteHealthReport> Processed { get; } = new();

    /// <summary>Records <paramref name="report"/> for later assertions.</summary>
    public void ProcessReport(SiteHealthReport report)
    {
        Processed.Add(report);
    }

    /// <summary>No-op: these tests do not observe heartbeats.</summary>
    public void MarkHeartbeat(string siteId, DateTimeOffset receivedAt)
    {
    }

    /// <summary>Always returns a fresh, empty dictionary.</summary>
    public IReadOnlyDictionary<string, SiteHealthState> GetAllSiteStates()
    {
        return new Dictionary<string, SiteHealthState>();
    }

    /// <summary>Always returns <c>null</c>, whatever <paramref name="siteId"/> is.</summary>
    public SiteHealthState? GetSiteState(string siteId)
    {
        return null;
    }
}
/// <summary>
/// HealthMonitoring-022 de-flake: starts <paramref name="loop"/>, polls
/// <paramref name="condition"/> every 25 ms until it returns <c>true</c> or
/// <paramref name="maxWait"/> elapses, then stops the loop.
/// </summary>
/// <remarks>
/// <para>
/// <see cref="CentralHealthReportLoop"/>'s internal cadence is a real
/// <see cref="PeriodicTimer"/>, so these tests are timing-sensitive. Driving
/// the loop from a virtual clock would require refactoring the production
/// loop, so we keep wall-clock waits but with a <i>generous</i> budget: a
/// poll-until-condition wait instead of a fixed <see cref="Task.Delay(int)"/>
/// that fails fast on a slow CI runner. Each test sets <c>ReportInterval</c>
/// to 50 ms, so under normal conditions the condition is met almost
/// immediately; under heavy CI load the poll tolerates the slow tick rather
/// than asserting on a timed-out empty list.
/// </para>
/// <para>
/// NOTE(review): since .NET 8 <see cref="PeriodicTimer"/> has a constructor
/// overload that accepts a <see cref="TimeProvider"/>. If the production loop
/// passed its injected provider to the timer, these tests could use a fake
/// clock instead of real waits — confirm against the production loop.
/// </para>
/// <para>
/// The loop is stopped in a <c>finally</c> block so that it is not left
/// running in the background when <paramref name="condition"/> throws.
/// </para>
/// </remarks>
/// <param name="loop">The loop under test; started and stopped by this helper.</param>
/// <param name="condition">
/// Predicate polled from the test's own async flow while the loop runs.
/// </param>
/// <param name="maxWait">
/// Upper bound on the polling phase; defaults to 5 s. The token passed to
/// <c>StartAsync</c> is cancelled one second after this bound as an outer cap.
/// </param>
private static async Task RunLoopUntil(
    CentralHealthReportLoop loop,
    Func<bool> condition,
    TimeSpan? maxWait = null)
{
    var deadline = maxWait ?? TimeSpan.FromSeconds(5);
    using var cts = new CancellationTokenSource(deadline + TimeSpan.FromSeconds(1));
    var started = false;
    try
    {
        await loop.StartAsync(cts.Token);
        started = true;

        var sw = System.Diagnostics.Stopwatch.StartNew();
        while (sw.Elapsed < deadline && !condition())
        {
            // CancellationToken.None: the poll cadence must not be cut short
            // by the outer cap; the stopwatch check bounds the wait instead.
            await Task.Delay(25, CancellationToken.None);
        }
    }
    catch (OperationCanceledException)
    {
        // Cancellation via the outer cap is an expected way for the run to end.
    }
    finally
    {
        // Only stop a loop that actually started; always stop it once it has,
        // even when the polling phase exits via an exception.
        if (started)
        {
            try
            {
                await loop.StopAsync(CancellationToken.None);
            }
            catch (OperationCanceledException)
            {
                // Same tolerance as above: shutdown racing the cap is benign.
            }
        }
    }
}
/// <summary>
/// Runs <paramref name="loop"/> for a bounded period without waiting on any
/// condition — for tests that assert something did <i>not</i> happen (e.g.
/// no reports were produced). The run lasts at least 1 s:
/// <paramref name="runForMs"/> values below 1000 are raised to 1000. See
/// <see cref="RunLoopUntil"/> for why wall-clock waits are used at all.
/// </summary>
/// <param name="loop">The loop under test; started and stopped by this helper.</param>
/// <param name="runForMs">Requested run time in milliseconds (floored at 1000).</param>
private static async Task RunLoopBriefly(CentralHealthReportLoop loop, int runForMs)
{
    var holdMs = Math.Max(runForMs, 1000);
    // Outer cap: the start token is cancelled one second after the hold ends.
    var capMs = holdMs + 1000;
    using var cts = new CancellationTokenSource(TimeSpan.FromMilliseconds(capMs));

    try
    {
        await loop.StartAsync(cts.Token);
        await Task.Delay(holdMs, CancellationToken.None);
        await loop.StopAsync(CancellationToken.None);
    }
    catch (OperationCanceledException)
    {
        // Cancellation is an acceptable way for the bounded run to end.
    }
}
/// <summary>
/// With <c>SelfIsPrimary = true</c> the loop must hand at least one report
/// to the aggregator, and every report must carry
/// <see cref="CentralHealthReportLoop.CentralSiteId"/> as its site id.
/// </summary>
[Fact]
public async Task GeneratesCentralReports_WhenSelfIsPrimary()
{
    // Arrange: primary node, 50 ms report cadence.
    var recorder = new RecordingAggregator();
    var primaryNode = new FakeClusterNodeProvider { SelfIsPrimary = true };
    var settings = new HealthMonitoringOptions
    {
        ReportInterval = TimeSpan.FromMilliseconds(50)
    };
    var loop = new CentralHealthReportLoop(
        new SiteHealthCollector(),
        recorder,
        primaryNode,
        Options.Create(settings),
        NullLogger<CentralHealthReportLoop>.Instance);

    // Act — HealthMonitoring-022: poll (default budget 5 s) for the first
    // report instead of sleeping a fixed amount; tolerates slow CI runners.
    await RunLoopUntil(loop, () => recorder.Processed.Count > 0);

    // Assert
    Assert.NotEmpty(recorder.Processed);
    Assert.All(
        recorder.Processed,
        report => Assert.Equal(CentralHealthReportLoop.CentralSiteId, report.SiteId));
}
/// <summary>
/// With <c>SelfIsPrimary = false</c> the loop must not pass any report to
/// the aggregator for the whole bounded run.
/// </summary>
[Fact]
public async Task GeneratesNoReports_WhenNotPrimary()
{
    // Arrange: standby node, 50 ms report cadence.
    var recorder = new RecordingAggregator();
    var standbyNode = new FakeClusterNodeProvider { SelfIsPrimary = false };
    var settings = new HealthMonitoringOptions
    {
        ReportInterval = TimeSpan.FromMilliseconds(50)
    };
    var loop = new CentralHealthReportLoop(
        new SiteHealthCollector(),
        recorder,
        standbyNode,
        Options.Create(settings),
        NullLogger<CentralHealthReportLoop>.Instance);

    // Act: there is no condition to wait for, so just let the loop run.
    await RunLoopBriefly(loop, 250);

    // Assert
    Assert.Empty(recorder.Processed);
}
/// <summary>
/// Every report recorded by the aggregator must have a
/// <c>SequenceNumber</c> strictly greater than the one recorded before it.
/// </summary>
[Fact]
public async Task AssignsMonotonicSequenceNumbers()
{
    // Arrange: primary node, 50 ms report cadence.
    var recorder = new RecordingAggregator();
    var primaryNode = new FakeClusterNodeProvider { SelfIsPrimary = true };
    var settings = new HealthMonitoringOptions
    {
        ReportInterval = TimeSpan.FromMilliseconds(50)
    };
    var loop = new CentralHealthReportLoop(
        new SiteHealthCollector(),
        recorder,
        primaryNode,
        Options.Create(settings),
        NullLogger<CentralHealthReportLoop>.Instance);

    // Act — HealthMonitoring-022: poll (default budget 5 s) until two reports
    // exist, rather than a fixed 300 ms window that could miss the second
    // tick on a slow CI runner.
    await RunLoopUntil(loop, () => recorder.Processed.Count >= 2);

    // Assert: compare each report with its predecessor.
    var reports = recorder.Processed;
    Assert.True(reports.Count >= 2, $"Expected at least 2 reports, got {reports.Count}");
    for (var i = 1; i < reports.Count; i++)
    {
        var previous = reports[i - 1].SequenceNumber;
        var current = reports[i].SequenceNumber;
        Assert.True(current > previous, $"Sequence numbers not strictly increasing at index {i}");
    }
}
/// <summary>
/// HealthMonitoring-006 regression: the loop's sequence-number seed has to
/// come from the injected <see cref="TimeProvider"/> (as Unix milliseconds)
/// rather than from <c>DateTimeOffset.UtcNow</c> captured at field
/// initialization; otherwise the seeding strategy cannot be tested
/// deterministically.
/// </summary>
[Fact]
public void SequenceNumberSeed_UsesInjectedTimeProvider()
{
    // Arrange: a clock pinned to a known instant.
    var seedInstant = new DateTimeOffset(2026, 5, 16, 12, 0, 0, TimeSpan.Zero);
    var expectedSeed = seedInstant.ToUnixTimeMilliseconds();
    var pinnedClock = new TestTimeProvider(seedInstant);

    // Act: construction alone is enough; the loop is never started.
    var loop = new CentralHealthReportLoop(
        new SiteHealthCollector(),
        new RecordingAggregator(),
        new FakeClusterNodeProvider { SelfIsPrimary = true },
        Options.Create(new HealthMonitoringOptions()),
        NullLogger<CentralHealthReportLoop>.Instance,
        pinnedClock);

    // Assert
    Assert.Equal(expectedSeed, loop.CurrentSequenceNumber);
}
/// <summary>
/// HealthMonitoring-018 regression: if
/// <see cref="ICentralHealthAggregator.ProcessReport"/> throws, the
/// per-interval counters that <see cref="SiteHealthCollector.CollectReport"/>
/// has just drained must be put back into the shared collector, so that they
/// roll forward into the next interval instead of vanishing. Mirrors the
/// HealthMonitoring-017 fix in <see cref="HealthReportSender"/>.
/// </summary>
[Fact]
public async Task ProcessReportFailure_PreservesIntervalCountersForNextReport()
{
    // Arrange: seed every per-interval counter so the restore path of each
    // field is exercised. The first iteration drains them via CollectReport,
    // ProcessReport then throws, and the restore has to put them back.
    var collector = new SiteHealthCollector();
    Repeat(2, () => collector.IncrementScriptError());
    Repeat(1, () => collector.IncrementAlarmError());
    Repeat(3, () => collector.IncrementDeadLetter());
    Repeat(1, () => collector.IncrementSiteAuditWriteFailures());
    Repeat(2, () => collector.IncrementAuditRedactionFailure());

    var flakyAggregator = new FailingThenSucceedingAggregator();
    var primaryNode = new FakeClusterNodeProvider { SelfIsPrimary = true };
    var settings = new HealthMonitoringOptions
    {
        ReportInterval = TimeSpan.FromMilliseconds(50)
    };
    var loop = new CentralHealthReportLoop(
        collector,
        flakyAggregator,
        primaryNode,
        Options.Create(settings),
        NullLogger<CentralHealthReportLoop>.Instance);

    // Act — HealthMonitoring-022: the first ProcessReport call throws, the
    // second succeeds. Poll (default budget 5 s) for that first successful
    // call rather than relying on a fixed 450 ms budget.
    await RunLoopUntil(loop, () => flakyAggregator.Processed.Count > 0);

    // Assert: the first report that got through must carry the counts
    // accumulated during the interval whose report failed.
    Assert.NotEmpty(flakyAggregator.Processed);
    var recovered = flakyAggregator.Processed[0];
    Assert.Equal(2, recovered.ScriptErrorCount);
    Assert.Equal(1, recovered.AlarmEvaluationErrorCount);
    Assert.Equal(3, recovered.DeadLetterCount);
    Assert.Equal(1, recovered.SiteAuditWriteFailures);
    Assert.Equal(2, recovered.AuditRedactionFailure);

    // Invokes `step` exactly `times` times.
    static void Repeat(int times, Action step)
    {
        for (var i = 0; i < times; i++)
        {
            step();
        }
    }
}
/// <summary>
/// <see cref="ICentralHealthAggregator"/> double that throws from the very
/// first <c>ProcessReport</c> call and records every later one. It backs
/// <see cref="ProcessReportFailure_PreservesIntervalCountersForNextReport"/>,
/// which checks the HealthMonitoring-018 restore-on-failure path.
/// </summary>
private sealed class FailingThenSucceedingAggregator : ICentralHealthAggregator
{
    // Number of ProcessReport invocations so far, including the failing one.
    private int _calls;

    /// <summary>Reports accepted after the initial failure, in arrival order.</summary>
    public List<SiteHealthReport> Processed { get; } = new();

    /// <summary>
    /// Throws <see cref="InvalidOperationException"/> on the first call only;
    /// every subsequent call appends <paramref name="report"/> to
    /// <see cref="Processed"/>.
    /// </summary>
    public void ProcessReport(SiteHealthReport report)
    {
        var isFirstCall = Interlocked.Increment(ref _calls) == 1;
        if (isFirstCall)
        {
            throw new InvalidOperationException("aggregator temporarily unavailable");
        }

        Processed.Add(report);
    }

    /// <summary>No-op: heartbeats are irrelevant to this double.</summary>
    public void MarkHeartbeat(string siteId, DateTimeOffset receivedAt)
    {
    }

    /// <summary>Always returns a fresh, empty dictionary.</summary>
    public IReadOnlyDictionary<string, SiteHealthState> GetAllSiteStates()
    {
        return new Dictionary<string, SiteHealthState>();
    }

    /// <summary>Always returns <c>null</c>, whatever <paramref name="siteId"/> is.</summary>
    public SiteHealthState? GetSiteState(string siteId)
    {
        return null;
    }
}
/// <summary>
/// Even on the standby the loop must still report the node's role to the
/// collector, so that the standby's own node card shows the correct role.
/// </summary>
/// <remarks>
/// NOTE(review): if <c>SiteHealthCollector.IsActiveNode</c> already starts
/// out <c>false</c>, this assertion cannot tell "the loop set it to false"
/// apart from "the loop never touched it" — confirm the collector's initial
/// value.
/// </remarks>
[Fact]
public async Task SetsActiveNodeFlag_EvenWhenNotPrimary()
{
    // Arrange: standby node, 50 ms report cadence.
    var collector = new SiteHealthCollector();
    var standbyNode = new FakeClusterNodeProvider { SelfIsPrimary = false };
    var settings = new HealthMonitoringOptions
    {
        ReportInterval = TimeSpan.FromMilliseconds(50)
    };
    var loop = new CentralHealthReportLoop(
        collector,
        new RecordingAggregator(),
        standbyNode,
        Options.Create(settings),
        NullLogger<CentralHealthReportLoop>.Instance);

    // Act
    await RunLoopBriefly(loop, 150);

    // Assert
    Assert.False(collector.IsActiveNode);
}
}