fix(health-monitoring): resolve HealthMonitoring-004,006,010,011,012 — heartbeat-doc accuracy, testable sequence seeding, logged failures, dead-code removal

This commit is contained in:
Joseph Doherty
2026-05-16 22:14:23 -04:00
parent e57ccd78b7
commit 2d7ac5b57f
9 changed files with 260 additions and 35 deletions

View File

@@ -1,4 +1,5 @@
using Microsoft.Data.Sqlite;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Options;
using ScadaLink.Commons.Messages.Health;
@@ -20,6 +21,44 @@ public class HealthReportSenderTests
public string SiteId { get; set; } = "test-site";
}
/// <summary>
/// In-memory <see cref="ILogger{T}"/> that records every log call, letting tests
/// verify that non-fatal failures are surfaced (HealthMonitoring-010) instead of
/// being silently swallowed.
/// </summary>
private sealed class CapturingLogger<T> : ILogger<T>
{
    /// <summary>A single recorded log call: its level, formatted message and attached exception (if any).</summary>
    public sealed record Entry(LogLevel Level, string Message, Exception? Exception);

    /// <summary>
    /// The recorded entries. Appends are made while holding a lock on this list, so
    /// readers should take the same lock while enumerating it.
    /// </summary>
    public List<Entry> Entries { get; } = [];

    /// <summary>Reports every level as enabled so that no log call is filtered out.</summary>
    public bool IsEnabled(LogLevel logLevel)
    {
        return true;
    }

    /// <summary>Scopes are not tracked; a shared no-op disposable is handed back.</summary>
    public IDisposable BeginScope<TState>(TState state) where TState : notnull
    {
        return NoOpScope.Instance;
    }

    /// <summary>Formats the message and appends the resulting <see cref="Entry"/> to <see cref="Entries"/>.</summary>
    public void Log<TState>(
        LogLevel logLevel,
        EventId eventId,
        TState state,
        Exception? exception,
        Func<TState, Exception?, string> formatter)
    {
        var rendered = formatter(state, exception);
        var captured = new Entry(logLevel, rendered, exception);

        // Log calls may arrive from a background thread, so appends are serialized.
        lock (Entries)
        {
            Entries.Add(captured);
        }
    }

    /// <summary>Disposable that does nothing; returned from <see cref="BeginScope{TState}"/>.</summary>
    private sealed class NoOpScope : IDisposable
    {
        public static readonly NoOpScope Instance = new();

        public void Dispose()
        {
        }
    }
}
/// <summary>
/// An <see cref="IClusterNodeProvider"/> test double whose <see cref="SelfIsPrimary"/>
/// is always <c>true</c> and whose <see cref="GetClusterNodes"/> always throws an
/// <see cref="InvalidOperationException"/>.
/// </summary>
private sealed class ThrowingClusterNodeProvider : IClusterNodeProvider
{
    public bool SelfIsPrimary
    {
        get { return true; }
    }

    public IReadOnlyList<NodeStatus> GetClusterNodes()
    {
        throw new InvalidOperationException("cluster query failed");
    }
}
[Fact]
public async Task SendsReportsWithMonotonicSequenceNumbers()
{
@@ -226,4 +265,76 @@ public class HealthReportSenderTests
Assert.InRange(sender.CurrentSequenceNumber, beforeCtor, afterCtor);
}
/// <summary>
/// Regression test for HealthMonitoring-010. When refreshing the cluster nodes
/// throws, the failure stays non-fatal (a report is still sent), but it must not
/// be hidden behind a bare <c>catch {}</c>: a warning carrying the exception has
/// to be logged so that persistent degradation can be diagnosed.
/// </summary>
[Fact]
public async Task ClusterNodeRefreshFailure_IsLoggedNotSwallowed()
{
    // Arrange: an active-node collector, a capturing logger, and a cluster node
    // provider that throws on every query.
    var fakeTransport = new FakeTransport();
    var healthCollector = new SiteHealthCollector();
    healthCollector.SetActiveNode(true);
    var capturedLog = new CapturingLogger<HealthReportSender>();
    var monitoringOptions = new HealthMonitoringOptions
    {
        ReportInterval = TimeSpan.FromMilliseconds(50)
    };
    var sender = new HealthReportSender(
        healthCollector,
        fakeTransport,
        Options.Create(monitoringOptions),
        capturedLog,
        new FakeSiteIdentityProvider(),
        clusterNodeProvider: new ThrowingClusterNodeProvider());

    // Act: run the sender for 250 ms (several 50 ms report intervals), then stop it.
    using var startupCts = new CancellationTokenSource(TimeSpan.FromMilliseconds(300));
    try
    {
        await sender.StartAsync(startupCts.Token);
        await Task.Delay(250, CancellationToken.None);
        await sender.StopAsync(CancellationToken.None);
    }
    catch (OperationCanceledException)
    {
        // Cancellation while starting/stopping is tolerated here.
    }

    // Assert: reporting carried on in spite of the failure...
    Assert.NotEmpty(fakeTransport.SentReports);

    // ...and the failure shows up as a warning that carries the exception.
    // Copy the entries under the logger's lock, then filter the copy.
    CapturingLogger<HealthReportSender>.Entry[] snapshot;
    lock (capturedLog.Entries)
    {
        snapshot = capturedLog.Entries.ToArray();
    }

    CapturingLogger<HealthReportSender>.Entry[] warnings = snapshot
        .Where(entry => entry.Level == LogLevel.Warning)
        .Where(entry => entry.Exception is InvalidOperationException)
        .ToArray();

    Assert.NotEmpty(warnings);
    Assert.Contains(
        warnings,
        warning => warning.Message.Contains("cluster nodes", StringComparison.OrdinalIgnoreCase));
}
/// <summary>
/// Regression test for HealthMonitoring-006. The sequence-number seed has to come
/// from the injected <see cref="TimeProvider"/> rather than from a direct
/// <c>DateTimeOffset.UtcNow</c> read at field initialization; that keeps the
/// Unix-ms seeding strategy deterministically testable and makes the clock
/// dependency explicit.
/// </summary>
[Fact]
public void SequenceNumberSeed_UsesInjectedTimeProvider()
{
    // Arrange: a clock pinned to a known instant.
    var pinnedInstant = new DateTimeOffset(2026, 5, 16, 12, 0, 0, TimeSpan.Zero);
    var pinnedClock = new TestTimeProvider(pinnedInstant);
    var expectedSeed = pinnedInstant.ToUnixTimeMilliseconds();

    // Act: constructing the sender is what seeds the sequence number.
    var sender = new HealthReportSender(
        new SiteHealthCollector(),
        new FakeTransport(),
        Options.Create(new HealthMonitoringOptions()),
        NullLogger<HealthReportSender>.Instance,
        new FakeSiteIdentityProvider(),
        timeProvider: pinnedClock);

    // Assert: the seed equals the pinned instant expressed in Unix milliseconds.
    Assert.Equal(expectedSeed, sender.CurrentSequenceNumber);
}
}