fix(health-monitoring): resolve HealthMonitoring-013,014,016 — shorter-timeout cadence, options validation, injected TimeProvider; HealthMonitoring-015 left open (cross-module design decision)

This commit is contained in:
Joseph Doherty
2026-05-17 03:18:24 -04:00
parent da8c9f171b
commit eae4077414
8 changed files with 296 additions and 12 deletions

View File

@@ -191,9 +191,10 @@ public class CentralHealthAggregator : BackgroundService, ICentralHealthAggregat
"Central health aggregator started, offline timeout {Timeout}s (central {CentralTimeout}s)",
_options.OfflineTimeout.TotalSeconds, _options.CentralOfflineTimeout.TotalSeconds);
// Check at half the (shorter) offline timeout interval for timely detection
var checkInterval = TimeSpan.FromMilliseconds(_options.OfflineTimeout.TotalMilliseconds / 2);
using var timer = new PeriodicTimer(checkInterval);
// Check at half the shorter of the two offline timeouts so detection is
// timely for whichever site class (real or "central") has the tighter
// window — see ComputeCheckInterval.
using var timer = new PeriodicTimer(ComputeCheckInterval(_options));
while (await timer.WaitForNextTickAsync(stoppingToken).ConfigureAwait(false))
{
@@ -201,6 +202,24 @@ public class CentralHealthAggregator : BackgroundService, ICentralHealthAggregat
}
}
/// <summary>
/// Derives how often the offline check should run: one half of whichever is
/// smaller, <see cref="HealthMonitoringOptions.OfflineTimeout"/> or
/// <see cref="HealthMonitoringOptions.CentralOfflineTimeout"/>.
/// </summary>
/// <remarks>
/// Basing the cadence on the tighter of the two windows means that window is
/// still sampled twice per timeout period. If only <c>OfflineTimeout</c> were
/// used, a <c>CentralOfflineTimeout</c> configured below it could have its
/// offline detection lag by as much as <c>OfflineTimeout / 2</c>.
/// </remarks>
/// <param name="options">Options supplying the two offline timeouts.</param>
/// <returns>Half of the shorter configured offline timeout.</returns>
internal static TimeSpan ComputeCheckInterval(HealthMonitoringOptions options)
{
    var siteTimeout = options.OfflineTimeout;
    var centralTimeout = options.CentralOfflineTimeout;

    // When the two are equal either one yields the same interval.
    var tighterWindow = centralTimeout <= siteTimeout ? centralTimeout : siteTimeout;

    var halfWindowMilliseconds = tighterWindow.TotalMilliseconds / 2;
    return TimeSpan.FromMilliseconds(halfWindowMilliseconds);
}
internal void CheckForOfflineSites()
{
var now = _timeProvider.GetUtcNow();