fix(health-monitoring): resolve HealthMonitoring-003..009 — central offline grace, register unknown-site heartbeats, test coverage
This commit is contained in:
@@ -4,4 +4,17 @@ public class HealthMonitoringOptions
|
||||
{
|
||||
/// <summary>
/// Interval between health reports. Default: 30 seconds.
/// NOTE(review): presumably the cadence at which health reports are emitted —
/// confirm against the code that consumes this option.
/// </summary>
public TimeSpan ReportInterval { get; set; } = TimeSpan.FromSeconds(30);
|
||||
/// <summary>
/// Offline timeout for regular (non-"central") sites. Default: 1 minute.
/// NOTE(review): presumably measured from the site's most recent heartbeat —
/// confirm against the offline check that reads this option.
/// </summary>
public TimeSpan OfflineTimeout { get; set; } = TimeSpan.FromMinutes(1);
|
||||
|
||||
/// <summary>
|
||||
/// Offline timeout applied to the synthetic "central" site only. Real sites
|
||||
/// emit frequent heartbeats that keep <c>LastHeartbeatAt</c> fresh, so the
|
||||
/// normal <see cref="OfflineTimeout"/> only fires on genuine total loss. The
|
||||
/// "central" self-report has no heartbeat source — its only signal is the
|
||||
/// 30s <see cref="CentralHealthReportLoop"/>, so a single skipped/late
|
||||
/// self-report (leader GC pause, brief stall, mid-failover before the new
|
||||
/// leader's loop spins up) would flap it offline under the 60s site timeout.
|
||||
/// A longer central grace gives the equivalent of "one missed report" that
|
||||
|
||||
/// the design doc grants real sites. Default: 3 minutes, i.e. 3x the default <see cref="OfflineTimeout"/> (6x the default <see cref="ReportInterval"/>).
|
||||
/// </summary>
|
||||
public TimeSpan CentralOfflineTimeout { get; set; } = TimeSpan.FromMinutes(3);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user