using System.Collections.Concurrent;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using ScadaLink.Commons.Messages.Health;

namespace ScadaLink.HealthMonitoring;

/// <summary>
/// Central-side aggregator that receives health reports from all sites,
/// tracks latest metrics in memory, and detects offline sites.
/// No persistence — display-only for Central UI consumption.
/// </summary>
/// <remarks>
/// Every mutation of a <see cref="SiteHealthState"/> (from <see cref="ProcessReport"/> and
/// from <see cref="CheckForOfflineSites"/>) happens while holding a single private lock, so
/// the sequence-number guard and the online/offline transition are each applied atomically.
/// The read methods do not take that lock and hand out the live state objects.
/// </remarks>
public class CentralHealthAggregator : BackgroundService, ICentralHealthAggregator
{
    private readonly ConcurrentDictionary<string, SiteHealthState> _siteStates = new();

    // Guards every write to the SiteHealthState instances stored in _siteStates.
    // The dictionary itself stays concurrent so readers never need this lock.
    private readonly object _stateGate = new();

    private readonly HealthMonitoringOptions _options;
    private readonly ILogger<CentralHealthAggregator> _logger;
    private readonly TimeProvider _timeProvider;

    /// <summary>
    /// Creates the aggregator.
    /// </summary>
    /// <param name="options">Health monitoring options; <c>OfflineTimeout</c> drives offline detection.</param>
    /// <param name="logger">Logger for registration, stale-report and online/offline transitions.</param>
    /// <param name="timeProvider">Clock used for timestamps; defaults to <see cref="TimeProvider.System"/>.</param>
    public CentralHealthAggregator(
        IOptions<HealthMonitoringOptions> options,
        ILogger<CentralHealthAggregator> logger,
        TimeProvider? timeProvider = null)
    {
        _options = options.Value;
        _logger = logger;
        _timeProvider = timeProvider ?? TimeProvider.System;
    }

    /// <summary>
    /// Process an incoming health report from a site.
    /// Only replaces stored state if incoming sequence number is greater than last received.
    /// Auto-marks previously offline sites as online.
    /// </summary>
    /// <param name="report">The report to apply.</param>
    /// <exception cref="ArgumentNullException"><paramref name="report"/> is <c>null</c>.</exception>
    public void ProcessReport(SiteHealthReport report)
    {
        ArgumentNullException.ThrowIfNull(report);

        // The whole read-compare-write runs under the gate. Doing it inside
        // ConcurrentDictionary.AddOrUpdate is not enough: its delegates run outside the
        // dictionary's internal lock, so two concurrent reports could interleave and an
        // older sequence number could overwrite a newer one.
        lock (_stateGate)
        {
            var now = _timeProvider.GetUtcNow();

            if (!_siteStates.TryGetValue(report.SiteId, out var existing))
            {
                _siteStates[report.SiteId] = new SiteHealthState
                {
                    SiteId = report.SiteId,
                    LatestReport = report,
                    LastReportReceivedAt = now,
                    LastSequenceNumber = report.SequenceNumber,
                    IsOnline = true
                };

                _logger.LogInformation(
                    "Site {SiteId} registered with sequence #{Seq}",
                    report.SiteId, report.SequenceNumber);
                return;
            }

            if (report.SequenceNumber <= existing.LastSequenceNumber)
            {
                _logger.LogDebug(
                    "Rejecting stale report from site {SiteId}: seq {Incoming} <= {Last}",
                    report.SiteId, report.SequenceNumber, existing.LastSequenceNumber);
                return;
            }

            var wasOffline = !existing.IsOnline;

            existing.LatestReport = report;
            existing.LastReportReceivedAt = now;
            existing.LastSequenceNumber = report.SequenceNumber;
            existing.IsOnline = true;

            if (wasOffline)
            {
                _logger.LogInformation(
                    "Site {SiteId} is back online (seq #{Seq})",
                    report.SiteId, report.SequenceNumber);
            }
        }
    }

    /// <summary>
    /// Get the current health state for all known sites.
    /// </summary>
    /// <returns>
    /// A new dictionary copied from the internal map. The dictionary is a copy, but its
    /// values are the same <see cref="SiteHealthState"/> instances the aggregator updates.
    /// </returns>
    public IReadOnlyDictionary<string, SiteHealthState> GetAllSiteStates()
    {
        return new Dictionary<string, SiteHealthState>(_siteStates);
    }

    /// <summary>
    /// Get the current health state for a specific site, or null if unknown.
    /// </summary>
    /// <param name="siteId">Identifier of the site to look up.</param>
    /// <returns>The stored state instance for the site, or <c>null</c> when no report has been seen.</returns>
    public SiteHealthState? GetSiteState(string siteId)
    {
        _siteStates.TryGetValue(siteId, out var state);
        return state;
    }

    /// <summary>
    /// Background task that periodically checks for offline sites.
    /// </summary>
    /// <remarks>
    /// <see cref="PeriodicTimer"/> throws <see cref="ArgumentOutOfRangeException"/> for a zero
    /// or negative period, so the configured offline timeout must be positive.
    /// </remarks>
    /// <param name="stoppingToken">Signalled when the host is shutting down; ends the loop.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        _logger.LogInformation(
            "Central health aggregator started, offline timeout {Timeout}s",
            _options.OfflineTimeout.TotalSeconds);

        // Check at half the offline timeout interval for timely detection
        var checkInterval = TimeSpan.FromMilliseconds(_options.OfflineTimeout.TotalMilliseconds / 2);

        using var timer = new PeriodicTimer(checkInterval);

        while (await timer.WaitForNextTickAsync(stoppingToken).ConfigureAwait(false))
        {
            CheckForOfflineSites();
        }
    }

    /// <summary>
    /// Marks every online site whose last report is older than the offline timeout as offline.
    /// </summary>
    internal void CheckForOfflineSites()
    {
        // Same gate as ProcessReport, so a report that arrives during the sweep is either
        // fully applied before its site is examined or applied afterwards; the sweep can
        // never flip a freshly refreshed site to offline.
        lock (_stateGate)
        {
            var now = _timeProvider.GetUtcNow();

            foreach (var kvp in _siteStates)
            {
                var state = kvp.Value;
                if (!state.IsOnline)
                {
                    continue;
                }

                var elapsed = now - state.LastReportReceivedAt;
                if (elapsed > _options.OfflineTimeout)
                {
                    state.IsOnline = false;
                    _logger.LogWarning(
                        "Site {SiteId} marked offline — no report for {Elapsed}s (timeout: {Timeout}s)",
                        state.SiteId, elapsed.TotalSeconds, _options.OfflineTimeout.TotalSeconds);
                }
            }
        }
    }
}