using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
using ZB.MOM.WW.OtOpcUa.Core.Resilience;

namespace ZB.MOM.WW.OtOpcUa.Server.Hosting;

/// <summary>
/// Samples <see cref="DriverResilienceStatusTracker"/> at a fixed tick + upserts each
/// <c>(DriverInstanceId, HostName)</c> snapshot into <see cref="DriverInstanceResilienceStatus"/>
/// so Admin <c>/hosts</c> can render live resilience counters across restarts.
/// </summary>
/// <remarks>
/// <para>
/// Closes the HostedService piece of Phase 6.1 Stream E.2 flagged as a follow-up
/// when the tracker shipped in PR #82. The Admin UI column-refresh piece (red badge when
/// <c>ConsecutiveFailures</c> exceeds <c>breakerThreshold / 2</c> + SignalR push) is still
/// deferred to the visual-compliance pass — this service owns the persistence half alone.
/// </para>
/// <para>
/// Tick interval defaults to 5 s. Persistence is best-effort: a DB outage during
/// a tick logs + continues; the next tick tries again with the latest snapshots. The
/// hosted service never crashes the app on sample failure.
/// </para>
/// <para>
/// <see cref="PersistOnceAsync"/> is factored as a public method so tests can drive
/// it directly, giving deterministic unit-test timing without waiting on the tick loop.
/// </para>
/// </remarks>
public sealed class ResilienceStatusPublisherHostedService : BackgroundService
{
    private readonly DriverResilienceStatusTracker _tracker;
    private readonly IDbContextFactory _dbContextFactory;
    private readonly ILogger _logger;
    private readonly TimeProvider _timeProvider;

    /// <summary>Tick interval — how often the tracker snapshot is persisted.</summary>
    public TimeSpan TickInterval { get; }

    /// <summary>Snapshot of the tick count for diagnostics + test assertions.</summary>
    public int TickCount { get; private set; }

    /// <summary>
    /// Creates the publisher.
    /// </summary>
    /// <param name="tracker">Source of the in-memory resilience snapshots.</param>
    /// <param name="dbContextFactory">Factory used to open one short-lived context per tick.</param>
    /// <param name="logger">Receives the start/stop messages and per-tick failure warnings.</param>
    /// <param name="timeProvider">Clock used for the delay and the sample timestamp; defaults to <see cref="TimeProvider.System"/>.</param>
    /// <param name="tickInterval">Delay between ticks; defaults to 5 seconds.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="tracker"/>, <paramref name="dbContextFactory"/> or <paramref name="logger"/> is <c>null</c>.
    /// </exception>
    public ResilienceStatusPublisherHostedService(
        DriverResilienceStatusTracker tracker,
        IDbContextFactory dbContextFactory,
        ILogger logger,
        TimeProvider? timeProvider = null,
        TimeSpan? tickInterval = null)
    {
        ArgumentNullException.ThrowIfNull(tracker);
        ArgumentNullException.ThrowIfNull(dbContextFactory);
        // The logger is dereferenced as soon as ExecuteAsync starts, so fail fast here
        // like the other required dependencies instead of with a late NullReferenceException.
        ArgumentNullException.ThrowIfNull(logger);

        _tracker = tracker;
        _dbContextFactory = dbContextFactory;
        _logger = logger;
        _timeProvider = timeProvider ?? TimeProvider.System;
        TickInterval = tickInterval ?? TimeSpan.FromSeconds(5);
    }

    /// <summary>
    /// Tick loop: waits <see cref="TickInterval"/>, persists one snapshot, repeats until
    /// <paramref name="stoppingToken"/> is cancelled.
    /// </summary>
    /// <param name="stoppingToken">Signals that the loop should end.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        _logger.LogInformation(
            "ResilienceStatusPublisherHostedService starting — tick interval = {Interval}",
            TickInterval);

        while (!stoppingToken.IsCancellationRequested)
        {
            try
            {
                await Task.Delay(TickInterval, _timeProvider, stoppingToken).ConfigureAwait(false);
                await PersistOnceAsync(stoppingToken).ConfigureAwait(false);
            }
            catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
            {
                // Stop requested while waiting or mid-persist: leave the loop normally
                // so the stopping message below is still written.
                break;
            }
        }

        _logger.LogInformation(
            "ResilienceStatusPublisherHostedService stopping after {TickCount} tick(s).",
            TickCount);
    }

    /// <summary>
    /// Take one snapshot of the tracker + upsert each pair into the persistence table.
    /// Swallows transient exceptions + logs them; never throws from a sample failure.
    /// </summary>
    /// <param name="cancellationToken">Cancels the database work for this tick.</param>
    /// <returns>A task that completes when the tick has been persisted or its failure logged.</returns>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="cancellationToken"/> was cancelled while the tick was in progress.
    /// </exception>
    public async Task PersistOnceAsync(CancellationToken cancellationToken)
    {
        // Counted before the empty-snapshot early return, so idle ticks are included.
        TickCount++;

        var snapshot = _tracker.Snapshot();
        if (snapshot.Count == 0)
        {
            return;
        }

        try
        {
            await using var db = await _dbContextFactory
                .CreateDbContextAsync(cancellationToken)
                .ConfigureAwait(false);

            // One timestamp per tick so every row written in this pass shares it.
            var now = _timeProvider.GetUtcNow().UtcDateTime;

            foreach (var (driverInstanceId, hostName, counters) in snapshot)
            {
                var existing = await db.DriverInstanceResilienceStatuses
                    .FirstOrDefaultAsync(
                        x => x.DriverInstanceId == driverInstanceId && x.HostName == hostName,
                        cancellationToken)
                    .ConfigureAwait(false);

                if (existing is null)
                {
                    db.DriverInstanceResilienceStatuses.Add(new DriverInstanceResilienceStatus
                    {
                        DriverInstanceId = driverInstanceId,
                        HostName = hostName,
                        LastCircuitBreakerOpenUtc = counters.LastBreakerOpenUtc,
                        ConsecutiveFailures = counters.ConsecutiveFailures,
                        CurrentBulkheadDepth = counters.CurrentInFlight,
                        LastRecycleUtc = counters.LastRecycleUtc,
                        BaselineFootprintBytes = counters.BaselineFootprintBytes,
                        CurrentFootprintBytes = counters.CurrentFootprintBytes,
                        LastSampledUtc = now,
                    });
                }
                else
                {
                    existing.LastCircuitBreakerOpenUtc = counters.LastBreakerOpenUtc;
                    existing.ConsecutiveFailures = counters.ConsecutiveFailures;
                    existing.CurrentBulkheadDepth = counters.CurrentInFlight;
                    existing.LastRecycleUtc = counters.LastRecycleUtc;
                    existing.BaselineFootprintBytes = counters.BaselineFootprintBytes;
                    existing.CurrentFootprintBytes = counters.CurrentFootprintBytes;
                    existing.LastSampledUtc = now;
                }
            }

            // Single save for the whole tick: inserts and updates are flushed together.
            await db.SaveChangesAsync(cancellationToken).ConfigureAwait(false);
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Only cancellation requested through the caller's token propagates. An
            // OperationCanceledException raised for any other reason is a sample failure
            // and falls through to the best-effort handler below.
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(
                ex,
                "ResilienceStatusPublisher persistence tick failed; next tick will retry with latest snapshots.");
        }
    }
}