using Microsoft.EntityFrameworkCore;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;

namespace ZB.MOM.WW.OtOpcUa.Admin.Services;

/// <summary>
/// One row per <c>DriverHostStatuses</c> record, enriched with the owning
/// <c>ClusterNode.ClusterId</c> (left-join) + the per-(DriverInstanceId, HostName)
/// <c>DriverInstanceResilienceStatuses</c> counters (also left-join) so the Admin
/// <c>/hosts</c> page renders the resilience surface inline with host state.
/// </summary>
/// <param name="NodeId">Node identifier taken from the host-status row.</param>
/// <param name="ClusterId">
/// Cluster of the matching <c>ClusterNodes</c> row, or <c>null</c> when no such row exists.
/// </param>
/// <param name="DriverInstanceId">Driver instance identifier taken from the host-status row.</param>
/// <param name="HostName">Host name taken from the host-status row.</param>
/// <param name="State">Host state taken from the host-status row.</param>
/// <param name="StateChangedUtc">State-change timestamp taken from the host-status row.</param>
/// <param name="LastSeenUtc">
/// Last-seen timestamp taken from the host-status row; input to
/// <see cref="HostStatusService.IsStale(HostStatusRow)"/>.
/// </param>
/// <param name="Detail">Optional detail text taken from the host-status row.</param>
/// <param name="ConsecutiveFailures">
/// Counter from the matching resilience row, or <c>0</c> when no resilience row exists.
/// </param>
/// <param name="LastCircuitBreakerOpenUtc">
/// Value from the matching resilience row, or <c>null</c> when no resilience row exists.
/// </param>
/// <param name="CurrentBulkheadDepth">
/// Value from the matching resilience row, or <c>0</c> when no resilience row exists.
/// </param>
/// <param name="LastRecycleUtc">
/// Value from the matching resilience row, or <c>null</c> when no resilience row exists.
/// </param>
public sealed record HostStatusRow(
    string NodeId,
    string? ClusterId,
    string DriverInstanceId,
    string HostName,
    DriverHostState State,
    DateTime StateChangedUtc,
    DateTime LastSeenUtc,
    string? Detail,
    int ConsecutiveFailures,
    DateTime? LastCircuitBreakerOpenUtc,
    int CurrentBulkheadDepth,
    DateTime? LastRecycleUtc);

/// <summary>
/// Read-side service for the Admin UI's per-host drill-down. Loads
/// <c>DriverHostStatuses</c> rows (written by the Server process's
/// <c>HostStatusPublisher</c>) and left-joins <c>ClusterNode</c> so each row knows which
/// cluster it belongs to — the Admin UI groups by cluster for the fleet-wide view.
/// </summary>
/// <remarks>
/// The publisher heartbeat is 10s (<c>HostStatusPublisher.HeartbeatInterval</c>). The
/// Admin page also polls every ~10s and treats rows with <c>LastSeenUtc</c> older than
/// <see cref="StaleThreshold"/> (30s) as stale — covers a missed heartbeat tolerance plus
/// a generous buffer for clock skew and publisher GC pauses.
/// </remarks>
public sealed class HostStatusService(OtOpcUaConfigDbContext db)
{
    /// <summary>
    /// Maximum age of <see cref="HostStatusRow.LastSeenUtc"/> before a row is reported stale.
    /// </summary>
    public static readonly TimeSpan StaleThreshold = TimeSpan.FromSeconds(30);

    /// <summary>
    /// Consecutive-failure threshold at which <see cref="IsFlagged"/> returns <c>true</c>
    /// so the Admin UI can paint a red badge. Matches Phase 6.1 decision #143's conservative
    /// half-of-breaker-threshold convention — flags before the breaker actually opens.
    /// </summary>
    public const int FailureFlagThreshold = 3;

    /// <summary>
    /// Loads every host-status row, ordered by node, driver instance and host name, with
    /// cluster and resilience data attached where matching rows exist.
    /// </summary>
    /// <param name="ct">Token forwarded to the database query.</param>
    /// <returns>The projected rows; empty when no host-status rows exist.</returns>
    public async Task<IReadOnlyList<HostStatusRow>> ListAsync(CancellationToken ct = default)
    {
        // Two LEFT JOINs:
        //   1. ClusterNodes on NodeId — row persists even when its owning ClusterNode row
        //      hasn't been created yet (first-boot bootstrap case).
        //   2. DriverInstanceResilienceStatuses on (DriverInstanceId, HostName) — resilience
        //      counters haven't been sampled yet for brand-new hosts, so a missing row means
        //      zero failures + never-opened breaker.
        //
        // The null checks use `!= null` on purpose: `is not null` patterns are not allowed
        // inside the expression tree this query compiles to.
        var rows = await (
            from s in db.DriverHostStatuses.AsNoTracking()
            join n in db.ClusterNodes.AsNoTracking()
                on s.NodeId equals n.NodeId into nodeJoin
            from n in nodeJoin.DefaultIfEmpty()
            join r in db.DriverInstanceResilienceStatuses.AsNoTracking()
                on new { s.DriverInstanceId, s.HostName }
                equals new { r.DriverInstanceId, r.HostName } into resilJoin
            from r in resilJoin.DefaultIfEmpty()
            orderby s.NodeId, s.DriverInstanceId, s.HostName
            select new HostStatusRow(
                s.NodeId,
                n != null ? n.ClusterId : null,
                s.DriverInstanceId,
                s.HostName,
                s.State,
                s.StateChangedUtc,
                s.LastSeenUtc,
                s.Detail,
                r != null ? r.ConsecutiveFailures : 0,
                r != null ? r.LastCircuitBreakerOpenUtc : null,
                r != null ? r.CurrentBulkheadDepth : 0,
                r != null ? r.LastRecycleUtc : null)).ToListAsync(ct);

        return rows;
    }

    /// <summary>
    /// Staleness predicate evaluated against the current UTC clock.
    /// </summary>
    /// <param name="row">Row to test.</param>
    /// <returns>
    /// <c>true</c> when <see cref="HostStatusRow.LastSeenUtc"/> is more than
    /// <see cref="StaleThreshold"/> in the past.
    /// </returns>
    public static bool IsStale(HostStatusRow row) => IsStale(row, DateTime.UtcNow);

    /// <summary>
    /// Staleness predicate evaluated against a caller-supplied instant, so callers can
    /// judge a whole page of rows against one timestamp and tests can pin the clock.
    /// </summary>
    /// <param name="row">Row to test.</param>
    /// <param name="utcNow">Reference instant, compared directly with the row's UTC timestamp.</param>
    /// <returns>
    /// <c>true</c> when the gap between <paramref name="utcNow"/> and
    /// <see cref="HostStatusRow.LastSeenUtc"/> strictly exceeds <see cref="StaleThreshold"/>.
    /// </returns>
    public static bool IsStale(HostStatusRow row, DateTime utcNow) =>
        utcNow - row.LastSeenUtc > StaleThreshold;

    /// <summary>
    /// Red-badge predicate — true when the host has accumulated enough consecutive
    /// failures that an operator should take notice before the breaker trips.
    /// </summary>
    /// <param name="row">Row to test.</param>
    /// <returns>
    /// <c>true</c> when <see cref="HostStatusRow.ConsecutiveFailures"/> is at least
    /// <see cref="FailureFlagThreshold"/>.
    /// </returns>
    public static bool IsFlagged(HostStatusRow row) =>
        row.ConsecutiveFailures >= FailureFlagThreshold;
}