using System.Net;
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
using ZB.MOM.WW.OtOpcUa.Core.Observability;

namespace ZB.MOM.WW.OtOpcUa.Server.Observability;

/// <summary>
/// Standalone <see cref="HttpListener"/> host for <c>/healthz</c> and <c>/readyz</c>,
/// separate from the OPC UA binding. Per
/// <c>docs/v2/implementation/phase-6-1-resilience-and-observability.md</c> §Stream C.1.
/// </summary>
/// <remarks>
/// Binds to <c>http://localhost:4841</c> by default — loopback avoids the Windows URL-ACL
/// elevation requirement that binding to <c>http://+:4841</c> (wildcard) would impose.
/// When a deployment needs remote probing, a reverse proxy or explicit <c>netsh urlacl</c> grant
/// is the expected path; documented in <c>docs/v2/Server-Deployment.md</c> in a follow-up.
/// </remarks>
public sealed class HealthEndpointsHost : IAsyncDisposable
{
    /// <summary>Back-off applied after a failed accept so a persistent error cannot spin the loop.</summary>
    private static readonly TimeSpan AcceptRetryDelay = TimeSpan.FromMilliseconds(100);

    private readonly string _prefix;
    private readonly DriverHost _driverHost;
    private readonly Func<bool> _configDbHealthy;
    private readonly Func<bool> _usingStaleConfig;
    private readonly ILogger<HealthEndpointsHost> _logger;
    private readonly HttpListener _listener = new();
    private readonly DateTime _startedUtc = DateTime.UtcNow;
    private CancellationTokenSource? _cts;
    private Task? _acceptLoop;
    private bool _disposed;

    /// <summary>
    /// Creates the host and registers the listener prefix. Nothing is bound until
    /// <see cref="Start"/> is called.
    /// </summary>
    /// <param name="driverHost">Source of the registered drivers reported by <c>/readyz</c>.</param>
    /// <param name="logger">Logger for start-up and failure messages.</param>
    /// <param name="configDbHealthy">
    /// Probe used by <c>/healthz</c> for the config DB; when omitted it always reports <c>true</c>.
    /// </param>
    /// <param name="usingStaleConfig">
    /// Probe used by <c>/healthz</c> for the stale-config flag; when omitted it always reports <c>false</c>.
    /// </param>
    /// <param name="prefix">
    /// <see cref="HttpListener"/> prefix to bind. A trailing <c>/</c> is appended when missing.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="driverHost"/>, <paramref name="logger"/> or <paramref name="prefix"/> is null.
    /// </exception>
    public HealthEndpointsHost(
        DriverHost driverHost,
        ILogger<HealthEndpointsHost> logger,
        Func<bool>? configDbHealthy = null,
        Func<bool>? usingStaleConfig = null,
        string prefix = "http://localhost:4841/")
    {
        ArgumentNullException.ThrowIfNull(driverHost);
        ArgumentNullException.ThrowIfNull(logger);
        ArgumentNullException.ThrowIfNull(prefix);

        _driverHost = driverHost;
        _logger = logger;
        _configDbHealthy = configDbHealthy ?? (() => true);
        _usingStaleConfig = usingStaleConfig ?? (() => false);
        _prefix = prefix.EndsWith('/') ? prefix : prefix + "/";
        _listener.Prefixes.Add(_prefix);
    }

    /// <summary>
    /// Starts the listener and the background accept loop. Calling it again while
    /// already started is a no-op.
    /// </summary>
    /// <exception cref="ObjectDisposedException">The host has already been disposed.</exception>
    public void Start()
    {
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(HealthEndpointsHost));
        }

        if (_acceptLoop is not null)
        {
            // Already running: a second loop would leak the first CancellationTokenSource.
            return;
        }

        _listener.Start();

        var cts = new CancellationTokenSource();
        _cts = cts;

        // Capture the token now; reading _cts.Token lazily inside the lambda would race
        // with DisposeAsync disposing the source.
        var token = cts.Token;
        _acceptLoop = Task.Run(() => AcceptLoopAsync(token));

        _logger.LogInformation("Health endpoints listening on {Prefix}", _prefix);
    }

    /// <summary>
    /// Accepts requests until <paramref name="ct"/> is cancelled or the listener is
    /// stopped/disposed, handing each request to <see cref="HandleAsync"/> on the thread pool.
    /// </summary>
    /// <param name="ct">Token cancelled by <see cref="DisposeAsync"/> to end the loop.</param>
    private async Task AcceptLoopAsync(CancellationToken ct)
    {
        while (!ct.IsCancellationRequested)
        {
            HttpListenerContext ctx;
            try
            {
                ctx = await _listener.GetContextAsync().ConfigureAwait(false);
            }
            catch (ObjectDisposedException)
            {
                break;
            }
            catch (InvalidOperationException) when (ct.IsCancellationRequested)
            {
                // GetContextAsync on an already-stopped listener during shutdown.
                break;
            }
            catch (HttpListenerException ex)
            {
                if (ct.IsCancellationRequested)
                {
                    break;
                }

                if (!_listener.IsListening)
                {
                    _logger.LogError(ex, "Health endpoint listener stopped unexpectedly");
                    break;
                }

                // A failed accept while still listening must not take the endpoints
                // down for the rest of the process lifetime: log, back off, retry.
                _logger.LogWarning(ex, "Health endpoint accept failure");
                try
                {
                    await Task.Delay(AcceptRetryDelay, ct).ConfigureAwait(false);
                }
                catch (OperationCanceledException)
                {
                    break;
                }

                continue;
            }

            // Deliberately not tied to ct: an accepted context must always reach
            // HandleAsync so its response gets closed, even during shutdown.
            _ = Task.Run(() => HandleAsync(ctx), CancellationToken.None);
        }
    }

    /// <summary>
    /// Routes one request by absolute path, answers 404 for unknown paths, and always
    /// closes the response. Handler exceptions are logged and never propagate.
    /// </summary>
    /// <param name="ctx">The accepted request/response pair.</param>
    private async Task HandleAsync(HttpListenerContext ctx)
    {
        try
        {
            var path = ctx.Request.Url?.AbsolutePath ?? "/";
            switch (path)
            {
                case "/healthz":
                    await WriteHealthzAsync(ctx).ConfigureAwait(false);
                    break;
                case "/readyz":
                    await WriteReadyzAsync(ctx).ConfigureAwait(false);
                    break;
                default:
                    ctx.Response.StatusCode = 404;
                    break;
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Health endpoint handler failure");

            // Best effort: setting the status throws if the headers were already sent.
            try { ctx.Response.StatusCode = 500; } catch { /* ignore */ }
        }
        finally
        {
            try { ctx.Response.Close(); } catch { /* ignore */ }
        }
    }

    /// <summary>
    /// Writes the <c>/healthz</c> response: 200 when the config DB probe or the
    /// stale-config probe reports <c>true</c>, otherwise 503, with a JSON body
    /// carrying both flags and the uptime.
    /// </summary>
    /// <param name="ctx">The request/response pair to answer.</param>
    private async Task WriteHealthzAsync(HttpListenerContext ctx)
    {
        var configHealthy = _configDbHealthy();
        var staleConfig = _usingStaleConfig();

        // /healthz is 200 when process alive + (config DB reachable OR cache-warm).
        // Stale-config still serves 200 so the process isn't flagged dead when the DB
        // blips; the body surfaces the stale flag for operators.
        var healthy = configHealthy || staleConfig;
        ctx.Response.StatusCode = healthy ? 200 : 503;

        var body = JsonSerializer.Serialize(new
        {
            status = healthy ? "healthy" : "unhealthy",
            uptimeSeconds = UptimeSeconds(),
            configDbReachable = configHealthy,
            usingStaleConfig = staleConfig,
        });
        await WriteBodyAsync(ctx, body).ConfigureAwait(false);
    }

    /// <summary>
    /// Writes the <c>/readyz</c> response: the status code is whatever
    /// <see cref="DriverHealthReport.HttpStatus"/> returns for the aggregated verdict,
    /// and the JSON body lists every driver plus the ids of those that are
    /// <see cref="DriverState.Degraded"/> or <see cref="DriverState.Reconnecting"/>.
    /// </summary>
    /// <param name="ctx">The request/response pair to answer.</param>
    private async Task WriteReadyzAsync(HttpListenerContext ctx)
    {
        var snapshots = BuildSnapshots();
        var verdict = DriverHealthReport.Aggregate(snapshots);
        ctx.Response.StatusCode = DriverHealthReport.HttpStatus(verdict);

        var body = JsonSerializer.Serialize(new
        {
            verdict = verdict.ToString(),
            uptimeSeconds = UptimeSeconds(),
            drivers = snapshots.Select(d => new
            {
                id = d.DriverInstanceId,
                state = d.State.ToString(),
                detail = d.DetailMessage,
            }).ToArray(),
            degradedDrivers = snapshots
                .Where(d => d.State == DriverState.Degraded || d.State == DriverState.Reconnecting)
                .Select(d => d.DriverInstanceId)
                .ToArray(),
        });
        await WriteBodyAsync(ctx, body).ConfigureAwait(false);
    }

    /// <summary>
    /// Collects one health snapshot per registered driver, skipping ids for which
    /// the driver host returns no driver.
    /// </summary>
    /// <returns>Snapshots in the order the driver host enumerates its registered ids.</returns>
    private IReadOnlyList<DriverHealthSnapshot> BuildSnapshots()
    {
        var list = new List<DriverHealthSnapshot>();
        foreach (var id in _driverHost.RegisteredDriverIds)
        {
            var driver = _driverHost.GetDriver(id);
            if (driver is null)
            {
                continue;
            }

            var health = driver.GetHealth();
            list.Add(new DriverHealthSnapshot(driver.DriverInstanceId, health.State, health.LastError));
        }

        return list;
    }

    /// <summary>Whole seconds elapsed since this instance was constructed.</summary>
    private int UptimeSeconds() => (int)(DateTime.UtcNow - _startedUtc).TotalSeconds;

    /// <summary>
    /// Writes <paramref name="body"/> as UTF-8 JSON with an explicit content length.
    /// </summary>
    /// <param name="ctx">The request/response pair to write to.</param>
    /// <param name="body">Serialized JSON payload.</param>
    private static async Task WriteBodyAsync(HttpListenerContext ctx, string body)
    {
        var bytes = Encoding.UTF8.GetBytes(body);
        ctx.Response.ContentType = "application/json; charset=utf-8";
        ctx.Response.ContentLength64 = bytes.LongLength;
        await ctx.Response.OutputStream.WriteAsync(bytes).ConfigureAwait(false);
    }

    /// <summary>
    /// Cancels the accept loop, stops the listener, waits for the loop to exit, and
    /// releases the listener and token source. Safe to call more than once; never throws.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        // Cancel before stopping so the accept loop sees the cancellation when its
        // pending GetContextAsync is aborted by Stop().
        _cts?.Cancel();
        try { _listener.Stop(); } catch { /* ignore */ }

        if (_acceptLoop is not null)
        {
            try { await _acceptLoop.ConfigureAwait(false); } catch { /* ignore */ }
        }

        try { _listener.Close(); } catch { /* ignore */ }
        _cts?.Dispose();
    }
}