From 9d3599fbb606a2d22d14671b716efb0dc1e8b6da Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sat, 28 Mar 2026 16:44:31 -0400 Subject: [PATCH] Add rich HTTP health endpoints for cluster monitoring Enhance /api/health with component-level health, ServiceLevel, and redundancy state for load balancer probes. Add /health HTML page for operators to monitor node health in clustered System Platform deployments. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../Status/StatusData.cs | 67 ++++++++ .../Status/StatusReportService.cs | 128 +++++++++++++++ .../Status/StatusWebServer.cs | 11 +- .../Status/StatusReportServiceTests.cs | 149 ++++++++++++++++++ .../Status/StatusWebServerTests.cs | 29 ++++ 5 files changed, 381 insertions(+), 3 deletions(-) diff --git a/src/ZB.MOM.WW.LmxOpcUa.Host/Status/StatusData.cs b/src/ZB.MOM.WW.LmxOpcUa.Host/Status/StatusData.cs index 3f4ad4d..ee82c15 100644 --- a/src/ZB.MOM.WW.LmxOpcUa.Host/Status/StatusData.cs +++ b/src/ZB.MOM.WW.LmxOpcUa.Host/Status/StatusData.cs @@ -201,6 +201,73 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Status public List ServerUris { get; set; } = new(); } + /// + /// DTO for the /api/health endpoint. Includes component-level health, ServiceLevel, and redundancy state. + /// + public class HealthEndpointData + { + /// + /// Gets or sets the overall health status: Healthy, Degraded, or Unhealthy. + /// + public string Status { get; set; } = "Unknown"; + + /// + /// Gets or sets the computed OPC UA ServiceLevel byte (0-255). Derived from the redundancy configuration when redundancy is enabled; otherwise 255 when MXAccess is connected and 0 when it is not. + /// + public byte ServiceLevel { get; set; } + + /// + /// Gets or sets whether redundancy is enabled. + /// + public bool RedundancyEnabled { get; set; } + + /// + /// Gets or sets this instance's redundancy role when enabled (Primary/Secondary), or null when disabled. + /// + public string? RedundancyRole { get; set; } + + /// + /// Gets or sets the redundancy mode when enabled (Warm/Hot), or null when disabled. + /// + public string? 
RedundancyMode { get; set; } + + /// + /// Gets or sets the per-component health breakdown. + /// + public ComponentHealth Components { get; set; } = new(); + + /// + /// Gets or sets the formatted uptime (for example "2d 3h 15m"), measured from when the status report service instance was created. + /// + public string Uptime { get; set; } = ""; + + /// + /// Gets or sets the UTC timestamp of this health snapshot. + /// + public DateTime Timestamp { get; set; } = DateTime.UtcNow; + } + + /// + /// Per-component health breakdown for the health endpoint. + /// + public class ComponentHealth + { + /// + /// Gets or sets MXAccess runtime connectivity status. + /// + public string MxAccess { get; set; } = "Disconnected"; + + /// + /// Gets or sets Galaxy repository database connectivity status. + /// + public string Database { get; set; } = "Disconnected"; + + /// + /// Gets or sets OPC UA server status. + /// + public string OpcUaServer { get; set; } = "Stopped"; + } + /// /// Dashboard model for the status page footer. /// diff --git a/src/ZB.MOM.WW.LmxOpcUa.Host/Status/StatusReportService.cs b/src/ZB.MOM.WW.LmxOpcUa.Host/Status/StatusReportService.cs index 42c5e82..b82a42b 100644 --- a/src/ZB.MOM.WW.LmxOpcUa.Host/Status/StatusReportService.cs +++ b/src/ZB.MOM.WW.LmxOpcUa.Host/Status/StatusReportService.cs @@ -16,6 +16,7 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Status { private readonly HealthCheckService _healthCheck; private readonly int _refreshIntervalSeconds; + private readonly DateTime _startTime = DateTime.UtcNow; private IMxAccessClient? _mxAccessClient; private PerformanceMetrics? _metrics; @@ -229,5 +230,132 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Status var state = _mxAccessClient?.State ?? ConnectionState.Disconnected; return _healthCheck.IsHealthy(state, _metrics); } + + /// + /// Builds the rich health endpoint data including component health, ServiceLevel, and redundancy state. + /// + public HealthEndpointData GetHealthData() + { + var connectionState = _mxAccessClient?.State ?? 
ConnectionState.Disconnected; + var mxConnected = connectionState == ConnectionState.Connected; + var dbConnected = _galaxyStats?.DbConnected ?? false; + var health = _healthCheck.CheckHealth(connectionState, _metrics); + var uptime = DateTime.UtcNow - _startTime; + + var data = new HealthEndpointData + { + Status = health.Status, + RedundancyEnabled = _redundancyConfig?.Enabled ?? false, + Components = new ComponentHealth + { + MxAccess = connectionState.ToString(), + Database = dbConnected ? "Connected" : "Disconnected", + OpcUaServer = (_serverHost?.IsRunning ?? false) ? "Running" : "Stopped" + }, + Uptime = FormatUptime(uptime), + Timestamp = DateTime.UtcNow + }; + + if (_redundancyConfig != null && _redundancyConfig.Enabled) + { + var isPrimary = string.Equals(_redundancyConfig.Role, "Primary", StringComparison.OrdinalIgnoreCase); + var baseLevel = isPrimary + ? _redundancyConfig.ServiceLevelBase + : Math.Max(0, _redundancyConfig.ServiceLevelBase - 50); + var calculator = new ServiceLevelCalculator(); + + data.ServiceLevel = calculator.Calculate(baseLevel, mxConnected, dbConnected); + data.RedundancyRole = _redundancyConfig.Role; + data.RedundancyMode = _redundancyConfig.Mode; + } + else + { + // Non-redundant: 255 when MXAccess is connected, otherwise 0 (database state is not considered) + data.ServiceLevel = mxConnected ? (byte)255 : (byte)0; + } + + return data; + } + + /// + /// Generates the JSON payload for the /api/health endpoint. + /// + public string GenerateHealthJson() + { + var data = GetHealthData(); + return JsonSerializer.Serialize(data, new JsonSerializerOptions { WriteIndented = true }); + } + + /// + /// Generates a focused health status HTML page for operators and monitoring dashboards. + /// + public string GenerateHealthHtml() + { + var data = GetHealthData(); + var sb = new StringBuilder(); + + var statusColor = data.Status == "Healthy" ? "#00cc66" : data.Status == "Degraded" ? "#cccc33" : "#cc3333"; + var mxColor = data.Components.MxAccess == "Connected" ? 
"#00cc66" : "#cc3333"; + var dbColor = data.Components.Database == "Connected" ? "#00cc66" : "#cc3333"; + var uaColor = data.Components.OpcUaServer == "Running" ? "#00cc66" : "#cc3333"; + + sb.AppendLine(""); + sb.AppendLine(""); + sb.AppendLine($""); + sb.AppendLine("LmxOpcUa Health"); + sb.AppendLine(""); + + // Status badge + sb.AppendLine("
"); + sb.AppendLine($"
{data.Status.ToUpperInvariant()}
"); + sb.AppendLine("
"); + + // Service Level + sb.AppendLine($"
"); + sb.AppendLine("SERVICE LEVEL"); + sb.AppendLine($"{data.ServiceLevel}"); + sb.AppendLine("
"); + + // Redundancy info + if (data.RedundancyEnabled) + { + sb.AppendLine($"
Role: {data.RedundancyRole} | Mode: {data.RedundancyMode}
"); + } + + // Component health cards + sb.AppendLine("
"); + sb.AppendLine($"
MXAccess
{data.Components.MxAccess}
"); + sb.AppendLine($"
Galaxy Database
{data.Components.Database}
"); + sb.AppendLine($"
OPC UA Server
{data.Components.OpcUaServer}
"); + sb.AppendLine("
"); + + // Footer + sb.AppendLine($"
Uptime: {data.Uptime} | {data.Timestamp:O}
"); + + sb.AppendLine(""); + return sb.ToString(); + } + + private static string FormatUptime(TimeSpan ts) + { + if (ts.TotalDays >= 1) + return $"{(int)ts.TotalDays}d {ts.Hours}h {ts.Minutes}m"; + if (ts.TotalHours >= 1) + return $"{(int)ts.TotalHours}h {ts.Minutes}m"; + return $"{(int)ts.TotalMinutes}m {ts.Seconds}s"; + } } } diff --git a/src/ZB.MOM.WW.LmxOpcUa.Host/Status/StatusWebServer.cs b/src/ZB.MOM.WW.LmxOpcUa.Host/Status/StatusWebServer.cs index 5dec508..59cb34b 100644 --- a/src/ZB.MOM.WW.LmxOpcUa.Host/Status/StatusWebServer.cs +++ b/src/ZB.MOM.WW.LmxOpcUa.Host/Status/StatusWebServer.cs @@ -120,14 +120,19 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Status await WriteResponse(response, _reportService.GenerateHtml(), "text/html", 200); break; + case "/health": + await WriteResponse(response, _reportService.GenerateHealthHtml(), "text/html", 200); + break; + case "/api/status": await WriteResponse(response, _reportService.GenerateJson(), "application/json", 200); break; case "/api/health": - var isHealthy = _reportService.IsHealthy(); - var healthJson = isHealthy ? "{\"status\":\"healthy\"}" : "{\"status\":\"unhealthy\"}"; - await WriteResponse(response, healthJson, "application/json", isHealthy ? 200 : 503); + var healthData = _reportService.GetHealthData(); + var healthJson = _reportService.GenerateHealthJson(); + var healthStatusCode = healthData.Status == "Unhealthy" ? 
503 : 200; + await WriteResponse(response, healthJson, "application/json", healthStatusCode); break; default: diff --git a/tests/ZB.MOM.WW.LmxOpcUa.Tests/Status/StatusReportServiceTests.cs b/tests/ZB.MOM.WW.LmxOpcUa.Tests/Status/StatusReportServiceTests.cs index bf4f99c..f0f0771 100644 --- a/tests/ZB.MOM.WW.LmxOpcUa.Tests/Status/StatusReportServiceTests.cs +++ b/tests/ZB.MOM.WW.LmxOpcUa.Tests/Status/StatusReportServiceTests.cs @@ -132,6 +132,139 @@ namespace ZB.MOM.WW.LmxOpcUa.Tests.Status sut.IsHealthy().ShouldBe(false); } + [Fact] + public void GetHealthData_WhenConnected_ReturnsHealthyStatus() + { + var sut = CreateService(); + var data = sut.GetHealthData(); + + data.Status.ShouldBe("Healthy"); + data.Components.MxAccess.ShouldBe("Connected"); + data.Components.Database.ShouldBe("Connected"); + } + + [Fact] + public void GetHealthData_WhenDisconnected_ReturnsUnhealthyStatus() + { + var mxClient = new FakeMxAccessClient { State = ConnectionState.Disconnected }; + var galaxyStats = new GalaxyRepositoryStats { DbConnected = false }; + var sut = new StatusReportService(new HealthCheckService(), 10); + sut.SetComponents(mxClient, null, galaxyStats, null); + + var data = sut.GetHealthData(); + + data.Status.ShouldBe("Unhealthy"); + data.ServiceLevel.ShouldBe((byte)0); + data.Components.MxAccess.ShouldBe("Disconnected"); + data.Components.Database.ShouldBe("Disconnected"); + } + + [Fact] + public void GetHealthData_NoRedundancy_ServiceLevel255WhenHealthy() + { + var sut = CreateService(); + var data = sut.GetHealthData(); + + data.RedundancyEnabled.ShouldBe(false); + data.ServiceLevel.ShouldBe((byte)255); + data.RedundancyRole.ShouldBeNull(); + data.RedundancyMode.ShouldBeNull(); + } + + [Fact] + public void GetHealthData_WithRedundancy_IncludesRoleAndServiceLevel() + { + var sut = CreateServiceWithRedundancy("Primary"); + var data = sut.GetHealthData(); + + data.RedundancyEnabled.ShouldBe(true); + data.RedundancyRole.ShouldBe("Primary"); + 
data.RedundancyMode.ShouldBe("Warm"); + data.ServiceLevel.ShouldBe((byte)200); + } + + [Fact] + public void GetHealthData_SecondaryRole_LowerServiceLevel() + { + var sut = CreateServiceWithRedundancy("Secondary"); + var data = sut.GetHealthData(); + + data.ServiceLevel.ShouldBe((byte)150); + } + + [Fact] + public void GetHealthData_ContainsUptime() + { + var sut = CreateService(); + var data = sut.GetHealthData(); + + data.Uptime.ShouldNotBeNullOrWhiteSpace(); + } + + [Fact] + public void GetHealthData_ContainsTimestamp() + { + var sut = CreateService(); + var data = sut.GetHealthData(); + + data.Timestamp.ShouldBeGreaterThan(DateTime.UtcNow.AddMinutes(-1)); + } + + [Fact] + public void GenerateHealthJson_ContainsExpectedFields() + { + var sut = CreateService(); + var json = sut.GenerateHealthJson(); + + json.ShouldContain("Status"); + json.ShouldContain("ServiceLevel"); + json.ShouldContain("Components"); + json.ShouldContain("MxAccess"); + json.ShouldContain("Database"); + json.ShouldContain("OpcUaServer"); + json.ShouldContain("Uptime"); + } + + [Fact] + public void GenerateHealthHtml_ContainsStatusBadge() + { + var sut = CreateService(); + var html = sut.GenerateHealthHtml(); + + html.ShouldContain("HEALTHY"); + html.ShouldContain("SERVICE LEVEL"); + html.ShouldContain("255"); + } + + [Fact] + public void GenerateHealthHtml_ContainsComponentCards() + { + var sut = CreateService(); + var html = sut.GenerateHealthHtml(); + + html.ShouldContain("MXAccess"); + html.ShouldContain("Galaxy Database"); + html.ShouldContain("OPC UA Server"); + } + + [Fact] + public void GenerateHealthHtml_WithRedundancy_ShowsRoleAndMode() + { + var sut = CreateServiceWithRedundancy("Primary"); + var html = sut.GenerateHealthHtml(); + + html.ShouldContain("Primary"); + html.ShouldContain("Warm"); + } + + [Fact] + public void GenerateHealthHtml_ContainsAutoRefresh() + { + var sut = CreateService(); + var html = sut.GenerateHealthHtml(); + html.ShouldContain("meta http-equiv='refresh' 
content='10'"); + } + /// /// Creates a status report service preloaded with representative runtime, Galaxy, and metrics data. /// @@ -157,5 +290,21 @@ namespace ZB.MOM.WW.LmxOpcUa.Tests.Status sut.SetComponents(mxClient, metrics, galaxyStats, null); return sut; } + + private static StatusReportService CreateServiceWithRedundancy(string role) + { + var mxClient = new FakeMxAccessClient(); + var galaxyStats = new GalaxyRepositoryStats { GalaxyName = "TestGalaxy", DbConnected = true }; + var redundancyConfig = new Host.Configuration.RedundancyConfiguration + { + Enabled = true, + Mode = "Warm", + Role = role, + ServiceLevelBase = 200 + }; + var sut = new StatusReportService(new HealthCheckService(), 10); + sut.SetComponents(mxClient, null, galaxyStats, null, null, redundancyConfig, "urn:test:instance1"); + return sut; + } } } diff --git a/tests/ZB.MOM.WW.LmxOpcUa.Tests/Status/StatusWebServerTests.cs b/tests/ZB.MOM.WW.LmxOpcUa.Tests/Status/StatusWebServerTests.cs index 92be67a..0192e2b 100644 --- a/tests/ZB.MOM.WW.LmxOpcUa.Tests/Status/StatusWebServerTests.cs +++ b/tests/ZB.MOM.WW.LmxOpcUa.Tests/Status/StatusWebServerTests.cs @@ -107,6 +107,35 @@ namespace ZB.MOM.WW.LmxOpcUa.Tests.Status response.Headers.CacheControl?.NoStore.ShouldBe(true); } + /// + /// Confirms that the /health route returns an HTML health page. + /// + [Fact] + public async Task HealthPage_ReturnsHtml200() + { + var response = await _client.GetAsync("/health"); + response.StatusCode.ShouldBe(HttpStatusCode.OK); + response.Content.Headers.ContentType?.MediaType.ShouldBe("text/html"); + var body = await response.Content.ReadAsStringAsync(); + body.ShouldContain("SERVICE LEVEL"); + body.ShouldContain("MXAccess"); + } + + /// + /// Confirms that /api/health returns rich JSON with component health details. 
+ /// + [Fact] + public async Task ApiHealth_ReturnsRichJson() + { + var response = await _client.GetAsync("/api/health"); + response.StatusCode.ShouldBe(HttpStatusCode.OK); + response.Content.Headers.ContentType?.MediaType.ShouldBe("application/json"); + var body = await response.Content.ReadAsStringAsync(); + body.ShouldContain("ServiceLevel"); + body.ShouldContain("Components"); + body.ShouldContain("Uptime"); + } + /// /// Confirms that the server can be started and stopped cleanly. ///