Add rich HTTP health endpoints for cluster monitoring

Enhance /api/health with component-level health, ServiceLevel, and
redundancy state for load balancer probes. Add /health HTML page for
operators to monitor node health in clustered System Platform deployments.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-03-28 16:44:31 -04:00
parent f0a076ec26
commit 9d3599fbb6
5 changed files with 381 additions and 3 deletions

View File

@@ -16,6 +16,7 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Status
{
// Health evaluator; given the MXAccess connection state and the performance metrics it produces the health result.
private readonly HealthCheckService _healthCheck;
// Interval in seconds; used as the meta-refresh period of the generated health HTML page.
private readonly int _refreshIntervalSeconds;
// UTC instant taken when this instance is created; subtracted from the current UTC time to report uptime.
private readonly DateTime _startTime = DateTime.UtcNow;
// MXAccess client whose State is read for health reporting; while null the state is treated as Disconnected.
private IMxAccessClient? _mxAccessClient;
// Performance metrics handed to the health check; may be null.
private PerformanceMetrics? _metrics;
@@ -229,5 +230,132 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Status
var state = _mxAccessClient?.State ?? ConnectionState.Disconnected;
return _healthCheck.IsHealthy(state, _metrics);
}
/// <summary>
/// Builds the rich health endpoint data including component health, ServiceLevel, and redundancy state.
/// </summary>
/// <remarks>
/// <para>
/// With redundancy enabled, the ServiceLevel is computed by <c>ServiceLevelCalculator</c> from a base level
/// (the configured base for the "Primary" role, the configured base minus 50 and floored at 0 for any other
/// role) together with the MXAccess and database connection flags.
/// </para>
/// <para>
/// Without redundancy, the ServiceLevel depends only on the MXAccess connection: 255 when connected,
/// 0 otherwise. The database state is reported under Components but does not affect the level.
/// </para>
/// </remarks>
/// <returns>A freshly populated <c>HealthEndpointData</c> snapshot.</returns>
public HealthEndpointData GetHealthData()
{
    // One clock reading so that Uptime and Timestamp describe the same instant.
    var now = DateTime.UtcNow;

    // Read the redundancy configuration once so every decision below sees the same object.
    var redundancy = _redundancyConfig;

    var connectionState = _mxAccessClient?.State ?? ConnectionState.Disconnected;
    var mxConnected = connectionState == ConnectionState.Connected;
    var dbConnected = _galaxyStats?.DbConnected ?? false;
    var health = _healthCheck.CheckHealth(connectionState, _metrics);

    var data = new HealthEndpointData
    {
        Status = health.Status,
        RedundancyEnabled = redundancy?.Enabled ?? false,
        Components = new ComponentHealth
        {
            MxAccess = connectionState.ToString(),
            Database = dbConnected ? "Connected" : "Disconnected",
            OpcUaServer = (_serverHost?.IsRunning ?? false) ? "Running" : "Stopped"
        },
        Uptime = FormatUptime(now - _startTime),
        Timestamp = now
    };

    if (redundancy != null && redundancy.Enabled)
    {
        var isPrimary = string.Equals(redundancy.Role, "Primary", StringComparison.OrdinalIgnoreCase);

        // Any non-primary role starts 50 below the configured base, never below zero.
        var baseLevel = isPrimary
            ? redundancy.ServiceLevelBase
            : Math.Max(0, redundancy.ServiceLevelBase - 50);

        var calculator = new ServiceLevelCalculator();
        data.ServiceLevel = calculator.Calculate(baseLevel, mxConnected, dbConnected);
        data.RedundancyRole = redundancy.Role;
        data.RedundancyMode = redundancy.Mode;
    }
    else
    {
        // Non-redundant: driven by the MXAccess connection alone (255 connected, 0 otherwise).
        data.ServiceLevel = mxConnected ? (byte)255 : (byte)0;
    }

    return data;
}
// Shared serializer settings for the health payload. System.Text.Json caches type metadata per
// options instance, so reusing one instance avoids rebuilding that cache on every probe request.
private static readonly JsonSerializerOptions HealthJsonOptions = new JsonSerializerOptions { WriteIndented = true };

/// <summary>
/// Generates the JSON payload for the /api/health endpoint.
/// </summary>
/// <returns>The current health data from <see cref="GetHealthData"/> serialized as indented JSON.</returns>
public string GenerateHealthJson()
{
    var data = GetHealthData();
    return JsonSerializer.Serialize(data, HealthJsonOptions);
}
/// <summary>
/// Generates a focused health status HTML page for operators and monitoring dashboards.
/// </summary>
/// <remarks>
/// The page contains a status badge, the ServiceLevel, the redundancy role and mode (only when
/// redundancy is enabled), one card per component, and a footer with uptime and timestamp. It asks the
/// browser to reload itself every <c>_refreshIntervalSeconds</c> seconds. Text that originates from the
/// health data (status, redundancy role/mode, component states) is HTML-encoded before it is written
/// into the markup, so unexpected characters in those values cannot break the page.
/// </remarks>
/// <returns>A complete HTML document.</returns>
public string GenerateHealthHtml()
{
    const string okColor = "#00cc66";
    const string warnColor = "#cccc33";
    const string badColor = "#cc3333";

    var data = GetHealthData();
    var sb = new StringBuilder();

    // Encodes a value for use as HTML text; a null value yields no output.
    string Encode(object? value) => System.Net.WebUtility.HtmlEncode(value?.ToString());

    // Writes one component card; the same color is used for the border and the value text.
    void AppendCard(string title, string? value, string color) =>
        sb.AppendLine($"<div class='component' style='border-color: {color};'><div class='name'>{title}</div><div class='value' style='color: {color};'>{Encode(value)}</div></div>");

    var statusColor = data.Status == "Healthy" ? okColor : data.Status == "Degraded" ? warnColor : badColor;
    var mxColor = data.Components.MxAccess == "Connected" ? okColor : badColor;
    var dbColor = data.Components.Database == "Connected" ? okColor : badColor;
    var uaColor = data.Components.OpcUaServer == "Running" ? okColor : badColor;

    sb.AppendLine("<!DOCTYPE html><html><head>");
    sb.AppendLine("<meta charset='utf-8'>");
    sb.AppendLine($"<meta http-equiv='refresh' content='{_refreshIntervalSeconds}'>");
    sb.AppendLine("<title>LmxOpcUa Health</title>");
    sb.AppendLine("<style>");
    sb.AppendLine("body { font-family: monospace; background: #1a1a2e; color: #eee; padding: 20px; margin: 0; }");
    sb.AppendLine(".header { text-align: center; padding: 30px 0; }");
    sb.AppendLine(".status-badge { display: inline-block; font-size: 2em; font-weight: bold; padding: 15px 40px; border-radius: 12px; letter-spacing: 2px; }");
    sb.AppendLine(".service-level { text-align: center; font-size: 4em; font-weight: bold; margin: 20px 0; }");
    sb.AppendLine(".service-level .label { font-size: 0.3em; color: #999; display: block; }");
    sb.AppendLine(".components { display: flex; justify-content: center; gap: 20px; flex-wrap: wrap; margin: 30px auto; max-width: 800px; }");
    sb.AppendLine(".component { border: 2px solid #444; border-radius: 8px; padding: 20px; min-width: 200px; text-align: center; }");
    sb.AppendLine(".component .name { font-size: 0.9em; color: #999; margin-bottom: 8px; }");
    sb.AppendLine(".component .value { font-size: 1.3em; font-weight: bold; }");
    sb.AppendLine(".meta { text-align: center; margin-top: 30px; color: #666; font-size: 0.85em; }");
    sb.AppendLine(".redundancy { text-align: center; margin: 10px 0; color: #999; }");
    sb.AppendLine(".redundancy b { color: #66ccff; }");
    sb.AppendLine("</style></head><body>");

    // Status badge
    sb.AppendLine("<div class='header'>");
    sb.AppendLine($"<div class='status-badge' style='background: {statusColor}; color: #000;'>{Encode(data.Status.ToUpperInvariant())}</div>");
    sb.AppendLine("</div>");

    // Service Level
    sb.AppendLine($"<div class='service-level' style='color: {statusColor};'>");
    sb.AppendLine("<span class='label'>SERVICE LEVEL</span>");
    sb.AppendLine($"{data.ServiceLevel}");
    sb.AppendLine("</div>");

    // Redundancy info
    if (data.RedundancyEnabled)
    {
        sb.AppendLine($"<div class='redundancy'>Role: <b>{Encode(data.RedundancyRole)}</b> | Mode: <b>{Encode(data.RedundancyMode)}</b></div>");
    }

    // Component health cards
    sb.AppendLine("<div class='components'>");
    AppendCard("MXAccess", data.Components.MxAccess, mxColor);
    AppendCard("Galaxy Database", data.Components.Database, dbColor);
    AppendCard("OPC UA Server", data.Components.OpcUaServer, uaColor);
    sb.AppendLine("</div>");

    // Footer
    sb.AppendLine($"<div class='meta'>Uptime: {data.Uptime} | {data.Timestamp:O}</div>");
    sb.AppendLine("</body></html>");

    return sb.ToString();
}
/// <summary>
/// Renders an elapsed time as a short string using the coarsest applicable unit set:
/// "Nd Nh Nm" from one day upward, "Nh Nm" from one hour upward, otherwise "Nm Ns".
/// </summary>
/// <param name="ts">The elapsed time to format.</param>
/// <returns>The compact textual form of <paramref name="ts"/>.</returns>
private static string FormatUptime(TimeSpan ts)
{
    var underOneDay = ts.TotalDays < 1;
    var underOneHour = ts.TotalHours < 1;

    // Shortest spans first: below an hour only minutes and seconds are shown.
    if (underOneDay && underOneHour)
    {
        var wholeMinutes = (int)ts.TotalMinutes;
        return $"{wholeMinutes}m {ts.Seconds}s";
    }

    var minutePart = ts.Minutes;

    if (underOneDay)
    {
        var wholeHours = (int)ts.TotalHours;
        return $"{wholeHours}h {minutePart}m";
    }

    var wholeDays = (int)ts.TotalDays;
    return $"{wholeDays}d {ts.Hours}h {minutePart}m";
}
}
}