Instrument the historian plugin with runtime query health counters and read-only cluster failover, so that operators can detect silent query degradation and keep serving history when a single cluster node goes down
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -125,6 +125,7 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Status
|
||||
private HistorianStatusInfo BuildHistorianStatusInfo()
|
||||
{
|
||||
var outcome = HistorianPluginLoader.LastOutcome;
|
||||
var health = _nodeManager?.HistorianHealth;
|
||||
return new HistorianStatusInfo
|
||||
{
|
||||
Enabled = _historianConfig?.Enabled ?? false,
|
||||
@@ -132,7 +133,21 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Status
|
||||
PluginError = outcome.Error,
|
||||
PluginPath = outcome.PluginPath,
|
||||
ServerName = _historianConfig?.ServerName ?? "",
|
||||
Port = _historianConfig?.Port ?? 0
|
||||
Port = _historianConfig?.Port ?? 0,
|
||||
QueryTotal = health?.TotalQueries ?? 0,
|
||||
QuerySuccesses = health?.TotalSuccesses ?? 0,
|
||||
QueryFailures = health?.TotalFailures ?? 0,
|
||||
ConsecutiveFailures = health?.ConsecutiveFailures ?? 0,
|
||||
LastSuccessTime = health?.LastSuccessTime,
|
||||
LastFailureTime = health?.LastFailureTime,
|
||||
LastQueryError = health?.LastError,
|
||||
ProcessConnectionOpen = health?.ProcessConnectionOpen ?? false,
|
||||
EventConnectionOpen = health?.EventConnectionOpen ?? false,
|
||||
NodeCount = health?.NodeCount ?? 0,
|
||||
HealthyNodeCount = health?.HealthyNodeCount ?? 0,
|
||||
ActiveProcessNode = health?.ActiveProcessNode,
|
||||
ActiveEventNode = health?.ActiveEventNode,
|
||||
Nodes = health?.Nodes ?? new List<Historian.HistorianClusterNodeState>()
|
||||
};
|
||||
}
|
||||
|
||||
@@ -304,13 +319,66 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Status
|
||||
sb.AppendLine("</div>");
|
||||
|
||||
// Historian panel
|
||||
var histColor = data.Historian.PluginStatus == "Loaded" ? "green"
|
||||
: !data.Historian.Enabled ? "gray" : "red";
|
||||
var anyClusterNodeFailed =
|
||||
data.Historian.NodeCount > 0 && data.Historian.HealthyNodeCount < data.Historian.NodeCount;
|
||||
var allClusterNodesFailed =
|
||||
data.Historian.NodeCount > 0 && data.Historian.HealthyNodeCount == 0;
|
||||
var histColor = !data.Historian.Enabled ? "gray"
|
||||
: data.Historian.PluginStatus != "Loaded" ? "red"
|
||||
: allClusterNodesFailed ? "red"
|
||||
: data.Historian.ConsecutiveFailures >= 5 ? "red"
|
||||
: anyClusterNodeFailed || data.Historian.ConsecutiveFailures > 0 ? "yellow"
|
||||
: "green";
|
||||
sb.AppendLine($"<div class='panel {histColor}'><h2>Historian</h2>");
|
||||
sb.AppendLine(
|
||||
$"<p>Enabled: <b>{data.Historian.Enabled}</b> | Plugin: <b>{data.Historian.PluginStatus}</b> | Server: {WebUtility.HtmlEncode(data.Historian.ServerName)}:{data.Historian.Port}</p>");
|
||||
$"<p>Enabled: <b>{data.Historian.Enabled}</b> | Plugin: <b>{data.Historian.PluginStatus}</b> | Port: {data.Historian.Port}</p>");
|
||||
if (!string.IsNullOrEmpty(data.Historian.PluginError))
|
||||
sb.AppendLine($"<p>Error: {WebUtility.HtmlEncode(data.Historian.PluginError)}</p>");
|
||||
sb.AppendLine($"<p>Plugin Error: {WebUtility.HtmlEncode(data.Historian.PluginError)}</p>");
|
||||
if (data.Historian.PluginStatus == "Loaded")
|
||||
{
|
||||
sb.AppendLine(
|
||||
$"<p>Queries: <b>{data.Historian.QueryTotal:N0}</b> " +
|
||||
$"(Success: {data.Historian.QuerySuccesses:N0}, Failure: {data.Historian.QueryFailures:N0}) " +
|
||||
$"| Consecutive Failures: <b>{data.Historian.ConsecutiveFailures}</b></p>");
|
||||
var procBadge = data.Historian.ProcessConnectionOpen
|
||||
? $"open ({WebUtility.HtmlEncode(data.Historian.ActiveProcessNode ?? "?")})"
|
||||
: "closed";
|
||||
var evtBadge = data.Historian.EventConnectionOpen
|
||||
? $"open ({WebUtility.HtmlEncode(data.Historian.ActiveEventNode ?? "?")})"
|
||||
: "closed";
|
||||
sb.AppendLine(
|
||||
$"<p>Process Conn: <b>{procBadge}</b> | Event Conn: <b>{evtBadge}</b></p>");
|
||||
if (data.Historian.LastSuccessTime.HasValue)
|
||||
sb.AppendLine($"<p>Last Success: {data.Historian.LastSuccessTime:O}</p>");
|
||||
if (data.Historian.LastFailureTime.HasValue)
|
||||
sb.AppendLine($"<p>Last Failure: {data.Historian.LastFailureTime:O}</p>");
|
||||
if (!string.IsNullOrEmpty(data.Historian.LastQueryError))
|
||||
sb.AppendLine(
|
||||
$"<p>Last Error: <code>{WebUtility.HtmlEncode(data.Historian.LastQueryError)}</code></p>");
|
||||
|
||||
// Cluster table: only when a true multi-node cluster is configured.
|
||||
if (data.Historian.NodeCount > 1)
|
||||
{
|
||||
sb.AppendLine(
|
||||
$"<p><b>Cluster:</b> {data.Historian.HealthyNodeCount} of {data.Historian.NodeCount} nodes healthy</p>");
|
||||
sb.AppendLine(
|
||||
"<table><tr><th>Node</th><th>State</th><th>Cooldown Until</th><th>Failures</th><th>Last Error</th></tr>");
|
||||
foreach (var node in data.Historian.Nodes)
|
||||
{
|
||||
var state = node.IsHealthy ? "healthy" : "cooldown";
|
||||
var cooldown = node.CooldownUntil?.ToString("O") ?? "-";
|
||||
var lastErr = WebUtility.HtmlEncode(node.LastError ?? "");
|
||||
sb.AppendLine(
|
||||
$"<tr><td>{WebUtility.HtmlEncode(node.Name)}</td><td>{state}</td>" +
|
||||
$"<td>{cooldown}</td><td>{node.FailureCount}</td><td><code>{lastErr}</code></td></tr>");
|
||||
}
|
||||
sb.AppendLine("</table>");
|
||||
}
|
||||
else if (data.Historian.NodeCount == 1)
|
||||
{
|
||||
sb.AppendLine($"<p>Node: {WebUtility.HtmlEncode(data.Historian.Nodes[0].Name)}</p>");
|
||||
}
|
||||
}
|
||||
sb.AppendLine("</div>");
|
||||
|
||||
// Alarms panel
|
||||
|
||||
Reference in New Issue
Block a user