Track Galaxy Platform and AppEngine runtime state via ScanState probes, and proactively invalidate descendant variable quality on Stopped transitions, so that operators can detect a stopped runtime host before downstream clients read stale data, and so that the bridge delivers a uniform bad-quality signal instead of relying on MxAccess per-tag fan-out

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-04-13 15:40:44 -04:00
parent 8f340553d9
commit 9d49cdcc58
18 changed files with 1831 additions and 14 deletions

View File

@@ -88,10 +88,11 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Status
ReconnectCount = _mxAccessClient?.ReconnectCount ?? 0,
ActiveSessions = _serverHost?.ActiveSessionCount ?? 0
},
Health = _healthCheck.CheckHealth(connectionState, _metrics, historianInfo, alarmInfo),
Health = _healthCheck.CheckHealth(connectionState, _metrics, historianInfo, alarmInfo, BuildRuntimeStatusInfo()),
Subscriptions = new SubscriptionInfo
{
ActiveCount = _mxAccessClient?.ActiveSubscriptionCount ?? 0
ActiveCount = _mxAccessClient?.ActiveSubscriptionCount ?? 0,
ProbeCount = _nodeManager?.ActiveRuntimeProbeCount ?? 0
},
Galaxy = new GalaxyInfo
{
@@ -114,6 +115,7 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Status
Alarms = alarmInfo,
Redundancy = BuildRedundancyInfo(),
Endpoints = BuildEndpointsInfo(),
RuntimeStatus = BuildRuntimeStatusInfo(),
Footer = new FooterInfo
{
Timestamp = DateTime.UtcNow,
@@ -192,6 +194,26 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Status
return info;
}
/// <summary>
/// Builds the runtime-status section of the report from the statuses exposed by the node manager.
/// </summary>
/// <remarks>
/// When the node manager, or its <c>RuntimeStatuses</c> collection, is null the result is an
/// empty summary (zero total, empty host list) rather than null.
/// </remarks>
/// <returns>
/// A <see cref="RuntimeStatusInfo"/> carrying the copied host list, its size, and one tally each
/// for running, stopped, and all other ("unknown") host states.
/// </returns>
private RuntimeStatusInfo BuildRuntimeStatusInfo()
{
    // Materialize once so the total, the exposed host list, and the tallies all describe the same set.
    var snapshot = _nodeManager?.RuntimeStatuses?.ToList() ?? new List<GalaxyRuntimeStatus>();

    var summary = new RuntimeStatusInfo
    {
        Hosts = snapshot,
        Total = snapshot.Count
    };

    foreach (var entry in snapshot)
    {
        var state = entry.State;

        if (state == GalaxyRuntimeState.Running)
        {
            summary.RunningCount++;
        }
        else if (state == GalaxyRuntimeState.Stopped)
        {
            summary.StoppedCount++;
        }
        else
        {
            // Anything that is neither Running nor Stopped is reported as unknown.
            summary.UnknownCount++;
        }
    }

    return summary;
}
private RedundancyInfo? BuildRedundancyInfo()
{
if (_redundancyConfig == null || !_redundancyConfig.Enabled)
@@ -300,7 +322,10 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Status
// Subscriptions panel
sb.AppendLine("<div class='panel gray'><h2>Subscriptions</h2>");
sb.AppendLine($"<p>Active: {data.Subscriptions.ActiveCount}</p>");
sb.AppendLine($"<p>Active: <b>{data.Subscriptions.ActiveCount}</b></p>");
if (data.Subscriptions.ProbeCount > 0)
sb.AppendLine(
$"<p>Probes: {data.Subscriptions.ProbeCount} (bridge-owned runtime status)</p>");
sb.AppendLine("</div>");
// Data Change Dispatch panel
@@ -318,6 +343,32 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Status
sb.AppendLine($"<p>Last Rebuild: {data.Galaxy.LastRebuildTime:O}</p>");
sb.AppendLine("</div>");
// Galaxy Runtime panel — per-host Platform + AppEngine state
if (data.RuntimeStatus.Total > 0)
{
var rtColor = data.RuntimeStatus.StoppedCount > 0 ? "red"
: data.RuntimeStatus.UnknownCount > 0 ? "yellow"
: "green";
sb.AppendLine($"<div class='panel {rtColor}'><h2>Galaxy Runtime</h2>");
sb.AppendLine(
$"<p>{data.RuntimeStatus.RunningCount} of {data.RuntimeStatus.Total} hosts running" +
$" ({data.RuntimeStatus.StoppedCount} stopped, {data.RuntimeStatus.UnknownCount} unknown)</p>");
sb.AppendLine("<table><tr><th>Name</th><th>Kind</th><th>State</th><th>Since</th><th>Last Error</th></tr>");
foreach (var host in data.RuntimeStatus.Hosts)
{
var since = host.LastStateChangeTime?.ToString("O") ?? "-";
var err = WebUtility.HtmlEncode(host.LastError ?? "");
sb.AppendLine(
$"<tr><td>{WebUtility.HtmlEncode(host.ObjectName)}</td>" +
$"<td>{WebUtility.HtmlEncode(host.Kind)}</td>" +
$"<td>{host.State}</td>" +
$"<td>{since}</td>" +
$"<td><code>{err}</code></td></tr>");
}
sb.AppendLine("</table>");
sb.AppendLine("</div>");
}
// Historian panel
var anyClusterNodeFailed =
data.Historian.NodeCount > 0 && data.Historian.HealthyNodeCount < data.Historian.NodeCount;