Stop MxAccess from overwriting Bad quality on stopped-host variables: suppress pending data changes at dispatch, guard the cross-host clear against wiping sibling state, and silence the Unknown→Running startup callback, so that a recovering DevPlatform can no longer reset variables that a still-stopped DevAppEngine marked Bad.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-04-13 23:22:28 -04:00
parent 4b209f64bb
commit 731092595f
4 changed files with 222 additions and 41 deletions

View File

@@ -108,6 +108,7 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
private readonly NodeId? _writeOperateRoleId;
private readonly NodeId? _writeTuneRoleId;
private long _dispatchCycleCount;
private long _suppressedUpdatesCount;
private volatile bool _dispatchDisposed;
private volatile bool _dispatchRunning;
private Thread? _dispatchThread;
@@ -790,24 +791,53 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
/// <param name="gobjectId">The runtime host's gobject_id.</param>
public void ClearHostVariablesBadQuality(int gobjectId)
{
List<BaseDataVariableState>? variables;
var clearedCount = 0;
var skippedCount = 0;
lock (Lock)
{
if (!_hostedVariables.TryGetValue(gobjectId, out variables))
return;
var now = DateTime.UtcNow;
foreach (var variable in variables)
// Iterate the full tag → host-list map so we can skip variables whose other
// ancestor hosts are still Stopped. Mass-clearing _hostedVariables[gobjectId]
// would wipe Bad status set by a concurrently-stopped sibling host (e.g.
// recovering DevPlatform must not clear variables that also live under a
// still-stopped DevAppEngine).
foreach (var kv in _hostIdsByTagRef)
{
variable.StatusCode = StatusCodes.Good;
variable.Timestamp = now;
variable.ClearChangeMasks(SystemContext, false);
var hostIds = kv.Value;
if (!hostIds.Contains(gobjectId))
continue;
var anotherStopped = false;
for (var i = 0; i < hostIds.Count; i++)
{
if (hostIds[i] == gobjectId)
continue;
if (_galaxyRuntimeProbeManager != null &&
_galaxyRuntimeProbeManager.IsHostStopped(hostIds[i]))
{
anotherStopped = true;
break;
}
}
if (anotherStopped)
{
skippedCount++;
continue;
}
if (_tagToVariableNode.TryGetValue(kv.Key, out var variable))
{
variable.StatusCode = StatusCodes.Good;
variable.Timestamp = now;
variable.ClearChangeMasks(SystemContext, false);
clearedCount++;
}
}
}
Log.Information(
"Cleared bad-quality override on {Count} variable(s) for recovered host gobject_id={GobjectId}",
variables.Count, gobjectId);
"Cleared bad-quality override on {Count} variable(s) for recovered host gobject_id={GobjectId} (skipped {Skipped} with other stopped ancestors)",
clearedCount, gobjectId, skippedCount);
}
private void SubscribeAlarmTags()
@@ -2554,6 +2584,18 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
if (!_pendingDataChanges.TryRemove(address, out var vtq))
continue;
// Suppress updates for tags whose owning Galaxy runtime host is currently
// Stopped. Without this, MxAccess keeps streaming cached values that would
// overwrite the BadOutOfService set by MarkHostVariablesBadQuality — the
// variables would flicker Bad→Good every dispatch cycle and subscribers
// would see a flood of notifications (the original "client freeze" symptom).
// Dropping at the source also means we do no lock/alarm work for dead data.
if (IsTagUnderStoppedHost(address))
{
Interlocked.Increment(ref _suppressedUpdatesCount);
continue;
}
AlarmInfo? alarmInfo = null;
AlarmInfo? ackedAlarmInfo = null;
var newInAlarm = false;
@@ -2728,6 +2770,7 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
var batchSize = Interlocked.Read(ref _totalDispatchBatchSize);
var cycles = Interlocked.Read(ref _dispatchCycleCount);
var avgQueueSize = cycles > 0 ? (double)batchSize / cycles : 0;
var suppressed = Interlocked.Exchange(ref _suppressedUpdatesCount, 0);
// Reset rolling counters
Interlocked.Exchange(ref _totalDispatchBatchSize, 0);
@@ -2738,8 +2781,8 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
AverageDispatchBatchSize = avgQueueSize;
Log.Information(
"DataChange dispatch: EventsPerSec={EventsPerSec:F1}, AvgBatchSize={AvgBatchSize:F1}, PendingItems={Pending}, TotalEvents={Total}",
eventsPerSecond, avgQueueSize, _pendingDataChanges.Count, totalEvents);
"DataChange dispatch: EventsPerSec={EventsPerSec:F1}, AvgBatchSize={AvgBatchSize:F1}, PendingItems={Pending}, TotalEvents={Total}, SuppressedStopped={Suppressed}",
eventsPerSecond, avgQueueSize, _pendingDataChanges.Count, totalEvents, suppressed);
}
/// <inheritdoc />