Surface historian plugin and alarm-tracking health in the status dashboard so operators can detect misconfiguration and runtime degradation that previously showed as fully healthy
Wraps the four HistoryRead overrides and OnAlarmAcknowledge with PerformanceMetrics.BeginOperation, adds alarm counters to LmxNodeManager, publishes a structured HistorianPluginOutcome from HistorianPluginLoader, and extends HealthCheckService with plugin-load, history-read, and alarm-ack-failure degradation rules.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -73,6 +73,11 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
|
||||
// Dispatch queue metrics
|
||||
private long _totalMxChangeEvents;
|
||||
|
||||
// Alarm instrumentation counters
|
||||
private long _alarmTransitionCount;
|
||||
private long _alarmAckEventCount;
|
||||
private long _alarmAckWriteFailures;
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new node manager for the Galaxy-backed OPC UA namespace.
|
||||
/// </summary>
|
||||
@@ -151,6 +156,47 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
|
||||
/// </summary>
|
||||
public double AverageDispatchBatchSize { get; private set; }
|
||||
|
||||
/// <summary>
/// Gets a value indicating whether alarm condition tracking is enabled for this node manager.
/// </summary>
/// <value>The current value of the <c>_alarmTrackingEnabled</c> field.</value>
public bool AlarmTrackingEnabled
{
    get => _alarmTrackingEnabled;
}
|
||||
|
||||
/// <summary>
/// Gets the number of distinct alarm conditions currently tracked (one per alarm attribute).
/// </summary>
/// <remarks>
/// Returns the entry count of <c>_alarmInAlarmTags</c>.
/// NOTE(review): this read does not take <c>Lock</c>, whereas <c>CountActiveAlarms</c>
/// enumerates the same collection under it; confirm the unlocked read is intended.
/// </remarks>
public int AlarmConditionCount
{
    get => _alarmInAlarmTags.Count;
}
|
||||
|
||||
/// <summary>
/// Gets the number of alarms currently in the InAlarm=true state.
/// </summary>
/// <remarks>
/// Every read calls <c>CountActiveAlarms</c>, which acquires <c>Lock</c> and walks all
/// tracked alarm entries, so the value is recomputed on each access rather than cached.
/// </remarks>
public int ActiveAlarmCount
{
    get => CountActiveAlarms();
}
|
||||
|
||||
/// <summary>
/// Gets the total number of InAlarm transition events observed in the dispatch loop since startup.
/// </summary>
/// <remarks>
/// Reads the <c>_alarmTransitionCount</c> counter through <see cref="Interlocked.Read(ref long)" />.
/// The counter is incremented each time an entry is added to the pending alarm event list.
/// </remarks>
public long AlarmTransitionCount
{
    get => Interlocked.Read(ref _alarmTransitionCount);
}
|
||||
|
||||
/// <summary>
/// Gets the total number of alarm acknowledgement transition events observed since startup.
/// </summary>
/// <remarks>
/// Reads the <c>_alarmAckEventCount</c> counter through <see cref="Interlocked.Read(ref long)" />.
/// The counter is incremented each time an entry is added to the pending acked event list;
/// updates whose acked value equals the previously recorded one are skipped and not counted.
/// </remarks>
public long AlarmAckEventCount
{
    get => Interlocked.Read(ref _alarmAckEventCount);
}
|
||||
|
||||
/// <summary>
/// Gets the total number of MXAccess AckMsg writes that failed while processing alarm acknowledges.
/// </summary>
/// <remarks>
/// Reads the <c>_alarmAckWriteFailures</c> counter through <see cref="Interlocked.Read(ref long)" />.
/// The counter is incremented in the exception handler of the alarm acknowledge path, alongside
/// the "Failed to write AckMsg" warning.
/// </remarks>
public long AlarmAckWriteFailures
{
    get => Interlocked.Read(ref _alarmAckWriteFailures);
}
|
||||
|
||||
/// <summary>
/// Counts the tracked alarm entries whose <c>LastInAlarm</c> flag is currently set.
/// </summary>
/// <returns>The number of entries in <c>_alarmInAlarmTags</c> with <c>LastInAlarm</c> equal to <c>true</c>.</returns>
private int CountActiveAlarms()
{
    // The whole enumeration runs while holding Lock.
    lock (Lock)
    {
        var active = 0;
        foreach (var tracked in _alarmInAlarmTags.Values)
        {
            if (tracked.LastInAlarm)
            {
                active++;
            }
        }

        return active;
    }
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public override void CreateAddressSpace(IDictionary<NodeId, IList<IReference>> externalReferences)
|
||||
{
|
||||
@@ -421,6 +467,7 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
|
||||
if (alarmInfo == null)
|
||||
return new ServiceResult(StatusCodes.BadNodeIdUnknown);
|
||||
|
||||
using var scope = _metrics.BeginOperation("AlarmAcknowledge");
|
||||
try
|
||||
{
|
||||
var ackMessage = comment?.Text ?? "";
|
||||
@@ -432,6 +479,8 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
scope.SetSuccess(false);
|
||||
Interlocked.Increment(ref _alarmAckWriteFailures);
|
||||
Log.Warning(ex, "Failed to write AckMsg for {Source}", alarmInfo.SourceName);
|
||||
return new ServiceResult(StatusCodes.BadInternalError);
|
||||
}
|
||||
@@ -1522,6 +1571,7 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
|
||||
continue;
|
||||
}
|
||||
|
||||
using var historyScope = _metrics.BeginOperation("HistoryReadRaw");
|
||||
try
|
||||
{
|
||||
var maxValues = details.NumValuesPerNode > 0 ? (int)details.NumValuesPerNode : 0;
|
||||
@@ -1536,6 +1586,7 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
historyScope.SetSuccess(false);
|
||||
Log.Warning(ex, "HistoryRead raw failed for {TagRef}", tagRef);
|
||||
errors[idx] = new ServiceResult(StatusCodes.BadInternalError);
|
||||
}
|
||||
@@ -1598,6 +1649,7 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
|
||||
continue;
|
||||
}
|
||||
|
||||
using var historyScope = _metrics.BeginOperation("HistoryReadProcessed");
|
||||
try
|
||||
{
|
||||
var dataValues = _historianDataSource.ReadAggregateAsync(
|
||||
@@ -1609,6 +1661,7 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
historyScope.SetSuccess(false);
|
||||
Log.Warning(ex, "HistoryRead processed failed for {TagRef}", tagRef);
|
||||
errors[idx] = new ServiceResult(StatusCodes.BadInternalError);
|
||||
}
|
||||
@@ -1648,6 +1701,7 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
|
||||
continue;
|
||||
}
|
||||
|
||||
using var historyScope = _metrics.BeginOperation("HistoryReadAtTime");
|
||||
try
|
||||
{
|
||||
var timestamps = new DateTime[details.ReqTimes.Count];
|
||||
@@ -1669,6 +1723,7 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
historyScope.SetSuccess(false);
|
||||
Log.Warning(ex, "HistoryRead at-time failed for {TagRef}", tagRef);
|
||||
errors[idx] = new ServiceResult(StatusCodes.BadInternalError);
|
||||
}
|
||||
@@ -1714,6 +1769,7 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
|
||||
}
|
||||
}
|
||||
|
||||
using var historyScope = _metrics.BeginOperation("HistoryReadEvents");
|
||||
try
|
||||
{
|
||||
var maxEvents = details.NumValuesPerNode > 0 ? (int)details.NumValuesPerNode : 0;
|
||||
@@ -1751,6 +1807,7 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
historyScope.SetSuccess(false);
|
||||
Log.Warning(ex, "HistoryRead events failed for {NodeId}", nodeIdStr);
|
||||
errors[idx] = new ServiceResult(StatusCodes.BadInternalError);
|
||||
}
|
||||
@@ -2107,7 +2164,10 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
|
||||
if (ackedAlarmInfo.LastAcked.HasValue && newAcked == ackedAlarmInfo.LastAcked.Value)
|
||||
ackedAlarmInfo = null; // No transition → skip
|
||||
else
|
||||
{
|
||||
pendingAckedEvents.Add((ackedAlarmInfo, newAcked));
|
||||
Interlocked.Increment(ref _alarmAckEventCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2127,6 +2187,7 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
|
||||
}
|
||||
|
||||
pendingAlarmEvents.Add((address, alarmInfo, newInAlarm, severity, message));
|
||||
Interlocked.Increment(ref _alarmTransitionCount);
|
||||
}
|
||||
|
||||
// Apply under Lock so ClearChangeMasks propagates to monitored items.
|
||||
|
||||
Reference in New Issue
Block a user