Surface historian plugin and alarm-tracking health in the status dashboard so operators can detect misconfiguration and runtime degradation that previously showed as fully healthy

Wraps the 4 HistoryRead overrides and OnAlarmAcknowledge with PerformanceMetrics.BeginOperation, adds alarm counters to LmxNodeManager, publishes a structured HistorianPluginOutcome from HistorianPluginLoader, and extends HealthCheckService with plugin-load, history-read, and alarm-ack-failure degradation rules.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-04-12 15:52:03 -04:00
parent 9b42b61eb6
commit c5ed5312a9
10 changed files with 647 additions and 26 deletions

View File

@@ -105,5 +105,108 @@ namespace ZB.MOM.WW.LmxOpcUa.Tests.Status
var result = _sut.CheckHealth(ConnectionState.Reconnecting, null);
result.Status.ShouldBe("Unhealthy");
}
/// <summary>
/// With the historian enabled and its plugin reporting a load failure, the health check
/// must come back Degraded (yellow) and echo both the plugin status and the plugin error text.
/// </summary>
[Fact]
public void HistorianEnabled_PluginLoadFailed_ReturnsDegraded()
{
    // Arrange: historian is switched on, but the plugin reports that it failed to load.
    var failedHistorian = new HistorianStatusInfo
    {
        Enabled = true,
        PluginStatus = "LoadFailed",
        PluginError = "aahClientManaged.dll could not be loaded"
    };

    // Act
    var health = _sut.CheckHealth(ConnectionState.Connected, null, failedHistorian);

    // Assert: degraded, and the message carries enough detail to diagnose the failure.
    health.Status.ShouldBe("Degraded");
    health.Color.ShouldBe("yellow");
    health.Message.ShouldContain("LoadFailed");
    health.Message.ShouldContain("aahClientManaged.dll");
}
/// <summary>
/// A historian that is not enabled (reported here with plugin status "Disabled")
/// leaves a connected service Healthy.
/// </summary>
[Fact]
public void HistorianDisabled_ReturnsHealthy()
{
    // Arrange
    var disabledHistorian = new HistorianStatusInfo { Enabled = false, PluginStatus = "Disabled" };

    // Act
    var health = _sut.CheckHealth(ConnectionState.Connected, null, disabledHistorian);

    // Assert
    health.Status.ShouldBe("Healthy");
}
/// <summary>
/// An enabled historian whose plugin status is "Loaded" keeps a connected service Healthy.
/// </summary>
[Fact]
public void HistorianEnabled_PluginLoaded_ReturnsHealthy()
{
    // Arrange
    var loadedHistorian = new HistorianStatusInfo
    {
        Enabled = true,
        PluginStatus = "Loaded"
    };

    // Act
    var health = _sut.CheckHealth(ConnectionState.Connected, null, loadedHistorian);

    // Assert
    health.Status.ShouldBe("Healthy");
}
/// <summary>
/// HistoryRead operations degrade the service at a far lower sample count than the regular
/// 100-sample rule: 12 recorded samples (4 successes, 8 failures, roughly a 33% success rate)
/// must yield Degraded with the operation name in the message.
/// </summary>
/// <remarks>
/// NOTE(review): the exact minimum sample count of the HistoryRead rule (more than 10, i.e. 11)
/// is defined in HealthCheckService and is not visible here; this test sits one sample above it
/// rather than on the boundary. Confirm against the rule before tightening the counts.
/// </remarks>
[Fact]
public void HistoryReadLowSuccessRate_WithLowSampleCount_ReturnsDegraded()
{
    // 4 of 12 samples succeed, keeping the success rate below 50%.
    const int successCount = 4;
    const int failureCount = 8;
    var duration = TimeSpan.FromMilliseconds(10);

    using var metrics = new PerformanceMetrics();
    for (var i = 0; i < successCount; i++)
    {
        metrics.RecordOperation("HistoryReadRaw", duration);
    }

    for (var i = 0; i < failureCount; i++)
    {
        metrics.RecordOperation("HistoryReadRaw", duration, false);
    }

    var result = _sut.CheckHealth(ConnectionState.Connected, metrics);

    result.Status.ShouldBe("Degraded");
    result.Message.ShouldContain("HistoryReadRaw");
}
/// <summary>
/// A HistoryRead sample count below the 10-sample threshold does not degrade the service,
/// even when every recorded sample is a failure (5 failed HistoryReadRaw operations here).
/// </summary>
[Fact]
public void HistoryReadLowSuccessRate_BelowThreshold_ReturnsHealthy()
{
    using (var perf = new PerformanceMetrics())
    {
        // Record five failures and no successes.
        var remaining = 5;
        while (remaining-- > 0)
        {
            perf.RecordOperation("HistoryReadRaw", TimeSpan.FromMilliseconds(10), false);
        }

        var health = _sut.CheckHealth(ConnectionState.Connected, perf);

        health.Status.ShouldBe("Healthy");
    }
}
/// <summary>
/// With alarm tracking enabled, a single alarm acknowledge write failure is enough to
/// report Degraded, and the message mentions "Alarm acknowledge".
/// </summary>
[Fact]
public void AlarmAckWriteFailures_AnyCount_ReturnsDegraded()
{
    // Arrange: tracking is on and exactly one ack write has failed.
    var alarmStatus = new AlarmStatusInfo
    {
        TrackingEnabled = true,
        AckWriteFailures = 1
    };

    // Act
    var health = _sut.CheckHealth(ConnectionState.Connected, null, null, alarmStatus);

    // Assert
    health.Status.ShouldBe("Degraded");
    health.Message.ShouldContain("Alarm acknowledge");
}
/// <summary>
/// When alarm tracking is disabled, a non-zero acknowledge write failure count (99 here)
/// is ignored and the service stays Healthy.
/// </summary>
[Fact]
public void AlarmAckWriteFailures_TrackingDisabled_ReturnsHealthy()
{
    // Arrange: failures are present, but tracking is switched off.
    var alarmStatus = new AlarmStatusInfo
    {
        TrackingEnabled = false,
        AckWriteFailures = 99
    };

    // Act
    var health = _sut.CheckHealth(ConnectionState.Connected, null, null, alarmStatus);

    // Assert
    health.Status.ShouldBe("Healthy");
}
}
}