Files
lmxopcua/tests/ZB.MOM.WW.OtOpcUa.Tests.v1Archive/Status/HealthCheckServiceTests.cs
Joseph Doherty a3d16a28f1 Phase 2 Stream D Option B — archive v1 surface + new Driver.Galaxy.E2E parity suite. Non-destructive intermediate state: the v1 OtOpcUa.Host + Historian.Aveva + Tests + IntegrationTests projects all still build (494 v1 unit + 6 v1 integration tests still pass when run explicitly), but solution-level dotnet test ZB.MOM.WW.OtOpcUa.slnx now skips them via IsTestProject=false on the test projects + archive-status PropertyGroup comments on the src projects. The destructive deletion is reserved for Phase 2 PR 3 with explicit operator review per CLAUDE.md "only use destructive operations when truly the best approach". tests/ZB.MOM.WW.OtOpcUa.Tests/ renamed via git mv to tests/ZB.MOM.WW.OtOpcUa.Tests.v1Archive/; csproj <AssemblyName> kept as the original ZB.MOM.WW.OtOpcUa.Tests so v1 OtOpcUa.Host's [InternalsVisibleTo("ZB.MOM.WW.OtOpcUa.Tests")] still matches and the project rebuilds clean. tests/ZB.MOM.WW.OtOpcUa.IntegrationTests gets <IsTestProject>false</IsTestProject>. src/ZB.MOM.WW.OtOpcUa.Host + src/ZB.MOM.WW.OtOpcUa.Historian.Aveva get PropertyGroup archive-status comments documenting they're functionally superseded but kept in-build because cascading dependencies (Historian.Aveva → Host; IntegrationTests → Host) make a single-PR deletion high blast-radius. New tests/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.E2E/ project (.NET 10) with ParityFixture that spawns OtOpcUa.Driver.Galaxy.Host.exe (net48 x86) as a Process.Start subprocess with OTOPCUA_GALAXY_BACKEND=db env vars, awaits 2s for the PipeServer to bind, then exposes a connected GalaxyProxyDriver; skips on non-Windows / Administrator shells (PipeAcl denies admins per decision #76) / ZB unreachable / Host EXE not built — each skip carries a SkipReason string the test method reads via Assert.Skip(SkipReason). RecordingAddressSpaceBuilder captures every Folder/Variable/AddProperty registration so parity tests can assert on the same shape v1 LmxNodeManager produced. 
HierarchyParityTests (3) — Discover returns gobjects with attributes; attribute full references match the tag.attribute Galaxy reference grammar; HistoryExtension flag flows through correctly. StabilityFindingsRegressionTests (4) — one test per 2026-04-13 stability finding from commits c76ab8f and 7310925: phantom probe subscription doesn't corrupt unrelated host status; HostStatusChangedEventArgs structurally carries a specific HostName + OldState + NewState (event signature mathematically prevents the v1 cross-host quality-clear bug); all GalaxyProxyDriver capability methods return Task or Task<T> (sync-over-async would deadlock OPC UA stack thread); AcknowledgeAsync completes before returning (no fire-and-forget background work that could race shutdown). Solution test count: 470 pass / 7 skip (E2E on admin shell) / 1 pre-existing Phase 0 baseline. Run archived suites explicitly: dotnet test tests/ZB.MOM.WW.OtOpcUa.Tests.v1Archive (494 pass) + dotnet test tests/ZB.MOM.WW.OtOpcUa.IntegrationTests (6 pass). docs/v2/V1_ARCHIVE_STATUS.md inventories every archived surface with run-it-explicitly instructions + a 10-step deletion plan for PR 3 + rollback procedure (git revert restores all four projects). docs/v2/implementation/exit-gate-phase-2-final.md supersedes the two partial-exit docs with the per-stream status table (A/B/C/D/E all addressed, D split across PR 2/3 per safety protocol), the test count breakdown, fresh adversarial review of PR 2 deltas (4 new findings: medium IsTestProject=false safety net loss, medium structural-vs-behavioral stability tests, low backend=db default, low Process.Start env inheritance), the 8 carried-forward findings from exit-gate-phase-2.md, the recommended PR order (1 → 2 → 3 → 4). docs/v2/implementation/pr-2-body.md is the Gitea web-UI paste-in for opening PR 2 once pushed.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 00:56:21 -04:00

212 lines
7.8 KiB
C#

using System;
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Host.Domain;
using ZB.MOM.WW.OtOpcUa.Host.Metrics;
using ZB.MOM.WW.OtOpcUa.Host.Status;
namespace ZB.MOM.WW.OtOpcUa.Tests.Status
{
/// <summary>
/// Exercises <c>HealthCheckService</c>: every test passes a connection state plus optional metrics,
/// historian status, or alarm status and asserts on the reported status, color, or message.
/// </summary>
public class HealthCheckServiceTests
{
    /// <summary>
    /// Duration attached to every operation sample recorded by these tests.
    /// </summary>
    private static readonly TimeSpan SampleDuration = TimeSpan.FromMilliseconds(10);

    /// <summary>
    /// Service under test; initialised by the field initialiser whenever the test class is constructed.
    /// </summary>
    private readonly HealthCheckService _service = new HealthCheckService();

    /// <summary>
    /// Records <paramref name="succeeded"/> samples with the two-argument <c>RecordOperation</c> call,
    /// then <paramref name="failed"/> samples with the success flag explicitly set to <c>false</c>.
    /// </summary>
    /// <param name="target">Metrics instance that receives the samples.</param>
    /// <param name="operation">Operation name passed with every sample.</param>
    /// <param name="succeeded">How many samples to record without a success flag.</param>
    /// <param name="failed">How many samples to record as failures.</param>
    private static void RecordSamples(PerformanceMetrics target, string operation, int succeeded, int failed)
    {
        // Successes go first, then failures, matching the order the tests were written with.
        for (var done = 0; done < succeeded; done++)
        {
            target.RecordOperation(operation, SampleDuration);
        }

        for (var done = 0; done < failed; done++)
        {
            target.RecordOperation(operation, SampleDuration, false);
        }
    }

    /// <summary>
    /// A disconnected state without metrics must yield "Unhealthy", the red color, and a message
    /// containing "not connected".
    /// </summary>
    [Fact]
    public void NotConnected_ReturnsUnhealthy()
    {
        var health = _service.CheckHealth(ConnectionState.Disconnected, null);

        health.Status.ShouldBe("Unhealthy");
        health.Color.ShouldBe("red");
        health.Message.ShouldContain("not connected");
    }

    /// <summary>
    /// A connected state with no metrics object must yield "Healthy" and the green color.
    /// </summary>
    [Fact]
    public void Connected_NoMetrics_ReturnsHealthy()
    {
        var health = _service.CheckHealth(ConnectionState.Connected, null);

        health.Status.ShouldBe("Healthy");
        health.Color.ShouldBe("green");
    }

    /// <summary>
    /// 200 "Read" samples recorded without a failure flag must leave a connected service "Healthy".
    /// </summary>
    [Fact]
    public void Connected_GoodMetrics_ReturnsHealthy()
    {
        using (var perf = new PerformanceMetrics())
        {
            RecordSamples(perf, "Read", succeeded: 200, failed: 0);

            var health = _service.CheckHealth(ConnectionState.Connected, perf);

            health.Status.ShouldBe("Healthy");
        }
    }

    /// <summary>
    /// 40 "Read" samples without a failure flag followed by 80 failed ones must yield "Degraded"
    /// and the yellow color.
    /// </summary>
    [Fact]
    public void Connected_LowSuccessRate_ReturnsDegraded()
    {
        using (var perf = new PerformanceMetrics())
        {
            RecordSamples(perf, "Read", succeeded: 40, failed: 80);

            var health = _service.CheckHealth(ConnectionState.Connected, perf);

            health.Status.ShouldBe("Degraded");
            health.Color.ShouldBe("yellow");
        }
    }

    /// <summary>
    /// The boolean helper must report <c>true</c> for a connected state without metrics.
    /// </summary>
    [Fact]
    public void IsHealthy_Connected_ReturnsTrue()
    {
        var healthy = _service.IsHealthy(ConnectionState.Connected, null);

        healthy.ShouldBe(true);
    }

    /// <summary>
    /// The boolean helper must report <c>false</c> for a disconnected state without metrics.
    /// </summary>
    [Fact]
    public void IsHealthy_Disconnected_ReturnsFalse()
    {
        var healthy = _service.IsHealthy(ConnectionState.Disconnected, null);

        healthy.ShouldBe(false);
    }

    /// <summary>
    /// The <c>Error</c> connection state must be reported as "Unhealthy".
    /// </summary>
    [Fact]
    public void Error_ReturnsUnhealthy()
    {
        var health = _service.CheckHealth(ConnectionState.Error, null);

        health.Status.ShouldBe("Unhealthy");
    }

    /// <summary>
    /// The <c>Reconnecting</c> connection state must be reported as "Unhealthy".
    /// </summary>
    [Fact]
    public void Reconnecting_ReturnsUnhealthy()
    {
        var health = _service.CheckHealth(ConnectionState.Reconnecting, null);

        health.Status.ShouldBe("Unhealthy");
    }

    /// <summary>
    /// An enabled historian whose plugin status is "LoadFailed" must yield "Degraded" (yellow), and the
    /// message must carry both the plugin status and the plugin error text.
    /// </summary>
    [Fact]
    public void HistorianEnabled_PluginLoadFailed_ReturnsDegraded()
    {
        var pluginState = new HistorianStatusInfo
        {
            Enabled = true,
            PluginStatus = "LoadFailed",
            PluginError = "aahClientManaged.dll could not be loaded"
        };

        var health = _service.CheckHealth(ConnectionState.Connected, null, pluginState);

        health.Status.ShouldBe("Degraded");
        health.Color.ShouldBe("yellow");
        health.Message.ShouldContain("LoadFailed");
        health.Message.ShouldContain("aahClientManaged.dll");
    }

    /// <summary>
    /// A historian marked as not enabled (plugin status "Disabled") must leave a connected service "Healthy".
    /// </summary>
    [Fact]
    public void HistorianDisabled_ReturnsHealthy()
    {
        var pluginState = new HistorianStatusInfo { Enabled = false, PluginStatus = "Disabled" };

        var health = _service.CheckHealth(ConnectionState.Connected, null, pluginState);

        health.Status.ShouldBe("Healthy");
    }

    /// <summary>
    /// An enabled historian with plugin status "Loaded" must leave a connected service "Healthy".
    /// </summary>
    [Fact]
    public void HistorianEnabled_PluginLoaded_ReturnsHealthy()
    {
        var pluginState = new HistorianStatusInfo
        {
            Enabled = true,
            PluginStatus = "Loaded"
        };

        var health = _service.CheckHealth(ConnectionState.Connected, null, pluginState);

        health.Status.ShouldBe("Healthy");
    }

    /// <summary>
    /// Twelve "HistoryReadRaw" samples (4 without a failure flag, then 8 failed) must already yield
    /// "Degraded", with the operation name in the message. The original note on this test states that
    /// HistoryRead operations use a lower sample threshold than the regular 100-sample rule; the exact
    /// thresholds live in <c>HealthCheckService</c> and are not visible from this file.
    /// </summary>
    [Fact]
    public void HistoryReadLowSuccessRate_WithLowSampleCount_ReturnsDegraded()
    {
        using (var perf = new PerformanceMetrics())
        {
            RecordSamples(perf, "HistoryReadRaw", succeeded: 4, failed: 8);

            var health = _service.CheckHealth(ConnectionState.Connected, perf);

            health.Status.ShouldBe("Degraded");
            health.Message.ShouldContain("HistoryReadRaw");
        }
    }

    /// <summary>
    /// Five failed "HistoryReadRaw" samples, and nothing else, must not degrade the service. The
    /// original note on this test describes this as being under the 10-sample threshold.
    /// </summary>
    [Fact]
    public void HistoryReadLowSuccessRate_BelowThreshold_ReturnsHealthy()
    {
        using (var perf = new PerformanceMetrics())
        {
            RecordSamples(perf, "HistoryReadRaw", succeeded: 0, failed: 5);

            var health = _service.CheckHealth(ConnectionState.Connected, perf);

            health.Status.ShouldBe("Healthy");
        }
    }

    /// <summary>
    /// With alarm tracking enabled, a single acknowledge write failure must yield "Degraded" and a
    /// message containing "Alarm acknowledge".
    /// </summary>
    [Fact]
    public void AlarmAckWriteFailures_AnyCount_ReturnsDegraded()
    {
        var alarmState = new AlarmStatusInfo
        {
            TrackingEnabled = true,
            AckWriteFailures = 1
        };

        var health = _service.CheckHealth(ConnectionState.Connected, null, null, alarmState);

        health.Status.ShouldBe("Degraded");
        health.Message.ShouldContain("Alarm acknowledge");
    }

    /// <summary>
    /// With alarm tracking disabled, a failure count of 99 must still leave the service "Healthy".
    /// </summary>
    [Fact]
    public void AlarmAckWriteFailures_TrackingDisabled_ReturnsHealthy()
    {
        var alarmState = new AlarmStatusInfo
        {
            TrackingEnabled = false,
            AckWriteFailures = 99
        };

        var health = _service.CheckHealth(ConnectionState.Connected, null, null, alarmState);

        health.Status.ShouldBe("Healthy");
    }
}
}