Close all four findings from the 2026-04-13 stability review:

- A failed runtime probe subscription can no longer leave a phantom entry that Tick() flips to Stopped, fanning out false BadOutOfService quality across a host's subtree.
- A silently failed dashboard bind no longer lets the service advertise a successful start while an operator-visible endpoint is dead.
- The seven sync-over-async sites in LmxNodeManager (rebuild probe sync, Read, Write, and the four HistoryRead overrides) can no longer park the OPC UA stack thread indefinitely on a hung backend.
- Alarm auto-subscribe and transferred-subscription restore no longer race shutdown as untracked fire-and-forget tasks.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-04-14 00:48:07 -04:00
parent 731092595f
commit c76ab8fdee
21 changed files with 869 additions and 53 deletions

View File

@@ -192,6 +192,43 @@ namespace ZB.MOM.WW.LmxOpcUa.Tests.Configuration
config.Security.MinimumCertificateKeySize.ShouldBe(2048);
}
/// <summary>
/// Regression guard for stability review 2026-04-13 Finding 3: an
/// MxAccess.RequestTimeoutSeconds of zero must make validation fail, because the
/// setting is required to be at least 1.
/// </summary>
[Fact]
public void Validator_MxAccessRequestTimeoutZero_ReturnsFalse()
{
    // Start from the JSON-loaded configuration and break only the setting under test.
    var settings = LoadFromJson();
    settings.MxAccess.RequestTimeoutSeconds = 0;

    var accepted = ConfigurationValidator.ValidateAndLog(settings);

    accepted.ShouldBe(false);
}
/// <summary>
/// Regression guard for stability review 2026-04-13 Finding 3: with the historian
/// enabled, a Historian.RequestTimeoutSeconds of zero must make validation fail,
/// because the setting is required to be at least 1.
/// </summary>
[Fact]
public void Validator_HistorianRequestTimeoutZero_ReturnsFalse()
{
    var settings = LoadFromJson();

    // Enable the historian and give it a server name before zeroing the timeout.
    var historian = settings.Historian;
    historian.Enabled = true;
    historian.ServerName = "localhost";
    historian.RequestTimeoutSeconds = 0;

    var accepted = ConfigurationValidator.ValidateAndLog(settings);

    accepted.ShouldBe(false);
}
/// <summary>
/// Checks that a freshly constructed AppConfiguration exposes RequestTimeoutSeconds
/// values of at least 1 for both the MxAccess and Historian sections.
/// </summary>
[Fact]
public void Validator_DefaultRequestTimeouts_AreSensible()
{
    AppConfiguration defaults = new AppConfiguration();

    var mxAccessTimeout = defaults.MxAccess.RequestTimeoutSeconds;
    mxAccessTimeout.ShouldBeGreaterThanOrEqualTo(1);

    var historianTimeout = defaults.Historian.RequestTimeoutSeconds;
    historianTimeout.ShouldBeGreaterThanOrEqualTo(1);
}
/// <summary>
/// Confirms that a minimum key size below 2048 is rejected by the validator.
/// </summary>

View File

@@ -402,6 +402,73 @@ namespace ZB.MOM.WW.LmxOpcUa.Tests.MxAccess
sut.IsHostStopped(20).ShouldBeFalse();
}
// ---------- Subscribe failure rollback (stability review 2026-04-13 Finding 1) ----------
[Fact]
public async Task Sync_SubscribeThrows_DoesNotLeavePhantomEntry()
{
    // The fake client carries a subscribe exception for the duration of this test.
    var failingClient = new FakeMxAccessClient();
    failingClient.SubscribeException = new InvalidOperationException("advise failed");

    var stoppedHosts = new List<int>();
    var runningHosts = new List<int>();

    using (var monitor = Sut(failingClient, 15, stoppedHosts, runningHosts))
    {
        var hosts = new[] { Engine(20, "DevAppEngine") };
        await monitor.SyncAsync(hosts);

        // The sync must roll back completely: no probe, no snapshot row, and no
        // entry that a later Tick() could move from Unknown to Stopped.
        monitor.ActiveProbeCount.ShouldBe(0);
        monitor.GetSnapshot().ShouldBeEmpty();
        monitor.IsHostStopped(20).ShouldBeFalse();
    }
}
[Fact]
public async Task Sync_SubscribeThrows_TickDoesNotFireStopCallback()
{
    // The fake client carries a subscribe exception for the duration of this test.
    var failingClient = new FakeMxAccessClient();
    failingClient.SubscribeException = new InvalidOperationException("advise failed");

    var testClock = new Clock();
    var stoppedHosts = new List<int>();
    var runningHosts = new List<int>();

    using (var monitor = Sut(failingClient, 15, stoppedHosts, runningHosts, testClock))
    {
        var hosts = new[] { Engine(20, "DevAppEngine") };
        await monitor.SyncAsync(hosts);

        // Move the clock 30 seconds forward, beyond the 15 passed to Sut, and tick.
        // A leftover phantom entry would be transitioned to Stopped here and would
        // fan out a false host-down signal through the stop callback.
        var later = testClock.Now.AddSeconds(30);
        testClock.Now = later;
        monitor.Tick();

        stoppedHosts.ShouldBeEmpty();
        runningHosts.ShouldBeEmpty();
        monitor.ActiveProbeCount.ShouldBe(0);
    }
}
[Fact]
public async Task Sync_SubscribeSucceedsAfterRetry_AppearsInSnapshot()
{
    // Scenario: the first sync fails and rolls back; a second sync for the same host,
    // issued after the fault is cleared, must then behave like a normal first sync.
    var flakyClient = new FakeMxAccessClient();
    flakyClient.SubscribeException = new InvalidOperationException("first attempt fails");

    var stoppedHosts = new List<int>();
    var runningHosts = new List<int>();

    using (var monitor = Sut(flakyClient, 15, stoppedHosts, runningHosts))
    {
        var firstAttempt = new[] { Engine(20, "DevAppEngine") };
        await monitor.SyncAsync(firstAttempt);
        monitor.ActiveProbeCount.ShouldBe(0);

        // Remove the fault, then sync again with a freshly built host list.
        flakyClient.SubscribeException = null;
        var secondAttempt = new[] { Engine(20, "DevAppEngine") };
        await monitor.SyncAsync(secondAttempt);

        // Exactly one probe is now tracked, and it starts out in the Unknown state.
        monitor.ActiveProbeCount.ShouldBe(1);
        var onlyEntry = monitor.GetSnapshot().Single();
        onlyEntry.State.ShouldBe(GalaxyRuntimeState.Unknown);
    }
}
// ---------- Callback exception safety ----------
[Fact]

View File

@@ -96,6 +96,26 @@ namespace ZB.MOM.WW.LmxOpcUa.Tests.Status
response.StatusCode.ShouldBe(HttpStatusCode.MethodNotAllowed);
}
/// <summary>
/// Confirms that Start() returns false when the target port is already bound by
/// another listener. Regression guard for the stability-review 2026-04-13
/// Finding 2: OpcUaService now surfaces this return value into DashboardStartFailed.
/// </summary>
/// <remarks>
/// The blocking listener is bound with a bounded retry over random ports in the
/// 19000-19499 range. A single random pick can land on a port that some unrelated
/// process already owns, which would make the blocker itself throw and fail the
/// test for a reason that has nothing to do with StatusWebServer.
/// </remarks>
[Fact]
public void Start_WhenPortInUse_ReturnsFalse()
{
    using var blocker = BindBlocker(out var port);

    var reportService = new StatusReportService(new HealthCheckService(), 10);
    reportService.SetComponents(new FakeMxAccessClient(), null, null, null);

    using var contested = new StatusWebServer(reportService, port);
    contested.Start().ShouldBeFalse();

    // Starts an HttpListener on a free port in the test range and reports which
    // port it ended up holding. Gives up after a fixed number of attempts and
    // rethrows the last bind failure.
    HttpListener BindBlocker(out int boundPort)
    {
        const int maxAttempts = 10;
        var random = new Random();

        for (var attempt = 1; ; attempt++)
        {
            var candidatePort = random.Next(19000, 19500);
            var listener = new HttpListener();
            listener.Prefixes.Add($"http://localhost:{candidatePort}/");

            try
            {
                listener.Start();
                boundPort = candidatePort;
                return listener;
            }
            catch (HttpListenerException)
            {
                // Port is taken by someone else: release this listener and try
                // another port, unless the retry budget is exhausted.
                listener.Close();
                if (attempt >= maxAttempts)
                {
                    throw;
                }
            }
        }
    }
}
/// <summary>
/// Confirms that cache-control headers disable caching for dashboard responses.
/// </summary>

View File

@@ -0,0 +1,72 @@
using System;
using System.Threading.Tasks;
using Shouldly;
using Xunit;
using ZB.MOM.WW.LmxOpcUa.Host.Utilities;
namespace ZB.MOM.WW.LmxOpcUa.Tests.Utilities
{
    /// <summary>
    /// Covers <c>SyncOverAsync.WaitSync</c>, the bounded sync-over-async wrapper added for
    /// stability review 2026-04-13 Finding 3. The wrapper acts as a backstop at every
    /// LmxNodeManager sync-over-async site (Read, Write, HistoryRead*, BuildAddressSpace
    /// probe sync). Both the generic and the non-generic overloads are exercised for the
    /// completed, never-completing, faulted and null-task cases.
    /// </summary>
    public class SyncOverAsyncTests
    {
        /// <summary>An already-completed generic task yields its result.</summary>
        [Fact]
        public void WaitSync_CompletedTask_ReturnsResult()
        {
            Task<int> completed = Task.FromResult(42);

            var value = SyncOverAsync.WaitSync(completed, TimeSpan.FromSeconds(1), "test");

            value.ShouldBe(42);
        }

        /// <summary>An already-completed non-generic task returns without throwing.</summary>
        [Fact]
        public void WaitSync_CompletedNonGenericTask_Returns()
        {
            Task completed = Task.CompletedTask;
            var limit = TimeSpan.FromSeconds(1);

            Should.NotThrow(() => SyncOverAsync.WaitSync(completed, limit, "test"));
        }

        /// <summary>
        /// A generic task that never completes surfaces a TimeoutException whose message
        /// contains the operation name.
        /// </summary>
        [Fact]
        public void WaitSync_NeverCompletingTask_ThrowsTimeoutException()
        {
            var pending = new TaskCompletionSource<int>();
            var limit = TimeSpan.FromMilliseconds(100);

            var timeout = Should.Throw<TimeoutException>(() =>
                SyncOverAsync.WaitSync(pending.Task, limit, "op"));

            timeout.Message.ShouldContain("op");
        }

        /// <summary>A non-generic task that never completes surfaces a TimeoutException.</summary>
        [Fact]
        public void WaitSync_NeverCompletingNonGenericTask_ThrowsTimeoutException()
        {
            // Typing the local as Task selects the non-generic overload.
            var source = new TaskCompletionSource<bool>();
            Task pending = source.Task;
            var limit = TimeSpan.FromMilliseconds(100);

            Should.Throw<TimeoutException>(() =>
                SyncOverAsync.WaitSync(pending, limit, "op"));
        }

        /// <summary>A faulted non-generic task rethrows the original exception type.</summary>
        [Fact]
        public void WaitSync_FaultedNonGenericTask_UnwrapsInnerException()
        {
            Task faulted = Task.FromException(new InvalidOperationException("boom"));
            var limit = TimeSpan.FromSeconds(1);

            Should.Throw<InvalidOperationException>(() =>
                SyncOverAsync.WaitSync(faulted, limit, "op"));
        }

        /// <summary>A faulted generic task rethrows the original exception type.</summary>
        [Fact]
        public void WaitSync_FaultedGenericTask_UnwrapsInnerException()
        {
            Task<int> faulted = Task.FromException<int>(new InvalidOperationException("boom"));
            var limit = TimeSpan.FromSeconds(1);

            Should.Throw<InvalidOperationException>(() =>
                SyncOverAsync.WaitSync(faulted, limit, "op"));
        }

        /// <summary>Both overloads reject a null task with ArgumentNullException.</summary>
        [Fact]
        public void WaitSync_NullTask_ThrowsArgumentNullException()
        {
            var limit = TimeSpan.FromSeconds(1);

            // Non-generic overload.
            Should.Throw<ArgumentNullException>(() =>
                SyncOverAsync.WaitSync((Task)null!, limit, "op"));

            // Generic overload.
            Should.Throw<ArgumentNullException>(() =>
                SyncOverAsync.WaitSync((Task<int>)null!, limit, "op"));
        }
    }
}

View File

@@ -0,0 +1,78 @@
using System;
using System.Collections.Generic;
using System.Net;
using Shouldly;
using Xunit;
using ZB.MOM.WW.LmxOpcUa.Host;
using ZB.MOM.WW.LmxOpcUa.Host.Configuration;
using ZB.MOM.WW.LmxOpcUa.Host.Domain;
using ZB.MOM.WW.LmxOpcUa.Tests.Helpers;
namespace ZB.MOM.WW.LmxOpcUa.Tests.Wiring
{
    /// <summary>
    /// Regression for stability review 2026-04-13 Finding 2. Confirms that when the dashboard
    /// port is already bound, the service continues to start (degraded mode) and the
    /// <see cref="OpcUaService.DashboardStartFailed"/> flag is raised.
    /// </summary>
    public class OpcUaServiceDashboardFailureTests
    {
        /// <summary>Upper bound on how many random ports are tried for the blocking listener.</summary>
        private const int MaxPortAttempts = 10;

        /// <summary>
        /// Occupies the dashboard port with a separate listener, starts the service with the
        /// dashboard enabled on that port, and checks that the OPC UA host is still created
        /// while the dashboard failure is reported and no status web server is exposed.
        /// </summary>
        [Fact]
        public void Start_DashboardPortInUse_ContinuesInDegradedMode()
        {
            using var blocker = OccupyDashboardPort(out var dashboardPort);

            var config = new AppConfiguration
            {
                OpcUa = new OpcUaConfiguration
                {
                    Port = 14842,
                    GalaxyName = "TestGalaxy",
                    EndpointPath = "/LmxOpcUa"
                },
                MxAccess = new MxAccessConfiguration { ClientName = "Test" },
                GalaxyRepository = new GalaxyRepositoryConfiguration(),
                Dashboard = new DashboardConfiguration { Enabled = true, Port = dashboardPort }
            };

            var proxy = new FakeMxProxy();
            var repo = new FakeGalaxyRepository
            {
                Hierarchy = new List<GalaxyObjectInfo>
                {
                    new()
                    {
                        GobjectId = 1, TagName = "TestObj", BrowseName = "TestObj",
                        ParentGobjectId = 0, IsArea = false
                    }
                },
                Attributes = new List<GalaxyAttributeInfo>
                {
                    new()
                    {
                        GobjectId = 1, TagName = "TestObj", AttributeName = "TestAttr",
                        FullTagReference = "TestObj.TestAttr", MxDataType = 5, IsArray = false
                    }
                }
            };

            var service = new OpcUaService(config, proxy, repo);
            service.Start();

            try
            {
                // Service continues despite dashboard bind failure — degraded mode policy.
                service.ServerHost.ShouldNotBeNull();
                service.DashboardStartFailed.ShouldBeTrue();
                service.StatusWeb.ShouldBeNull();
            }
            finally
            {
                service.Stop();
            }
        }

        /// <summary>
        /// Starts an <see cref="HttpListener"/> on a random port in the 19500-19998 range and
        /// returns it still listening. A single random pick can hit a port that an unrelated
        /// process already owns, which would fail the test before the service is even built,
        /// so the bind is retried on a new port up to <see cref="MaxPortAttempts"/> times.
        /// </summary>
        /// <param name="port">Receives the port the returned listener is bound to.</param>
        /// <returns>A started listener that the caller must dispose.</returns>
        /// <exception cref="HttpListenerException">
        /// Rethrown from the final attempt when no port could be bound.
        /// </exception>
        private static HttpListener OccupyDashboardPort(out int port)
        {
            var random = new Random();

            for (var attempt = 1; ; attempt++)
            {
                var candidatePort = random.Next(19500, 19999);
                var listener = new HttpListener();
                listener.Prefixes.Add($"http://localhost:{candidatePort}/");

                try
                {
                    listener.Start();
                    port = candidatePort;
                    return listener;
                }
                catch (HttpListenerException)
                {
                    // Port already taken: release this listener and try another port,
                    // unless the retry budget is exhausted.
                    listener.Close();
                    if (attempt >= MaxPortAttempts)
                    {
                        throw;
                    }
                }
            }
        }
    }
}