Close all four findings from the 2026-04-13 stability review: (1) a failed runtime probe subscription can no longer leave a phantom entry that Tick() flips to Stopped, fanning out false BadOutOfService quality across a host's subtree; (2) a silently failed dashboard bind no longer lets the service advertise a successful start while an operator-visible endpoint is dead; (3) the seven sync-over-async sites in LmxNodeManager (rebuild probe sync, Read, Write, and the four HistoryRead overrides) can no longer park the OPC UA stack thread indefinitely on a hung backend; and (4) alarm auto-subscribe and transferred-subscription restore no longer race shutdown as untracked fire-and-forget tasks.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -192,6 +192,43 @@ namespace ZB.MOM.WW.LmxOpcUa.Tests.Configuration
|
||||
config.Security.MinimumCertificateKeySize.ShouldBe(2048);
|
||||
}
|
||||
|
||||
/// <summary>
/// Stability review 2026-04-13 Finding 3: MxAccess.RequestTimeoutSeconds must be at
/// least 1. Zero or negative values disable the safety bound and are rejected; this
/// test exercises the zero case.
/// </summary>
[Fact]
public void Validator_MxAccessRequestTimeoutZero_ReturnsFalse()
{
    // Arrange: take the JSON-loaded configuration and zero out the MxAccess timeout.
    var settings = LoadFromJson();
    settings.MxAccess.RequestTimeoutSeconds = 0;

    // Act
    var isValid = ConfigurationValidator.ValidateAndLog(settings);

    // Assert
    isValid.ShouldBe(false);
}
|
||||
|
||||
/// <summary>
/// Stability review 2026-04-13 Finding 3: Historian.RequestTimeoutSeconds must be at
/// least 1 when the historian is enabled. This test enables the historian and sets
/// the timeout to zero, expecting validation to fail.
/// </summary>
[Fact]
public void Validator_HistorianRequestTimeoutZero_ReturnsFalse()
{
    // Arrange: enable the historian so its timeout is subject to validation.
    var settings = LoadFromJson();
    var historian = settings.Historian;
    historian.Enabled = true;
    historian.ServerName = "localhost";
    historian.RequestTimeoutSeconds = 0;

    // Act
    var isValid = ConfigurationValidator.ValidateAndLog(settings);

    // Assert
    isValid.ShouldBe(false);
}
|
||||
|
||||
/// <summary>
/// Confirms that a freshly constructed AppConfiguration carries request timeouts of at
/// least one second for both MxAccess and the historian.
/// </summary>
[Fact]
public void Validator_DefaultRequestTimeouts_AreSensible()
{
    var defaults = new AppConfiguration();

    var mxAccessTimeout = defaults.MxAccess.RequestTimeoutSeconds;
    mxAccessTimeout.ShouldBeGreaterThanOrEqualTo(1);

    var historianTimeout = defaults.Historian.RequestTimeoutSeconds;
    historianTimeout.ShouldBeGreaterThanOrEqualTo(1);
}
|
||||
|
||||
/// <summary>
|
||||
/// Confirms that a minimum key size below 2048 is rejected by the validator.
|
||||
/// </summary>
|
||||
|
||||
@@ -402,6 +402,73 @@ namespace ZB.MOM.WW.LmxOpcUa.Tests.MxAccess
|
||||
sut.IsHostStopped(20).ShouldBeFalse();
|
||||
}
|
||||
|
||||
// ---------- Subscribe failure rollback (stability review 2026-04-13 Finding 1) ----------
|
||||
|
||||
[Fact]
public async Task Sync_SubscribeThrows_DoesNotLeavePhantomEntry()
{
    // Arrange: a fake client configured with an exception for its subscribe path.
    var failingClient = new FakeMxAccessClient();
    failingClient.SubscribeException = new InvalidOperationException("advise failed");

    var stopSpy = new List<int>();
    var runSpy = new List<int>();
    using var sut = Sut(failingClient, 15, stopSpy, runSpy);
    var hosts = new[] { Engine(20, "DevAppEngine") };

    // Act
    await sut.SyncAsync(hosts);

    // Assert: the failed subscribe must be rolled back completely. Any leftover entry
    // is a phantom that Tick() could later move from Unknown to Stopped.
    sut.ActiveProbeCount.ShouldBe(0);
    sut.GetSnapshot().ShouldBeEmpty();
    sut.IsHostStopped(20).ShouldBeFalse();
}
|
||||
|
||||
[Fact]
public async Task Sync_SubscribeThrows_TickDoesNotFireStopCallback()
{
    // Arrange: a fake client configured with a subscribe exception, plus a
    // controllable clock so the test can move time forward without sleeping.
    var failingClient = new FakeMxAccessClient();
    failingClient.SubscribeException = new InvalidOperationException("advise failed");

    var clock = new Clock();
    var stopSpy = new List<int>();
    var runSpy = new List<int>();
    using var sut = Sut(failingClient, 15, stopSpy, runSpy, clock);
    var hosts = new[] { Engine(20, "DevAppEngine") };

    await sut.SyncAsync(hosts);

    // Act: jump beyond the unknown timeout and tick. Had the rollback been incomplete,
    // this is where a phantom entry would be switched to Stopped and a false
    // host-down signal would be fanned out.
    var afterTimeout = clock.Now.AddSeconds(30);
    clock.Now = afterTimeout;
    sut.Tick();

    // Assert: neither callback fired and nothing is being tracked.
    stopSpy.ShouldBeEmpty();
    runSpy.ShouldBeEmpty();
    sut.ActiveProbeCount.ShouldBe(0);
}
|
||||
|
||||
[Fact]
public async Task Sync_SubscribeSucceedsAfterRetry_AppearsInSnapshot()
{
    // Scenario: once a failed subscribe has been rolled back cleanly, a later
    // successful SyncAsync for the same host must behave like a first-time sync.
    var client = new FakeMxAccessClient();
    client.SubscribeException = new InvalidOperationException("first attempt fails");

    var stopSpy = new List<int>();
    var runSpy = new List<int>();
    using var sut = Sut(client, 15, stopSpy, runSpy);

    // First attempt: the subscribe fault is set, so nothing may remain tracked.
    await sut.SyncAsync(new[] { Engine(20, "DevAppEngine") });
    sut.ActiveProbeCount.ShouldBe(0);

    // Second attempt: remove the fault and sync the same host again.
    client.SubscribeException = null;
    await sut.SyncAsync(new[] { Engine(20, "DevAppEngine") });

    // The host is now tracked and reported in the Unknown state.
    sut.ActiveProbeCount.ShouldBe(1);
    var onlyEntry = sut.GetSnapshot().Single();
    onlyEntry.State.ShouldBe(GalaxyRuntimeState.Unknown);
}
|
||||
|
||||
// ---------- Callback exception safety ----------
|
||||
|
||||
[Fact]
|
||||
|
||||
@@ -96,6 +96,26 @@ namespace ZB.MOM.WW.LmxOpcUa.Tests.Status
|
||||
response.StatusCode.ShouldBe(HttpStatusCode.MethodNotAllowed);
|
||||
}
|
||||
|
||||
/// <summary>
/// Confirms that Start() returns false when the target port is already bound by another
/// listener. Regression guard for the stability-review 2026-04-13 Finding 2: OpcUaService
/// now surfaces this return value into DashboardStartFailed.
/// </summary>
[Fact]
public void Start_WhenPortInUse_ReturnsFalse()
{
    // Ask the OS for a currently free loopback port instead of guessing one at random.
    // A random pick from a fixed range can land on a port that some other process
    // already holds, which would make blocker.Start() throw and fail the test for a
    // reason unrelated to StatusWebServer.
    int port;
    var probe = new System.Net.Sockets.TcpListener(IPAddress.Loopback, 0);
    probe.Start();
    try
    {
        port = ((IPEndPoint)probe.LocalEndpoint).Port;
    }
    finally
    {
        // Release the port so the HttpListener below can claim it.
        probe.Stop();
    }

    // Occupy the port first so the server under test finds it taken.
    using var blocker = new HttpListener();
    blocker.Prefixes.Add($"http://localhost:{port}/");
    blocker.Start();

    var reportService = new StatusReportService(new HealthCheckService(), 10);
    reportService.SetComponents(new FakeMxAccessClient(), null, null, null);
    using var contested = new StatusWebServer(reportService, port);

    contested.Start().ShouldBeFalse();
}
|
||||
|
||||
/// <summary>
|
||||
/// Confirms that cache-control headers disable caching for dashboard responses.
|
||||
/// </summary>
|
||||
|
||||
@@ -0,0 +1,72 @@
|
||||
using System;
|
||||
using System.Threading.Tasks;
|
||||
using Shouldly;
|
||||
using Xunit;
|
||||
using ZB.MOM.WW.LmxOpcUa.Host.Utilities;
|
||||
|
||||
namespace ZB.MOM.WW.LmxOpcUa.Tests.Utilities
{
    /// <summary>
    /// Exercises <c>SyncOverAsync.WaitSync</c>, the bounded sync-over-async wrapper added for
    /// stability review 2026-04-13 Finding 3. The wrapper acts as a backstop at every
    /// LmxNodeManager sync-over-async site (Read, Write, HistoryRead*, BuildAddressSpace
    /// probe sync).
    /// </summary>
    public class SyncOverAsyncTests
    {
        // Generous bound for tasks that are already finished when WaitSync is called.
        private static readonly TimeSpan OneSecond = TimeSpan.FromSeconds(1);

        // Short bound for tasks that never finish, so timeout tests stay fast.
        private static readonly TimeSpan HundredMilliseconds = TimeSpan.FromMilliseconds(100);

        /// <summary>A completed generic task hands back its result.</summary>
        [Fact]
        public void WaitSync_CompletedTask_ReturnsResult()
        {
            var completed = Task.FromResult(42);

            var result = SyncOverAsync.WaitSync(completed, OneSecond, "test");

            result.ShouldBe(42);
        }

        /// <summary>A completed non-generic task returns without throwing.</summary>
        [Fact]
        public void WaitSync_CompletedNonGenericTask_Returns()
        {
            var completed = Task.CompletedTask;

            Should.NotThrow(() => SyncOverAsync.WaitSync(completed, OneSecond, "test"));
        }

        /// <summary>
        /// A generic task that never completes produces a TimeoutException whose message
        /// contains the operation name.
        /// </summary>
        [Fact]
        public void WaitSync_NeverCompletingTask_ThrowsTimeoutException()
        {
            var pending = new TaskCompletionSource<int>();

            var timeout = Should.Throw<TimeoutException>(() =>
                SyncOverAsync.WaitSync(pending.Task, HundredMilliseconds, "op"));

            timeout.Message.ShouldContain("op");
        }

        /// <summary>A non-generic task that never completes produces a TimeoutException.</summary>
        [Fact]
        public void WaitSync_NeverCompletingNonGenericTask_ThrowsTimeoutException()
        {
            var pending = new TaskCompletionSource<bool>();

            // The cast selects the non-generic overload.
            Should.Throw<TimeoutException>(() =>
                SyncOverAsync.WaitSync((Task)pending.Task, HundredMilliseconds, "op"));
        }

        /// <summary>A faulted non-generic task surfaces the original exception type.</summary>
        [Fact]
        public void WaitSync_FaultedNonGenericTask_UnwrapsInnerException()
        {
            var faulted = Task.FromException(new InvalidOperationException("boom"));

            Should.Throw<InvalidOperationException>(() =>
                SyncOverAsync.WaitSync(faulted, OneSecond, "op"));
        }

        /// <summary>A faulted generic task surfaces the original exception type.</summary>
        [Fact]
        public void WaitSync_FaultedGenericTask_UnwrapsInnerException()
        {
            var faulted = Task.FromException<int>(new InvalidOperationException("boom"));

            Should.Throw<InvalidOperationException>(() =>
                SyncOverAsync.WaitSync(faulted, OneSecond, "op"));
        }

        /// <summary>Both overloads reject a null task with ArgumentNullException.</summary>
        [Fact]
        public void WaitSync_NullTask_ThrowsArgumentNullException()
        {
            // Non-generic overload.
            Should.Throw<ArgumentNullException>(() =>
                SyncOverAsync.WaitSync((Task)null!, OneSecond, "op"));

            // Generic overload.
            Should.Throw<ArgumentNullException>(() =>
                SyncOverAsync.WaitSync((Task<int>)null!, OneSecond, "op"));
        }
    }
}
|
||||
@@ -0,0 +1,78 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Net;
|
||||
using Shouldly;
|
||||
using Xunit;
|
||||
using ZB.MOM.WW.LmxOpcUa.Host;
|
||||
using ZB.MOM.WW.LmxOpcUa.Host.Configuration;
|
||||
using ZB.MOM.WW.LmxOpcUa.Host.Domain;
|
||||
using ZB.MOM.WW.LmxOpcUa.Tests.Helpers;
|
||||
|
||||
namespace ZB.MOM.WW.LmxOpcUa.Tests.Wiring
{
    /// <summary>
    /// Regression for stability review 2026-04-13 Finding 2. Confirms that when the dashboard
    /// port is already bound, the service continues to start (degraded mode) and the
    /// <see cref="OpcUaService.DashboardStartFailed"/> flag is raised.
    /// </summary>
    public class OpcUaServiceDashboardFailureTests
    {
        /// <summary>
        /// Starts the service while another listener already holds the dashboard port, then
        /// checks that the server host exists, the failure flag is set, and no status web
        /// server is exposed.
        /// </summary>
        [Fact]
        public void Start_DashboardPortInUse_ContinuesInDegradedMode()
        {
            // Use an OS-assigned free port rather than a random one from a fixed range.
            // A random pick can hit a port that is already taken, making blocker.Start()
            // throw before the scenario under test is even set up.
            var dashboardPort = GetFreeLoopbackPort();

            // Occupy the dashboard port so the service's own bind attempt finds it taken.
            using var blocker = new HttpListener();
            blocker.Prefixes.Add($"http://localhost:{dashboardPort}/");
            blocker.Start();

            var config = new AppConfiguration
            {
                OpcUa = new OpcUaConfiguration
                {
                    Port = 14842,
                    GalaxyName = "TestGalaxy",
                    EndpointPath = "/LmxOpcUa"
                },
                MxAccess = new MxAccessConfiguration { ClientName = "Test" },
                GalaxyRepository = new GalaxyRepositoryConfiguration(),
                Dashboard = new DashboardConfiguration { Enabled = true, Port = dashboardPort }
            };

            var proxy = new FakeMxProxy();

            // Minimal repository content: one object with one attribute.
            var repo = new FakeGalaxyRepository
            {
                Hierarchy = new List<GalaxyObjectInfo>
                {
                    new()
                    {
                        GobjectId = 1, TagName = "TestObj", BrowseName = "TestObj",
                        ParentGobjectId = 0, IsArea = false
                    }
                },
                Attributes = new List<GalaxyAttributeInfo>
                {
                    new()
                    {
                        GobjectId = 1, TagName = "TestObj", AttributeName = "TestAttr",
                        FullTagReference = "TestObj.TestAttr", MxDataType = 5, IsArray = false
                    }
                }
            };

            var service = new OpcUaService(config, proxy, repo);
            service.Start();

            try
            {
                // Service continues despite dashboard bind failure — degraded mode policy.
                service.ServerHost.ShouldNotBeNull();
                service.DashboardStartFailed.ShouldBeTrue();
                service.StatusWeb.ShouldBeNull();
            }
            finally
            {
                // Always stop the service so a failed assertion does not leave it running.
                service.Stop();
            }
        }

        /// <summary>
        /// Binds a temporary TCP listener to loopback port 0 so the OS assigns a free port,
        /// reads the assigned number, and releases the listener again.
        /// </summary>
        /// <returns>A port number that was free at the time of the call.</returns>
        private static int GetFreeLoopbackPort()
        {
            var probe = new System.Net.Sockets.TcpListener(IPAddress.Loopback, 0);
            probe.Start();
            try
            {
                return ((IPEndPoint)probe.LocalEndpoint).Port;
            }
            finally
            {
                probe.Stop();
            }
        }
    }
}
|
||||
Reference in New Issue
Block a user