Compare commits

...

2 Commits

Author SHA1 Message Date
Joseph Doherty
404bfbe7e4 AlarmSurfaceInvoker — wraps IAlarmSource.Subscribe/Unsubscribe/Acknowledge through CapabilityInvoker with multi-host fan-out. Closes alarm-surface slice of task #161 (Phase 6.1 Stream A); the Roslyn invoker-coverage analyzer is split into new task #200 because a DiagnosticAnalyzer project is genuinely its own scaffolding PR (Microsoft.CodeAnalysis.CSharp.Workspaces dep, netstandard2.0 target, Microsoft.CodeAnalysis.Testing harness, ProjectReference OutputItemType=Analyzer wiring, and four corner-case rules I want tests for before shipping). Ship this PR as the runtime guardrail + callable API; the analyzer lands next as the compile-time guardrail. New AlarmSurfaceInvoker class in Core.Resilience. Three methods mirror IAlarmSource's three mutating surfaces: SubscribeAsync (fan-out: group sourceNodeIds by IPerCallHostResolver.ResolveHost, one CapabilityInvoker.ExecuteAsync per host with DriverCapability.AlarmSubscribe so AlarmSubscribe's retry policy kicks in + returns one IAlarmSubscriptionHandle per host); UnsubscribeAsync (single-host, defaultHost); AcknowledgeAsync (fan-out: group AlarmAcknowledgeRequests by resolver-mapped host, run each host's batch through DriverCapability.AlarmAcknowledge which does NOT retry per decision #143 — alarm-ack is a write-shaped op that's not idempotent at the plant-floor level). Drivers without IPerCallHostResolver (Galaxy single MXAccess endpoint, OpcUaClient against one remote, etc.) fall back to defaultHost = DriverInstanceId so breaker + bulkhead keying still happens; drivers with it get one-dead-PLC-doesn't-poison-siblings isolation per decision #144. Single-host single-subscribe returns [handle] with length 1; empty sourceNodeIds fast-paths to [] without a driver call. Five new AlarmSurfaceInvokerTests covering: (a) empty list short-circuits — driver method never called; (b) single-host sub routes via default host — one driver call with full id list; (c) multi-host sub fans out to 2 distinct hosts for 3 src ids mapping to 2 plcs — one driver call per host; (d) Acknowledge does not retry on failure — call count stays at 1 even with exception; (e) Subscribe retries transient failures — call count reaches 3 with a 2-failures-then-success fake. Core.Tests resilience-builder suite 19/19 passing (was 14, +5); Core.Tests whole suite still green. Core project builds 0 errors. Task #200 captures the compile-time guardrail: Roslyn DiagnosticAnalyzer at src/ZB.MOM.WW.OtOpcUa.Analyzers that flags direct invocations of the eleven capability-interface methods inside the Server namespace when the call is NOT inside a CapabilityInvoker.ExecuteAsync/ExecuteWriteAsync/AlarmSurfaceInvoker.*Async lambda. That analyzer is the reason we keep paying the wrapping-class overhead for every new capability.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-19 23:07:37 -04:00
006af636a0 Merge pull request (#139) - ExternalIdReservation merge in FinaliseBatch 2026-04-19 23:04:25 -04:00
2 changed files with 256 additions and 0 deletions

View File

@@ -0,0 +1,129 @@
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
namespace ZB.MOM.WW.OtOpcUa.Core.Resilience;
/// <summary>
/// Wraps the three mutating surfaces of <see cref="IAlarmSource"/>
/// (<see cref="IAlarmSource.SubscribeAlarmsAsync"/>, <see cref="IAlarmSource.UnsubscribeAlarmsAsync"/>,
/// <see cref="IAlarmSource.AcknowledgeAsync"/>) through <see cref="CapabilityInvoker"/> so the
/// Phase 6.1 resilience pipeline runs — retry semantics match
/// <see cref="DriverCapability.AlarmSubscribe"/> (retries by default) and
/// <see cref="DriverCapability.AlarmAcknowledge"/> (does NOT retry per decision #143).
/// </summary>
/// <remarks>
/// <para>Multi-host dispatch: when the driver implements <see cref="IPerCallHostResolver"/>,
/// each source-node-id is resolved individually + grouped by host so a dead PLC inside a
/// multi-device driver doesn't poison the sibling hosts' breakers. Drivers with a single
/// host fall back to <see cref="IDriver.DriverInstanceId"/> as the single-host key.</para>
///
/// <para>Why this lives here + not on <see cref="CapabilityInvoker"/>: alarm surfaces have a
/// handle-returning shape (SubscribeAlarmsAsync returns <see cref="IAlarmSubscriptionHandle"/>)
/// + a per-call fan-out (AcknowledgeAsync gets a batch of
/// <see cref="AlarmAcknowledgeRequest"/>s that may span multiple hosts). Keeping the fan-out
/// logic here keeps the invoker's execute-overloads narrow.</para>
/// </remarks>
public sealed class AlarmSurfaceInvoker
{
private readonly CapabilityInvoker _invoker;
private readonly IAlarmSource _alarmSource;
private readonly IPerCallHostResolver? _hostResolver;
private readonly string _defaultHost;
public AlarmSurfaceInvoker(
CapabilityInvoker invoker,
IAlarmSource alarmSource,
string defaultHost,
IPerCallHostResolver? hostResolver = null)
{
ArgumentNullException.ThrowIfNull(invoker);
ArgumentNullException.ThrowIfNull(alarmSource);
ArgumentException.ThrowIfNullOrWhiteSpace(defaultHost);
_invoker = invoker;
_alarmSource = alarmSource;
_defaultHost = defaultHost;
_hostResolver = hostResolver;
}
/// <summary>
/// Subscribe to alarm events for a set of source node ids, fanning out by resolved host
/// so per-host breakers / bulkheads apply. Returns one handle per host — callers that
/// don't care about per-host separation may concatenate them.
/// </summary>
public async Task<IReadOnlyList<IAlarmSubscriptionHandle>> SubscribeAsync(
IReadOnlyList<string> sourceNodeIds,
CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(sourceNodeIds);
if (sourceNodeIds.Count == 0) return [];
var byHost = GroupByHost(sourceNodeIds);
var handles = new List<IAlarmSubscriptionHandle>(byHost.Count);
foreach (var (host, ids) in byHost)
{
var handle = await _invoker.ExecuteAsync(
DriverCapability.AlarmSubscribe,
host,
async ct => await _alarmSource.SubscribeAlarmsAsync(ids, ct).ConfigureAwait(false),
cancellationToken).ConfigureAwait(false);
handles.Add(handle);
}
return handles;
}
/// <summary>Cancel an alarm subscription. Routes through the AlarmSubscribe pipeline for parity.</summary>
public ValueTask UnsubscribeAsync(IAlarmSubscriptionHandle handle, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(handle);
return _invoker.ExecuteAsync(
DriverCapability.AlarmSubscribe,
_defaultHost,
async ct => await _alarmSource.UnsubscribeAlarmsAsync(handle, ct).ConfigureAwait(false),
cancellationToken);
}
/// <summary>
/// Acknowledge alarms. Fans out by resolved host; each host's batch runs through the
/// AlarmAcknowledge pipeline (no-retry per decision #143 — an alarm-ack is not idempotent
/// at the plant-floor acknowledgement level even if the OPC UA spec permits re-issue).
/// </summary>
public async Task AcknowledgeAsync(
IReadOnlyList<AlarmAcknowledgeRequest> acknowledgements,
CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(acknowledgements);
if (acknowledgements.Count == 0) return;
var byHost = _hostResolver is null
? new Dictionary<string, List<AlarmAcknowledgeRequest>> { [_defaultHost] = acknowledgements.ToList() }
: acknowledgements
.GroupBy(a => _hostResolver.ResolveHost(a.SourceNodeId))
.ToDictionary(g => g.Key, g => g.ToList());
foreach (var (host, batch) in byHost)
{
var batchSnapshot = batch; // capture for the lambda
await _invoker.ExecuteAsync(
DriverCapability.AlarmAcknowledge,
host,
async ct => await _alarmSource.AcknowledgeAsync(batchSnapshot, ct).ConfigureAwait(false),
cancellationToken).ConfigureAwait(false);
}
}
private Dictionary<string, List<string>> GroupByHost(IReadOnlyList<string> sourceNodeIds)
{
if (_hostResolver is null)
return new Dictionary<string, List<string>> { [_defaultHost] = sourceNodeIds.ToList() };
var result = new Dictionary<string, List<string>>(StringComparer.Ordinal);
foreach (var id in sourceNodeIds)
{
var host = _hostResolver.ResolveHost(id);
if (!result.TryGetValue(host, out var list))
result[host] = list = new List<string>();
list.Add(id);
}
return result;
}
}

View File

@@ -0,0 +1,127 @@
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Core.Resilience;
namespace ZB.MOM.WW.OtOpcUa.Core.Tests.Resilience;
[Trait("Category", "Unit")]
public sealed class AlarmSurfaceInvokerTests
{
private static readonly DriverResilienceOptions TierAOptions = new() { Tier = DriverTier.A };
[Fact]
public async Task SubscribeAsync_EmptyList_ReturnsEmpty_WithoutDriverCall()
{
var driver = new FakeAlarmSource();
var surface = NewSurface(driver, defaultHost: "h");
var handles = await surface.SubscribeAsync([], CancellationToken.None);
handles.Count.ShouldBe(0);
driver.SubscribeCallCount.ShouldBe(0);
}
[Fact]
public async Task SubscribeAsync_SingleHost_RoutesThroughDefaultHost()
{
var driver = new FakeAlarmSource();
var surface = NewSurface(driver, defaultHost: "h1");
var handles = await surface.SubscribeAsync(["src-1", "src-2"], CancellationToken.None);
handles.Count.ShouldBe(1);
driver.SubscribeCallCount.ShouldBe(1);
driver.LastSubscribedIds.ShouldBe(["src-1", "src-2"]);
}
[Fact]
public async Task SubscribeAsync_MultiHost_FansOutByResolvedHost()
{
var driver = new FakeAlarmSource();
var resolver = new StubResolver(new Dictionary<string, string>
{
["src-1"] = "plc-a",
["src-2"] = "plc-b",
["src-3"] = "plc-a",
});
var surface = NewSurface(driver, defaultHost: "default-ignored", resolver: resolver);
var handles = await surface.SubscribeAsync(["src-1", "src-2", "src-3"], CancellationToken.None);
handles.Count.ShouldBe(2); // one per distinct host
driver.SubscribeCallCount.ShouldBe(2); // one driver call per host
}
[Fact]
public async Task AcknowledgeAsync_DoesNotRetry_OnFailure()
{
var driver = new FakeAlarmSource { AcknowledgeShouldThrow = true };
var surface = NewSurface(driver, defaultHost: "h1");
await Should.ThrowAsync<InvalidOperationException>(() =>
surface.AcknowledgeAsync([new AlarmAcknowledgeRequest("s", "c", null)], CancellationToken.None));
driver.AcknowledgeCallCount.ShouldBe(1, "AlarmAcknowledge must not retry — decision #143");
}
[Fact]
public async Task SubscribeAsync_Retries_Transient_Failures()
{
var driver = new FakeAlarmSource { SubscribeFailuresBeforeSuccess = 2 };
var surface = NewSurface(driver, defaultHost: "h1");
await surface.SubscribeAsync(["src"], CancellationToken.None);
driver.SubscribeCallCount.ShouldBe(3, "AlarmSubscribe retries by default — decision #143");
}
private static AlarmSurfaceInvoker NewSurface(
IAlarmSource driver,
string defaultHost,
IPerCallHostResolver? resolver = null)
{
var builder = new DriverResiliencePipelineBuilder();
var invoker = new CapabilityInvoker(builder, "drv-1", () => TierAOptions);
return new AlarmSurfaceInvoker(invoker, driver, defaultHost, resolver);
}
private sealed class FakeAlarmSource : IAlarmSource
{
public int SubscribeCallCount { get; private set; }
public int AcknowledgeCallCount { get; private set; }
public int SubscribeFailuresBeforeSuccess { get; set; }
public bool AcknowledgeShouldThrow { get; set; }
public IReadOnlyList<string> LastSubscribedIds { get; private set; } = [];
public Task<IAlarmSubscriptionHandle> SubscribeAlarmsAsync(
IReadOnlyList<string> sourceNodeIds, CancellationToken cancellationToken)
{
SubscribeCallCount++;
LastSubscribedIds = sourceNodeIds;
if (SubscribeCallCount <= SubscribeFailuresBeforeSuccess)
throw new InvalidOperationException("transient");
return Task.FromResult<IAlarmSubscriptionHandle>(new StubHandle($"h-{SubscribeCallCount}"));
}
public Task UnsubscribeAlarmsAsync(IAlarmSubscriptionHandle handle, CancellationToken cancellationToken)
=> Task.CompletedTask;
public Task AcknowledgeAsync(
IReadOnlyList<AlarmAcknowledgeRequest> acknowledgements, CancellationToken cancellationToken)
{
AcknowledgeCallCount++;
if (AcknowledgeShouldThrow) throw new InvalidOperationException("ack boom");
return Task.CompletedTask;
}
public event EventHandler<AlarmEventArgs>? OnAlarmEvent { add { } remove { } }
}
private sealed record StubHandle(string DiagnosticId) : IAlarmSubscriptionHandle;
private sealed class StubResolver(Dictionary<string, string> map) : IPerCallHostResolver
{
public string ResolveHost(string fullReference) => map[fullReference];
}
}