From 004558c2410eb00c9bad8aba4963821f7f6bbcd0 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Thu, 11 Jun 2026 06:26:48 -0400 Subject: [PATCH] fix(alarms): delta-gate WriteAlarmCondition to suppress inbound ack double-emit (T20) Inbound client acks now route through the engine (T18/T19). On a successful Acknowledge the T18 gate returns Good, so the SDK applies the acked state to the AlarmConditionState node and auto-fires its own condition event (E2) -- directly on the node, bypassing WriteAlarmCondition. The engine then re-projects that same transition through WriteAlarmCondition, which fired again (E3): a double-emit. Gate WriteAlarmCondition's ReportConditionEvent on a genuine delta computed against the node's CURRENT live state (read before projecting the snapshot), not a last-written cache (which would be stale, since the SDK-applied ack never went through this method). For a re-projected ack the snapshot equals the node's already-applied state -> no delta -> suppress E3. Genuine engine-driven transitions still differ -> fire. Compared fields (value-equality via AlarmConditionDelta record): Active, Acked, Confirmed, Enabled, Shelving (mapped from the shelving state machine), Severity (mapped through MapSeverity to match the bucket the node stores), Message. Optional Confirmed/Shelving fold to the node read-back default when the child is absent so they can't register a phantom delta. Tests prove both: suppression of the simulated inbound ack re-projection (EventId unchanged) and that genuine transitions fire while identical re-projections suppress; plus a direct unit test of the ShouldFireConditionEvent seam. 102/102 OpcUaServer.Tests green. --- .../OtOpcUaNodeManager.cs | 131 +++++++++++++++--- .../SdkAddressSpaceSinkTests.cs | 121 ++++++++++++++++ 2 files changed, 235 insertions(+), 17 deletions(-) diff --git a/src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs b/src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs index 9261dd2b..d81bf796 100644 --- a/src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs +++ b/src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs @@ -131,6 +131,17 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2 { if (_alarmConditions.TryGetValue(alarmNodeId, out var condition)) { + // T20 delta-gate: read the node's CURRENT live condition state FIRST (before projecting + // the incoming snapshot onto it), then decide fire-vs-suppress by comparing the incoming + // snapshot to that current state. We gate against the NODE's state, NOT a "last written" + // cache, because an inbound client ack the SDK applied (OnAcknowledge returned Good → + // SDK mutated AckedState + auto-fired its own event) NEVER passed through this method, so a + // last-written cache would be stale and wrongly report a delta. By the time the engine + // re-projects that ack here, the node already holds the acked state → no delta → suppress. + var current = ReadConditionDelta(condition); + var incoming = ToConditionDelta(state, condition); + bool fire = ShouldFireConditionEvent(current, incoming); + // EnabledState / AckedState / ActiveState are mandatory children — always present after // Create. Confirm + Shelving are optional Part 9 children: T14's real-server finding is // that Create auto-builds them for our subtypes, but a base AlarmConditionState (or a @@ -166,12 +177,17 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2 condition.Time.Value = sourceTimestampUtc; condition.ReceiveTime.Value = sourceTimestampUtc; - // T16 — fire a real Part 9 condition event for THIS engine-driven transition. The - // ScriptedAlarmHostActor only calls WriteAlarmCondition on a REAL state transition - // (Emission != None/Suppressed), so every call corresponds to exactly one transition → - // fire exactly one event. ReportConditionEvent stamps a fresh EventId, ClearChangeMasks, - // and ReportEvent — all still under this lock. - ReportConditionEvent(condition, sourceTimestampUtc); + // T20 — fire a real Part 9 condition event ONLY when this projection is a genuine state + // change (the delta-gate decided above, against the node's pre-projection state). A + // genuine engine-driven transition (alarm goes active/clear, severity bucket shifts, an + // engine-side ack, etc.) differs from the node's current state → fire. The re-projection + // of a client ack the SDK already applied equals the node's current state → no delta → + // suppress, so we don't double-emit (E2 from the SDK + E3 from here). ReportConditionEvent + // stamps a fresh EventId, ClearChangeMasks, and ReportEvent — all still under this lock. + if (fire) + { + ReportConditionEvent(condition, sourceTimestampUtc); + } return; } @@ -206,14 +222,16 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2 /// materialise) — no branch creation here. /// /// - /// Double-emit note (for T17). This helper fires ONLY for ENGINE-DRIVEN (outbound) - /// transitions routed through . T17's inbound - /// Acknowledge/Confirm will go through the SDK's own OnAcknowledgeCalled, which already - /// auto-fires a condition event (ReportStateChange) on a successful ack. These don't - /// overlap today (T17 isn't built), but once it lands an engine-driven re-projection of an ack - /// the SDK has ALREADY auto-emitted could double-emit the same logical transition — to be - /// reconciled in T17 (either suppress the SDK auto-fire, or skip the engine re-projection for - /// transitions that originated from an inbound method call). + /// Double-emit note (resolved by delta-gate). An inbound client Acknowledge/Confirm + /// goes through the SDK's own handler, which (after T18's gate returns Good) applies the acked + /// state to the node and auto-fires its own condition event (E2) — directly on the node, + /// BYPASSING . The engine then re-projects that same logical + /// transition through , which would otherwise fire a second + /// event (E3). 's delta-gate suppresses E3: it compares the + /// incoming snapshot against the NODE's CURRENT state, and because the SDK has ALREADY + /// pre-applied the inbound-ack state, the re-projection is a no-delta no-op (no fire). Genuine + /// engine-driven transitions still differ from the node's current state, so they fire here as + /// before. /// /// /// The materialised condition whose new state has already been projected; must be non-null. @@ -260,6 +278,84 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2 } } + /// + /// The gate-relevant slice of a Part 9 condition's state — exactly the fields that drive a + /// condition event AND that an can change. As a record, two + /// instances compare by value, so is a plain inequality. + /// + /// Severity is stored as the MAPPED bucket (a + /// ) — the same value the node holds after SetSeverity — so two + /// raw severities that fall in the same bucket are correctly treated as "no change". The + /// Shelving kind is read back from / mapped to the SDK's shelving state machine so the + /// live node and the snapshot compare on the same 3-way (Unshelved/OneShot/Timed) axis. + /// + /// + internal readonly record struct AlarmConditionDelta( + bool Active, + bool Acknowledged, + bool Confirmed, + bool Enabled, + AlarmShelvingKind Shelving, + ushort MappedSeverity, + string Message); + + /// Decide whether a projection is a genuine state change + /// (and so should fire a Part 9 condition event) by comparing the node's pre-projection state to the + /// incoming snapshot. Pure + value-based so it's unit-testable in isolation: returns true iff + /// any gate-relevant field differs. An inbound client ack the SDK already applied makes + /// == false (suppress the re-projected + /// double-emit); a genuine engine-driven transition differs ⇒ true (fire). + /// The node's current (pre-projection) gate-relevant state. + /// The incoming snapshot's gate-relevant state. + /// true to fire a condition event; false to suppress (no delta). + internal static bool ShouldFireConditionEvent(AlarmConditionDelta current, AlarmConditionDelta incoming) => + current != incoming; + + /// Read the gate-relevant slice off the LIVE condition node. Mandatory children + /// (Active/Acked/Enabled) are always present; Confirmed/Shelving are optional and null-guarded + /// (a leaner child set ⇒ treat as the unset default). Severity is read as the already-mapped + /// bucket the node stores, and shelving is mapped from the shelving + /// state machine's CurrentState so it lines up with . + private static AlarmConditionDelta ReadConditionDelta(AlarmConditionState condition) => new( + Active: condition.ActiveState?.Id?.Value ?? false, + Acknowledged: condition.AckedState?.Id?.Value ?? true, + Confirmed: condition.ConfirmedState?.Id?.Value ?? true, + Enabled: condition.EnabledState?.Id?.Value ?? true, + Shelving: ReadShelvingKind(condition), + MappedSeverity: condition.Severity?.Value ?? (ushort)0, + Message: condition.Message?.Value?.Text ?? string.Empty); + + /// Build the gate-relevant slice from the incoming snapshot, normalising the two fields that + /// the node stores in a derived form: Severity is run through so it matches + /// the bucket the node holds (the projection calls SetSeverity(MapSeverity(...))), and an + /// optional Confirmed/Shelving that the node can't actually hold (missing child) is folded to the + /// node's read-back default so it never spuriously registers as a delta. + private static AlarmConditionDelta ToConditionDelta(AlarmConditionSnapshot state, AlarmConditionState condition) => new( + Active: state.Active, + Acknowledged: state.Acknowledged, + // If the node has no ConfirmedState child, the projection is a no-op there; mirror the node's + // read-back default (true) so a snapshot Confirmed value can't create a phantom delta. + Confirmed: condition.ConfirmedState is not null ? state.Confirmed : true, + Enabled: state.Enabled, + // Likewise for shelving: without a ShelvingState child the projection can't apply, so fold to the + // node's read-back default (Unshelved). + Shelving: condition.ShelvingState is not null ? state.Shelving : AlarmShelvingKind.Unshelved, + MappedSeverity: (ushort)MapSeverity(state.Severity), + Message: state.Message ?? string.Empty); + + /// Map the live shelving state machine's CurrentState back to our 3-way + /// by matching its well-known Part 9 state object id. Any node without + /// a shelving sub-state machine (or an unrecognised/unset state) reads as + /// — the same value folds an + /// unsupported shelving snapshot to, so the two stay comparable. + private static AlarmShelvingKind ReadShelvingKind(AlarmConditionState condition) + { + var stateId = condition.ShelvingState?.CurrentState?.Id?.Value as NodeId; + if (stateId == ObjectIds.ShelvedStateMachineType_OneShotShelved) return AlarmShelvingKind.OneShot; + if (stateId == ObjectIds.ShelvedStateMachineType_TimedShelved) return AlarmShelvingKind.Timed; + return AlarmShelvingKind.Unshelved; + } + /// /// Materialise a real OPC UA Part 9 node under its equipment /// folder so clients can browse it as a proper condition (and subscribe to its events). The node @@ -339,9 +435,10 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2 // routing; these delegates are the veto/permission seam the SDK invokes BEFORE applying the // state change. Each gates on the caller's AlarmAck role (fails closed) and, when allowed, // routes a mapped AlarmCommand to the engine via AlarmCommandRouter, then returns Good so the - // SDK applies its node state + auto-fires its own event. - // T20: the engine re-projects that same logical transition through WriteAlarmCondition, which - // also fires — the resulting double-emit is de-duped in a later task (T20), NOT here. + // SDK applies its node state + auto-fires its own event (E2). + // T20: the engine re-projects that same logical transition through WriteAlarmCondition; its + // delta-gate (compares against the node's current state, which the SDK already pre-applied) + // sees no change and suppresses the would-be second event (E3) — so no double-emit. alarm.OnAcknowledge = (context, condition, _, comment) => HandleAlarmCommand(context, condition, "Acknowledge", comment, unshelveAt: null); alarm.OnConfirm = (context, condition, _, comment) => diff --git a/tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/SdkAddressSpaceSinkTests.cs b/tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/SdkAddressSpaceSinkTests.cs index 07215109..38030fb2 100644 --- a/tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/SdkAddressSpaceSinkTests.cs +++ b/tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/SdkAddressSpaceSinkTests.cs @@ -354,6 +354,127 @@ public sealed class SdkAddressSpaceSinkTests : IDisposable await host.DisposeAsync(); } + /// T20 — the inbound double-emit is suppressed by the delta-gate. We simulate the REAL + /// inbound sequence: a client Acknowledge whose T18 gate returned Good causes the SDK to apply the + /// acked state to the node and auto-fire its OWN event (E2) WITHOUT going through WriteAlarmCondition. + /// We reproduce that by applying the acked state directly onto the live AlarmConditionState the way + /// the SDK would. THEN the engine re-projects the same logical transition through WriteAlarmCondition + /// with the matching snapshot — and because that snapshot equals the node's current state, the + /// delta-gate must fire NO event (no E3). We prove "no event fired" by asserting the condition's + /// EventId is UNCHANGED across the WriteAlarmCondition call (ReportConditionEvent always restamps a + /// fresh GUID EventId when it fires — same probe the T16 event test uses). + [Fact] + public async Task WriteAlarmCondition_suppresses_event_when_snapshot_equals_node_current_state() + { + var (host, server) = await BootAsync(); + var nm = server.NodeManager!; + var sink = new SdkAddressSpaceSink(nm); + + sink.EnsureFolder("eq-ack", parentNodeId: null, displayName: "Equipment Ack"); + sink.MaterialiseAlarmCondition("alm-ack", "eq-ack", "HighTemp", "OffNormalAlarm", severity: 700); + + var condition = nm.TryGetAlarmCondition("alm-ack"); + condition.ShouldNotBeNull(); + + // Drive the alarm active+unacked through the engine path (a genuine transition → fires). + sink.WriteAlarmCondition("alm-ack", Snapshot(active: true, acknowledged: false, message: "active"), DateTime.UtcNow); + condition!.ActiveState.Id.Value.ShouldBeTrue(); + condition.AckedState.Id.Value.ShouldBeFalse(); + + // === Simulate the SDK-applied inbound Acknowledge (E2) === + // After T18's gate returns Good, the SDK applies the acked state to the node and auto-fires its + // own event — directly on the node, BYPASSING WriteAlarmCondition. Reproduce that node mutation. + lock (nm.Lock) + { + condition.SetAcknowledgedState(nm.SystemContext, true); + condition.Message.Value = new LocalizedText("active"); + condition.ClearChangeMasks(nm.SystemContext, includeChildren: true); + } + condition.AckedState.Id.Value.ShouldBeTrue(); + + // Capture the EventId AFTER the SDK-applied ack but BEFORE the engine re-projection. + var beforeReProject = (byte[]?)condition.EventId.Value?.Clone(); + + // === Engine re-projects the SAME acked transition through WriteAlarmCondition (would-be E3) === + // Snapshot equals the node's current state (active, acked, message "active") ⇒ delta-gate sees + // no change ⇒ NO event fires. + sink.WriteAlarmCondition("alm-ack", Snapshot(active: true, acknowledged: true, message: "active"), DateTime.UtcNow); + + var afterReProject = (byte[]?)condition.EventId.Value?.Clone(); + // EventId is UNCHANGED ⇒ ReportConditionEvent did NOT run ⇒ E3 suppressed. + afterReProject.ShouldBe(beforeReProject); + + // The projection still applied (state is intact) — only the redundant event was suppressed. + condition.ActiveState.Id.Value.ShouldBeTrue(); + condition.AckedState.Id.Value.ShouldBeTrue(); + + await host.DisposeAsync(); + } + + /// T20 — genuine engine-driven transitions still fire exactly one event, and a second + /// IDENTICAL WriteAlarmCondition (no delta vs the node's now-current state) fires zero more. We use + /// the EventId-changed-on-fire probe: a fire restamps a fresh GUID EventId, a suppress leaves it + /// untouched. + [Fact] + public async Task WriteAlarmCondition_fires_on_delta_and_suppresses_identical_reprojection() + { + var (host, server) = await BootAsync(); + var nm = server.NodeManager!; + var sink = new SdkAddressSpaceSink(nm); + + sink.EnsureFolder("eq-delta", parentNodeId: null, displayName: "Equipment Delta"); + sink.MaterialiseAlarmCondition("alm-delta", "eq-delta", "HighTemp", "OffNormalAlarm", severity: 700); + + var condition = nm.TryGetAlarmCondition("alm-delta"); + condition.ShouldNotBeNull(); + + var beforeFirst = (byte[]?)condition!.EventId.Value?.Clone(); + + // Genuine transition: snapshot (active, unacked) differs from the materialise state + // (inactive, acked) ⇒ delta ⇒ fires exactly one event (EventId changes). + sink.WriteAlarmCondition("alm-delta", Snapshot(active: true, acknowledged: false, message: "active"), DateTime.UtcNow); + var afterFirst = (byte[]?)condition.EventId.Value?.Clone(); + afterFirst.ShouldNotBeNull(); + afterFirst!.ShouldNotBe(beforeFirst); // fired + + // Identical re-projection: snapshot now EQUALS the node's current state ⇒ no delta ⇒ 0 more + // events (EventId unchanged from the first fire). + sink.WriteAlarmCondition("alm-delta", Snapshot(active: true, acknowledged: false, message: "active"), DateTime.UtcNow); + var afterSecond = (byte[]?)condition.EventId.Value?.Clone(); + afterSecond.ShouldBe(afterFirst); // suppressed + + // A FURTHER genuine transition (clear) differs again ⇒ fires once more. + sink.WriteAlarmCondition("alm-delta", Snapshot(active: false, acknowledged: true, message: "cleared"), DateTime.UtcNow); + var afterThird = (byte[]?)condition.EventId.Value?.Clone(); + afterThird.ShouldNotBe(afterSecond); // fired + + await host.DisposeAsync(); + } + + /// T20 — direct unit test of the pure fire-vs-suppress decision seam + /// (). Equal states ⇒ suppress; any single + /// field differing ⇒ fire. This pins the gate logic independent of the booted server. + [Fact] + public void ShouldFireConditionEvent_fires_only_on_a_field_delta() + { + var baseState = new OtOpcUaNodeManager.AlarmConditionDelta( + Active: true, Acknowledged: false, Confirmed: true, Enabled: true, + Shelving: AlarmShelvingKind.Unshelved, MappedSeverity: 100, Message: "m"); + + // Equal ⇒ suppress (this is the inbound double-emit case in pure form). + OtOpcUaNodeManager.ShouldFireConditionEvent(baseState, baseState).ShouldBeFalse(); + OtOpcUaNodeManager.ShouldFireConditionEvent(baseState, baseState with { }).ShouldBeFalse(); + + // Each single-field difference ⇒ fire. + OtOpcUaNodeManager.ShouldFireConditionEvent(baseState, baseState with { Active = false }).ShouldBeTrue(); + OtOpcUaNodeManager.ShouldFireConditionEvent(baseState, baseState with { Acknowledged = true }).ShouldBeTrue(); + OtOpcUaNodeManager.ShouldFireConditionEvent(baseState, baseState with { Confirmed = false }).ShouldBeTrue(); + OtOpcUaNodeManager.ShouldFireConditionEvent(baseState, baseState with { Enabled = false }).ShouldBeTrue(); + OtOpcUaNodeManager.ShouldFireConditionEvent(baseState, baseState with { Shelving = AlarmShelvingKind.Timed }).ShouldBeTrue(); + OtOpcUaNodeManager.ShouldFireConditionEvent(baseState, baseState with { MappedSeverity = 900 }).ShouldBeTrue(); + OtOpcUaNodeManager.ShouldFireConditionEvent(baseState, baseState with { Message = "other" }).ShouldBeTrue(); + } + /// Builds a test with sensible defaults so each call /// site only specifies the fields it cares about. private static AlarmConditionSnapshot Snapshot(