From 56abd64c6ce6d7dad8ab16e1fca56d51e2b2451b Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 14 Jun 2026 02:33:02 -0400 Subject: [PATCH 1/6] metrics(alarms): expose provider-switch count in snapshot, bound the reason tag B1: add AlarmProviderSwitchCount to GatewayMetricsSnapshot so the switch total is readable without scraping the OTEL counter. B2: replace the free-text reason tag on mxgateway.alarms.provider_switches with a bounded AlarmProviderSwitchReason enum (failover/failback/unknown); the human-readable reason stays in the structured log. --- .../Alarms/GatewayAlarmMonitor.cs | 8 +++++++- .../Metrics/AlarmProviderSwitchReason.cs | 20 +++++++++++++++++++ .../Metrics/GatewayMetrics.cs | 19 ++++++++++++++---- .../Metrics/GatewayMetricsSnapshot.cs | 1 + .../Metrics/GatewayMetricsTests.cs | 7 ++++--- 5 files changed, 47 insertions(+), 8 deletions(-) create mode 100644 src/ZB.MOM.WW.MxGateway.Server/Metrics/AlarmProviderSwitchReason.cs diff --git a/src/ZB.MOM.WW.MxGateway.Server/Alarms/GatewayAlarmMonitor.cs b/src/ZB.MOM.WW.MxGateway.Server/Alarms/GatewayAlarmMonitor.cs index fdbfc89..a353d9c 100644 --- a/src/ZB.MOM.WW.MxGateway.Server/Alarms/GatewayAlarmMonitor.cs +++ b/src/ZB.MOM.WW.MxGateway.Server/Alarms/GatewayAlarmMonitor.cs @@ -399,7 +399,13 @@ public sealed class GatewayAlarmMonitor : BackgroundService, IGatewayAlarmServic BroadcastToAll(new AlarmFeedMessage { ProviderStatus = status }); } - _metrics.AlarmProviderSwitched(fromModeInt, ModeToInt(toMode), reason); + AlarmProviderSwitchReason switchReason = toMode switch + { + AlarmProviderMode.Subtag => AlarmProviderSwitchReason.Failover, + AlarmProviderMode.Alarmmgr => AlarmProviderSwitchReason.Failback, + _ => AlarmProviderSwitchReason.Unknown, + }; + _metrics.AlarmProviderSwitched(fromModeInt, ModeToInt(toMode), switchReason); _logger.LogInformation( "Alarm provider mode changed to {Mode} (degraded={Degraded}): {Reason}", diff --git a/src/ZB.MOM.WW.MxGateway.Server/Metrics/AlarmProviderSwitchReason.cs b/src/ZB.MOM.WW.MxGateway.Server/Metrics/AlarmProviderSwitchReason.cs new file mode 100644 index 0000000..da024c8 --- /dev/null +++ b/src/ZB.MOM.WW.MxGateway.Server/Metrics/AlarmProviderSwitchReason.cs @@ -0,0 +1,20 @@ +namespace ZB.MOM.WW.MxGateway.Server.Metrics; + +/// +/// Bounded classification of an alarm-provider switch, used as the low-cardinality +/// reason tag on the mxgateway.alarms.provider_switches counter. The +/// worker supplies a free-text reason (e.g. "primary PollOnce failed") that +/// stays in the structured log; only this bounded value reaches the metric tag so the +/// time series cannot fan out on operation-specific text. +/// +public enum AlarmProviderSwitchReason +{ + /// The switch direction could not be classified. + Unknown = 0, + + /// Switched from the primary (alarmmgr) provider to the subtag standby — degraded. + Failover = 1, + + /// Switched back from the subtag standby to the primary (alarmmgr) provider — recovered. + Failback = 2, +} diff --git a/src/ZB.MOM.WW.MxGateway.Server/Metrics/GatewayMetrics.cs b/src/ZB.MOM.WW.MxGateway.Server/Metrics/GatewayMetrics.cs index 959ef41..15fad05 100644 --- a/src/ZB.MOM.WW.MxGateway.Server/Metrics/GatewayMetrics.cs +++ b/src/ZB.MOM.WW.MxGateway.Server/Metrics/GatewayMetrics.cs @@ -50,6 +50,7 @@ public sealed class GatewayMetrics : IDisposable private long _heartbeatFailures; private long _streamDisconnects; private long _retryAttempts; + private long _alarmProviderSwitches; private bool _disposed; /// @@ -383,25 +384,34 @@ public sealed class GatewayMetrics : IDisposable } /// - /// Records that the alarm provider switched modes and updates the current provider mode gauge. + /// Records that the alarm provider switched modes, increments the switch count, and updates the + /// current provider mode gauge. /// /// Provider mode before the switch (1=alarmmgr, 2=subtag, 0=unknown). /// Provider mode after the switch (1=alarmmgr, 2=subtag, 0=unknown). - /// Human-readable reason for the switch. - public void AlarmProviderSwitched(int fromMode, int toMode, string reason) + /// Bounded switch classification used as the counter's reason tag. + public void AlarmProviderSwitched(int fromMode, int toMode, AlarmProviderSwitchReason reason) { lock (_syncRoot) { _alarmProviderMode = toMode; + _alarmProviderSwitches++; } _alarmProviderSwitchesCounter.Add( 1, new KeyValuePair("from", fromMode.ToString(CultureInfo.InvariantCulture)), new KeyValuePair("to", toMode.ToString(CultureInfo.InvariantCulture)), - new KeyValuePair("reason", reason)); + new KeyValuePair("reason", ReasonTag(reason))); } + private static string ReasonTag(AlarmProviderSwitchReason reason) => reason switch + { + AlarmProviderSwitchReason.Failover => "failover", + AlarmProviderSwitchReason.Failback => "failback", + _ => "unknown", + }; + /// Sets the current alarm provider-mode gauge without recording a switch (e.g. startup baseline). public void SetAlarmProviderMode(int mode) { @@ -433,6 +443,7 @@ public sealed class GatewayMetrics : IDisposable HeartbeatFailures: _heartbeatFailures, StreamDisconnects: _streamDisconnects, RetryAttempts: _retryAttempts, + AlarmProviderSwitchCount: _alarmProviderSwitches, CommandFailuresByMethod: new Dictionary(_commandFailuresByMethod, StringComparer.OrdinalIgnoreCase), EventsByFamily: new Dictionary(_eventsByFamily, StringComparer.OrdinalIgnoreCase), EventsBySession: new Dictionary(_eventsBySession, StringComparer.Ordinal), diff --git a/src/ZB.MOM.WW.MxGateway.Server/Metrics/GatewayMetricsSnapshot.cs b/src/ZB.MOM.WW.MxGateway.Server/Metrics/GatewayMetricsSnapshot.cs index c96f570..996d44a 100644 --- a/src/ZB.MOM.WW.MxGateway.Server/Metrics/GatewayMetricsSnapshot.cs +++ b/src/ZB.MOM.WW.MxGateway.Server/Metrics/GatewayMetricsSnapshot.cs @@ -18,6 +18,7 @@ public sealed record GatewayMetricsSnapshot( long HeartbeatFailures, long StreamDisconnects, long RetryAttempts, + long AlarmProviderSwitchCount, IReadOnlyDictionary CommandFailuresByMethod, IReadOnlyDictionary EventsByFamily, IReadOnlyDictionary EventsBySession, diff --git a/src/ZB.MOM.WW.MxGateway.Tests/Metrics/GatewayMetricsTests.cs b/src/ZB.MOM.WW.MxGateway.Tests/Metrics/GatewayMetricsTests.cs index 2ac7ff1..3fa974c 100644 --- a/src/ZB.MOM.WW.MxGateway.Tests/Metrics/GatewayMetricsTests.cs +++ b/src/ZB.MOM.WW.MxGateway.Tests/Metrics/GatewayMetricsTests.cs @@ -111,12 +111,13 @@ public sealed class GatewayMetricsTests }); listener.Start(); - metrics.AlarmProviderSwitched(1, 2, "test"); + metrics.AlarmProviderSwitched(1, 2, AlarmProviderSwitchReason.Failover); Assert.Equal(1, capturedValue); Assert.Equal("1", capturedFrom); Assert.Equal("2", capturedTo); - Assert.Equal("test", capturedReason); + Assert.Equal("failover", capturedReason); + Assert.Equal(1, metrics.GetSnapshot().AlarmProviderSwitchCount); } /// @@ -150,7 +151,7 @@ public sealed class GatewayMetricsTests }); listener.Start(); - metrics.AlarmProviderSwitched(1, 2, "test"); + metrics.AlarmProviderSwitched(1, 2, AlarmProviderSwitchReason.Failover); listener.RecordObservableInstruments(); Assert.Equal(2, capturedMode); From 5573f2a2291d3ce4a54b778498c584e98471b640 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 14 Jun 2026 02:33:14 -0400 Subject: [PATCH 2/6] galaxy(alarms): drop dead primitive branch from AlarmAttributesSql B5: the candidate CTE's src_pri=1 (primitive-instance) UNION ALL branch was always excluded by the final WHERE r.src_pri=0, so it added work with no output change. Remove the branch and the now-constant src_pri column/filter. An alarm anchor is always a user attribute, so output is identical. --- .../Galaxy/GalaxyRepository.cs | 32 ++++++------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/src/ZB.MOM.WW.MxGateway.Server/Galaxy/GalaxyRepository.cs b/src/ZB.MOM.WW.MxGateway.Server/Galaxy/GalaxyRepository.cs index a934466..d8922fd 100644 --- a/src/ZB.MOM.WW.MxGateway.Server/Galaxy/GalaxyRepository.cs +++ b/src/ZB.MOM.WW.MxGateway.Server/Galaxy/GalaxyRepository.cs @@ -308,11 +308,13 @@ LEFT JOIN data_type dt ON dt.mx_data_type = r.mx_data_type WHERE r.rn = 1 ORDER BY r.tag_name, r.attribute_name"; - // Alarm-only discovery for the subtag-fallback watch-list. This deliberately reuses the - // exact candidate/ranked CTE structure and the same `AlarmExtension`-based is_alarm - // detection as AttributesSql so the two queries cannot drift: a row qualifies only when - // its user attribute (src_pri 0) anchors an `AlarmExtension` primitive on the owning - // object. It projects just what the watch-list needs — full_tag_reference (tag_name + + // Alarm-only discovery for the subtag-fallback watch-list. This reuses the candidate/ranked + // CTE shape and the same `AlarmExtension`-based detection as AttributesSql. Unlike + // AttributesSql it keeps only the user-attribute (dynamic_attribute) candidate branch: an + // alarm anchor is always a user attribute, so the primitive-instance branch AttributesSql + // carries would be filtered out here anyway — a row qualifies only when its user attribute + // anchors an `AlarmExtension` primitive on the owning object. It projects just what the + // watch-list needs — full_tag_reference (tag_name + // '.' + attribute_name, matching AttributesSql) and the owning object's tag_name as // source_object_reference. The array `[]` suffix is intentionally omitted: an // alarm-bearing attribute is a scalar anchor, not an array body. It also projects the @@ -332,7 +334,7 @@ ORDER BY r.tag_name, r.attribute_name"; ), candidate AS ( SELECT - dpc.gobject_id, g.tag_name, da.attribute_name, dpc.depth, 0 AS src_pri + dpc.gobject_id, g.tag_name, da.attribute_name, dpc.depth FROM deployed_package_chain dpc INNER JOIN dynamic_attribute da ON da.package_id = dpc.package_id INNER JOIN gobject g ON g.gobject_id = dpc.gobject_id @@ -341,25 +343,10 @@ candidate AS ( AND da.attribute_name NOT LIKE '[_]%' AND da.attribute_name NOT LIKE '%.Description' AND da.mx_attribute_category IN (2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 24) - UNION ALL - SELECT - dpc.gobject_id, g.tag_name, - CASE WHEN pi.primitive_name IS NULL OR pi.primitive_name = '' - THEN ad.attribute_name - ELSE pi.primitive_name + '.' + ad.attribute_name END AS attribute_name, - dpc.depth, 1 AS src_pri - FROM deployed_package_chain dpc - INNER JOIN primitive_instance pi ON pi.package_id = dpc.package_id - INNER JOIN attribute_definition ad ON ad.primitive_definition_id = pi.primitive_definition_id - INNER JOIN gobject g ON g.gobject_id = dpc.gobject_id - INNER JOIN template_definition td ON td.template_definition_id = g.template_definition_id - WHERE td.category_id IN (1, 3, 4, 10, 11, 13, 17, 24, 26) - AND ad.attribute_name NOT LIKE '[_]%' - AND ad.attribute_name NOT LIKE '%.Description' ), ranked AS ( SELECT c.*, ROW_NUMBER() OVER ( - PARTITION BY c.gobject_id, c.attribute_name ORDER BY c.src_pri, c.depth) AS rn + PARTITION BY c.gobject_id, c.attribute_name ORDER BY c.depth) AS rn FROM candidate c ) SELECT @@ -370,7 +357,6 @@ FROM ranked r INNER JOIN gobject g ON g.gobject_id = r.gobject_id LEFT JOIN gobject area ON area.gobject_id = g.area_gobject_id WHERE r.rn = 1 - AND r.src_pri = 0 AND EXISTS ( SELECT 1 FROM deployed_package_chain dpc2 INNER JOIN primitive_instance pi ON pi.package_id = dpc2.package_id AND pi.primitive_name = r.attribute_name From 37aadf72b38d8bd6b71f37ffb86b4cc4e73ab763 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 14 Jun 2026 02:33:14 -0400 Subject: [PATCH 3/6] docs(alarms): clarify resolver cancellation contract; mark design doc superseded MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit C6b: IAlarmWatchListResolver.ResolveAsync doc now notes that while discovery being unavailable never throws, a triggered cancellation token still propagates. C7: annotate the original design doc where it drifted from the shipped code — metric names / unimplemented watch-list gauges, and the proto-type location (gateway proto, not worker proto). --- .../2026-06-13-alarm-subtag-fallback-design.md | 16 +++++++++++++++- .../Alarms/IAlarmWatchListResolver.cs | 6 ++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/docs/plans/2026-06-13-alarm-subtag-fallback-design.md b/docs/plans/2026-06-13-alarm-subtag-fallback-design.md index a1382b1..6981af1 100644 --- a/docs/plans/2026-06-13-alarm-subtag-fallback-design.md +++ b/docs/plans/2026-06-13-alarm-subtag-fallback-design.md @@ -1,7 +1,10 @@ # Alarm Subtag-Monitoring Fallback — Design **Date:** 2026-06-13 -**Status:** Approved (brainstorming), ready for implementation planning +**Status:** Superseded by implementation (merged to `main`). This is the original +brainstorming design; a few details below were refined during implementation — +see the inline **Superseded** notes. The shipped behaviour is documented in +`docs/AlarmClientDiscovery.md`, the client READMEs, and the contracts. **Branch:** `feat/alarm-subtag-fallback` ## Problem @@ -162,6 +165,11 @@ reconcile cadence and pushes an updated watch-list when the model changes. **`mxaccess_worker.proto`:** +> **Superseded:** these additions shipped in `mxaccess_gateway.proto`, not +> `mxaccess_worker.proto` — the worker imports the gateway proto and the alarm +> commands/events live there (`AlarmSubtagTarget`, +> `OnAlarmProviderModeChangedEvent`, the extended subscribe command). + - Extend the alarm-subscribe command with: `AlarmProviderMode forced_mode` (`UNSPECIFIED` = auto), `int32 consecutive_failure_threshold`, `int32 failback_probe_interval_seconds`, `int32 failback_stable_probes`, and @@ -240,6 +248,12 @@ to `/hubs/alarms`, (c) update metrics, (d) force a reconcile. - `mxgateway_alarm_provider_switch_total{from,to,reason}` (counter) - `mxgateway_alarm_fallback_watchlist_size` (gauge) +> **Superseded:** the shipped meter names are `mxgateway.alarms.provider_mode` +> (gauge) and `mxgateway.alarms.provider_switches{from,to,reason}` (counter, +> `reason` bounded to `failover`/`failback`/`unknown`). The watch-list-size / +> watch-list-empty gauges were not implemented; an empty watch-list is surfaced +> via a warning log and the feed's degraded `ProviderStatus` instead. + ## Configuration ```jsonc diff --git a/src/ZB.MOM.WW.MxGateway.Server/Alarms/IAlarmWatchListResolver.cs b/src/ZB.MOM.WW.MxGateway.Server/Alarms/IAlarmWatchListResolver.cs index 7581d5c..196b2b8 100644 --- a/src/ZB.MOM.WW.MxGateway.Server/Alarms/IAlarmWatchListResolver.cs +++ b/src/ZB.MOM.WW.MxGateway.Server/Alarms/IAlarmWatchListResolver.cs @@ -19,8 +19,10 @@ public interface IAlarmWatchListResolver /// Token to cancel the asynchronous operation. /// /// The resolved watch-list, possibly empty. - /// Discovery being unavailable never throws; the caller decides what to do - /// with an empty list. + /// Discovery being unavailable never throws — it yields an empty (or + /// config-only) list and the caller decides what to do with it. Cancellation + /// is the one exception: a triggered + /// still propagates an . /// Task> ResolveAsync( AlarmsOptions options, From a3752799de9c47526b43979ae3510c4712ff1cee Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 14 Jun 2026 02:35:59 -0400 Subject: [PATCH 4/6] worker(alarms): remove dead FailoverAlarmConsumer.subscriptionExpression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit B4: the field was stored in Subscribe but never read — the primary is never re-subscribed during probing. Drop it and keep the rationale as a comment. --- .../MxAccess/FailoverAlarmConsumer.cs | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/ZB.MOM.WW.MxGateway.Worker/MxAccess/FailoverAlarmConsumer.cs b/src/ZB.MOM.WW.MxGateway.Worker/MxAccess/FailoverAlarmConsumer.cs index b40c614..079c8b9 100644 --- a/src/ZB.MOM.WW.MxGateway.Worker/MxAccess/FailoverAlarmConsumer.cs +++ b/src/ZB.MOM.WW.MxGateway.Worker/MxAccess/FailoverAlarmConsumer.cs @@ -65,13 +65,6 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer private bool disposed; private DateTime lastProbeAtUtc = DateTime.MinValue; - /// - /// The subscription expression passed to . - /// Stored for documentation and potential future full re-subscribe - /// scenarios; the primary is NOT re-subscribed during probing. - /// - private string subscriptionExpression = string.Empty; - /// /// Composes the failover consumer over its two children. /// @@ -119,9 +112,9 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer { if (disposed) throw new ObjectDisposedException(nameof(FailoverAlarmConsumer)); - // Store for documentation; the primary is not torn down on failover - // and is therefore not re-subscribed during ProbeOnce. - subscriptionExpression = subscription; + // The primary is not torn down on failover and is therefore never + // re-subscribed during ProbeOnce, so the subscription expression does + // not need to be retained here. // Arm the standby first so it is warm regardless of primary outcome. // A standby subscribe failure is a hard fault (the fallback itself is From 986dcee14a41627170da537fc0de284ee85f6ae8 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 14 Jun 2026 02:35:59 -0400 Subject: [PATCH 5/6] worker(alarms): UnAdvise only advised handles in LmxSubtagAlarmSource teardown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit B3: track advised handles separately from added handles so Dispose only UnAdvises items that were actually advised — a write-only subtag (e.g. ack-comment added by Write, never advised) is removed but not unadvised. Add Dispose tests covering the advised/write-only split, full removal, single Unregister, and double-dispose idempotency. --- .../MxAccess/LmxSubtagAlarmSourceTests.cs | 73 ++++++++++++++++--- .../MxAccess/LmxSubtagAlarmSource.cs | 16 +++- 2 files changed, 79 insertions(+), 10 deletions(-) diff --git a/src/ZB.MOM.WW.MxGateway.Worker.Tests/MxAccess/LmxSubtagAlarmSourceTests.cs b/src/ZB.MOM.WW.MxGateway.Worker.Tests/MxAccess/LmxSubtagAlarmSourceTests.cs index bbfb05d..08995f9 100644 --- a/src/ZB.MOM.WW.MxGateway.Worker.Tests/MxAccess/LmxSubtagAlarmSourceTests.cs +++ b/src/ZB.MOM.WW.MxGateway.Worker.Tests/MxAccess/LmxSubtagAlarmSourceTests.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.Linq; using ZB.MOM.WW.MxGateway.Worker.MxAccess; namespace ZB.MOM.WW.MxGateway.Worker.Tests.MxAccess; @@ -133,6 +134,60 @@ public sealed class LmxSubtagAlarmSourceTests Assert.Single(server.Writes); } + /// + /// Verifies UnAdvises only the + /// handles that were actually advised — a write-only item (added by + /// but never advised) is removed + /// but not unadvised — and unregisters the server exactly once. + /// + [Fact] + public void Dispose_UnAdvisesOnlyAdvisedHandles_RemovesAll_AndUnregistersOnce() + { + var server = new RecordingMxAccessServer(); + var source = new LmxSubtagAlarmSource(server, FakeServerHandle); + + source.Advise(new[] { "Tank1.Alarm.Subtag", "Tank2.Alarm.Subtag" }); + // A write-only subtag: added by Write, never advised. + source.Write("Tank1.Alarm.AckComment", "acknowledged"); + + int advised1 = server.LastItemHandleFor("Tank1.Alarm.Subtag"); + int advised2 = server.LastItemHandleFor("Tank2.Alarm.Subtag"); + int writeOnly = server.LastItemHandleFor("Tank1.Alarm.AckComment"); + + source.Dispose(); + + // Only the two advised handles are unadvised — never the write-only one. + Assert.Equal(new[] { advised1, advised2 }, server.UnAdvisedItemHandles); + Assert.DoesNotContain(writeOnly, server.UnAdvisedItemHandles); + // Every added item (advised + write-only) is removed. + Assert.Equal( + new[] { advised1, advised2, writeOnly }.OrderBy(h => h), + server.RemovedItemHandles.OrderBy(h => h)); + Assert.Equal(1, server.UnregisterCount); + } + + /// + /// Verifies is idempotent: a + /// second call performs no further teardown. + /// + [Fact] + public void Dispose_IsIdempotent() + { + var server = new RecordingMxAccessServer(); + var source = new LmxSubtagAlarmSource(server, FakeServerHandle); + + source.Advise(new[] { "Tank1.Alarm.Subtag" }); + + source.Dispose(); + int unadviseAfterFirst = server.UnAdvisedItemHandles.Count; + int unregisterAfterFirst = server.UnregisterCount; + + source.Dispose(); + + Assert.Equal(unadviseAfterFirst, server.UnAdvisedItemHandles.Count); + Assert.Equal(unregisterAfterFirst, server.UnregisterCount); + } + /// /// Recording test double that captures the /// AddItem/Advise/Write/UnAdvise/RemoveItem/Unregister calls @@ -152,13 +207,17 @@ public sealed class LmxSubtagAlarmSourceTests public List Writes { get; } = new(); + public List UnAdvisedItemHandles { get; } = new(); + + public List RemovedItemHandles { get; } = new(); + + public int UnregisterCount { get; private set; } + public int LastItemHandleFor(string itemAddress) => handlesByAddress[itemAddress]; public int Register(string clientName) => FakeServerHandle; - public void Unregister(int serverHandle) - { - } + public void Unregister(int serverHandle) => UnregisterCount++; public int AddItem(int serverHandle, string itemDefinition) { @@ -171,9 +230,7 @@ public sealed class LmxSubtagAlarmSourceTests public int AddItem2(int serverHandle, string itemDefinition, string itemContext) => AddItem(serverHandle, itemDefinition); - public void RemoveItem(int serverHandle, int itemHandle) - { - } + public void RemoveItem(int serverHandle, int itemHandle) => RemovedItemHandles.Add(itemHandle); public void Advise(int serverHandle, int itemHandle) { @@ -181,9 +238,7 @@ public sealed class LmxSubtagAlarmSourceTests AdvisedServerHandles.Add(serverHandle); } - public void UnAdvise(int serverHandle, int itemHandle) - { - } + public void UnAdvise(int serverHandle, int itemHandle) => UnAdvisedItemHandles.Add(itemHandle); public void AdviseSupervisory(int serverHandle, int itemHandle) { diff --git a/src/ZB.MOM.WW.MxGateway.Worker/MxAccess/LmxSubtagAlarmSource.cs b/src/ZB.MOM.WW.MxGateway.Worker/MxAccess/LmxSubtagAlarmSource.cs index c16e2e6..e334a9a 100644 --- a/src/ZB.MOM.WW.MxGateway.Worker/MxAccess/LmxSubtagAlarmSource.cs +++ b/src/ZB.MOM.WW.MxGateway.Worker/MxAccess/LmxSubtagAlarmSource.cs @@ -46,6 +46,12 @@ public sealed class LmxSubtagAlarmSource : ISubtagAlarmSource private readonly Dictionary addressesByItemHandle = new Dictionary(); + // Handles that were actually Advise()d, tracked separately from the added + // set so Dispose only UnAdvises advised items. Write() can AddItem a + // write-only subtag (e.g. an ack-comment that was never advised); calling + // UnAdvise on such a handle would be an unbalanced teardown. + private readonly HashSet advisedItemHandles = new HashSet(); + private object? mxAccessComObject; private IMxAccessServer? server; private LMXProxyServerClass? comEventSource; @@ -134,6 +140,7 @@ public sealed class LmxSubtagAlarmSource : ISubtagAlarmSource mxServer.Advise(serverHandle, itemHandle); itemHandlesByAddress[itemAddress!] = itemHandle; addressesByItemHandle[itemHandle] = itemAddress!; + advisedItemHandles.Add(itemHandle); } } @@ -225,7 +232,13 @@ public sealed class LmxSubtagAlarmSource : ISubtagAlarmSource { foreach (KeyValuePair entry in addressesByItemHandle) { - try { mxServer.UnAdvise(serverHandle, entry.Key); } catch { /* swallow — best effort */ } + // Only UnAdvise handles that were actually advised; a write-only + // item (added by Write but never Advise'd) was never advised. + if (advisedItemHandles.Contains(entry.Key)) + { + try { mxServer.UnAdvise(serverHandle, entry.Key); } catch { /* swallow — best effort */ } + } + try { mxServer.RemoveItem(serverHandle, entry.Key); } catch { /* swallow — best effort */ } } @@ -234,6 +247,7 @@ public sealed class LmxSubtagAlarmSource : ISubtagAlarmSource itemHandlesByAddress.Clear(); addressesByItemHandle.Clear(); + advisedItemHandles.Clear(); object? comToRelease = mxAccessComObject; mxAccessComObject = null; From 393e326275fa0851333991bbcbd80c47f9f81d6d Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 14 Jun 2026 02:35:59 -0400 Subject: [PATCH 6/6] docs(alarms): note operator/IDE toggle drives the live subtag smoke test C6a: the rig's TestAlarm attributes are object-driven; a flip script OR a manual operator/IDE toggle drives them (confirmed live 2026-06-14). Update the how-to-run comments and Skip reason accordingly. --- .../Probes/AlarmSubtagLiveSmokeTests.cs | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/ZB.MOM.WW.MxGateway.Worker.Tests/Probes/AlarmSubtagLiveSmokeTests.cs b/src/ZB.MOM.WW.MxGateway.Worker.Tests/Probes/AlarmSubtagLiveSmokeTests.cs index 41ff65a..ce9b3ac 100644 --- a/src/ZB.MOM.WW.MxGateway.Worker.Tests/Probes/AlarmSubtagLiveSmokeTests.cs +++ b/src/ZB.MOM.WW.MxGateway.Worker.Tests/Probes/AlarmSubtagLiveSmokeTests.cs @@ -16,9 +16,12 @@ // 1. On the dev rig with AVEVA System Platform installed and Galaxy running: // $env:MXGATEWAY_RUN_LIVE_MXACCESS_TESTS = "1" // 2. Remove (or set to null) the Skip parameter on the [Fact] below. -// 3. Run with an alarm flip script (same one used by AlarmsLiveSmokeTests) -// so that TestMachine_001.TestAlarm001 toggles its Active/Acked subtags -// on a ~10 s cadence. +// 3. Drive a TestMachine alarm so its Active/Acked subtags toggle — either an +// alarm flip script (same one used by AlarmsLiveSmokeTests, ~10 s cadence) +// or a manual operator/IDE toggle of the alarm attribute. The rig's +// TestAlarm attributes are object-driven, so an external MXAccess Write +// cannot toggle them (confirmed live 2026-06-14 by toggling TestAlarm002 +// from the IDE). // // net48/x86 constraints: // - No init-only properties, records, index/range operators, C# 8+ pattern @@ -49,8 +52,9 @@ namespace ZB.MOM.WW.MxGateway.Worker.Tests.Probes; /// writes the /// ack-comment subtag (AckMsg) successfully. /// -/// Skip-gated; flip Skip=null on the dev rig with the alarm flip -/// script running. The remaining live-validation item is confirming that +/// Skip-gated; flip Skip=null on the dev rig with an alarm being +/// driven (flip script or a manual operator/IDE toggle of the alarm +/// attribute). The remaining live-validation item is confirming that /// the runtime MXAccess item reference path requires no intermediate /// alarm-condition segment (i.e. <Object>.<AlarmAttr>.InAlarm /// resolves as-is). @@ -117,7 +121,7 @@ public sealed class AlarmSubtagLiveSmokeTests /// the Degraded flag and synthetic GUID are stamped, then /// AcknowledgeByName and verifies the ack-comment write returns 0. /// - [Fact(Skip = "Live dev-rig smoke test — flip Skip=null with AVEVA + an alarm flip script running. Subtag fallback path. Field names confirmed (InAlarm/Acked/AckMsg/Priority); live-validate runtime path resolves without intermediate alarm-condition segment.")] + [Fact(Skip = "Live dev-rig smoke test — flip Skip=null with AVEVA + an alarm being driven (flip script or manual operator/IDE toggle of the alarm attribute). Subtag fallback path. Field names confirmed (InAlarm/Acked/AckMsg/Priority); live-validate runtime path resolves without intermediate alarm-condition segment.")] public void SubtagFallback_FullPipelineRoundTrip_SynthesizesRaiseAndAcknowledges() { Exception? threadException = null; @@ -342,9 +346,10 @@ public sealed class AlarmSubtagLiveSmokeTests consumer.Subscribe(subscriptionExpression); Log("Subscribe returned OK."); - // 1. Wait for a Raise transition. The alarm flip script (same one - // used by AlarmsLiveSmokeTests) writes the active subtag on a - // ~10 s cadence. LmxSubtagAlarmSource delivers OnDataChange via + // 1. Wait for a Raise transition. Whatever is driving the alarm — a + // flip script (same one used by AlarmsLiveSmokeTests, ~10 s cadence) + // or a manual operator/IDE toggle — writes the active subtag. + // LmxSubtagAlarmSource delivers OnDataChange via // the Windows message pump on the STA, so we must pump messages // here while we wait — mirroring how AlarmsLiveSmokeTests drives // its WnWrapAlarmConsumer.PollOnce() from the STA in a tight loop.