Merge alarm-fallback cleanup: metrics snapshot/reason, SQL prune, teardown, doc drift
Implements the actionable deferred items from pending.md (B1-B5, C6-C7): - B1/B2 metrics: provider-switch count in snapshot + bounded reason enum - B5: drop dead primitive branch from AlarmAttributesSql - B3/B4 worker: UnAdvise only advised handles (+Dispose tests); remove dead field - C6/C7: doc clarifications and design-doc superseded notes Verified: gateway tests on macOS, net48/x86 worker suite (318 passed) on windev.
This commit is contained in:
@@ -1,7 +1,10 @@
|
||||
# Alarm Subtag-Monitoring Fallback — Design
|
||||
|
||||
**Date:** 2026-06-13
|
||||
**Status:** Approved (brainstorming), ready for implementation planning
|
||||
**Status:** Superseded by implementation (merged to `main`). This is the original
|
||||
brainstorming design; a few details below were refined during implementation —
|
||||
see the inline **Superseded** notes. The shipped behaviour is documented in
|
||||
`docs/AlarmClientDiscovery.md`, the client READMEs, and the contracts.
|
||||
**Branch:** `feat/alarm-subtag-fallback`
|
||||
|
||||
## Problem
|
||||
@@ -162,6 +165,11 @@ reconcile cadence and pushes an updated watch-list when the model changes.
|
||||
|
||||
**`mxaccess_worker.proto`:**
|
||||
|
||||
> **Superseded:** these additions shipped in `mxaccess_gateway.proto`, not
|
||||
> `mxaccess_worker.proto` — the worker imports the gateway proto and the alarm
|
||||
> commands/events live there (`AlarmSubtagTarget`,
|
||||
> `OnAlarmProviderModeChangedEvent`, the extended subscribe command).
|
||||
|
||||
- Extend the alarm-subscribe command with: `AlarmProviderMode forced_mode`
|
||||
(`UNSPECIFIED` = auto), `int32 consecutive_failure_threshold`,
|
||||
`int32 failback_probe_interval_seconds`, `int32 failback_stable_probes`, and
|
||||
@@ -240,6 +248,12 @@ to `/hubs/alarms`, (c) update metrics, (d) force a reconcile.
|
||||
- `mxgateway_alarm_provider_switch_total{from,to,reason}` (counter)
|
||||
- `mxgateway_alarm_fallback_watchlist_size` (gauge)
|
||||
|
||||
> **Superseded:** the shipped meter names are `mxgateway.alarms.provider_mode`
|
||||
> (gauge) and `mxgateway.alarms.provider_switches{from,to,reason}` (counter,
|
||||
> `reason` bounded to `failover`/`failback`/`unknown`). The watch-list-size /
|
||||
> watch-list-empty gauges were not implemented; an empty watch-list is surfaced
|
||||
> via a warning log and the feed's degraded `ProviderStatus` instead.
|
||||
|
||||
## Configuration
|
||||
|
||||
```jsonc
|
||||
|
||||
@@ -399,7 +399,13 @@ public sealed class GatewayAlarmMonitor : BackgroundService, IGatewayAlarmServic
|
||||
BroadcastToAll(new AlarmFeedMessage { ProviderStatus = status });
|
||||
}
|
||||
|
||||
_metrics.AlarmProviderSwitched(fromModeInt, ModeToInt(toMode), reason);
|
||||
AlarmProviderSwitchReason switchReason = toMode switch
|
||||
{
|
||||
AlarmProviderMode.Subtag => AlarmProviderSwitchReason.Failover,
|
||||
AlarmProviderMode.Alarmmgr => AlarmProviderSwitchReason.Failback,
|
||||
_ => AlarmProviderSwitchReason.Unknown,
|
||||
};
|
||||
_metrics.AlarmProviderSwitched(fromModeInt, ModeToInt(toMode), switchReason);
|
||||
|
||||
_logger.LogInformation(
|
||||
"Alarm provider mode changed to {Mode} (degraded={Degraded}): {Reason}",
|
||||
|
||||
@@ -19,8 +19,10 @@ public interface IAlarmWatchListResolver
|
||||
/// <param name="cancellationToken">Token to cancel the asynchronous operation.</param>
|
||||
/// <returns>
|
||||
/// The resolved <see cref="AlarmSubtagTarget"/> watch-list, possibly empty.
|
||||
/// Discovery being unavailable never throws; the caller decides what to do
|
||||
/// with an empty list.
|
||||
/// Discovery being unavailable never throws — it yields an empty (or
|
||||
/// config-only) list and the caller decides what to do with it. Cancellation
|
||||
/// is the one exception: a triggered <paramref name="cancellationToken"/>
|
||||
/// still propagates an <see cref="OperationCanceledException"/>.
|
||||
/// </returns>
|
||||
Task<IReadOnlyList<AlarmSubtagTarget>> ResolveAsync(
|
||||
AlarmsOptions options,
|
||||
|
||||
@@ -308,11 +308,13 @@ LEFT JOIN data_type dt ON dt.mx_data_type = r.mx_data_type
|
||||
WHERE r.rn = 1
|
||||
ORDER BY r.tag_name, r.attribute_name";
|
||||
|
||||
// Alarm-only discovery for the subtag-fallback watch-list. This deliberately reuses the
|
||||
// exact candidate/ranked CTE structure and the same `AlarmExtension`-based is_alarm
|
||||
// detection as AttributesSql so the two queries cannot drift: a row qualifies only when
|
||||
// its user attribute (src_pri 0) anchors an `AlarmExtension` primitive on the owning
|
||||
// object. It projects just what the watch-list needs — full_tag_reference (tag_name +
|
||||
// Alarm-only discovery for the subtag-fallback watch-list. This reuses the candidate/ranked
|
||||
// CTE shape and the same `AlarmExtension`-based detection as AttributesSql. Unlike
|
||||
// AttributesSql it keeps only the user-attribute (dynamic_attribute) candidate branch: an
|
||||
// alarm anchor is always a user attribute, so the primitive-instance branch AttributesSql
|
||||
// carries would be filtered out here anyway — a row qualifies only when its user attribute
|
||||
// anchors an `AlarmExtension` primitive on the owning object. It projects just what the
|
||||
// watch-list needs — full_tag_reference (tag_name +
|
||||
// '.' + attribute_name, matching AttributesSql) and the owning object's tag_name as
|
||||
// source_object_reference. The array `[]` suffix is intentionally omitted: an
|
||||
// alarm-bearing attribute is a scalar anchor, not an array body. It also projects the
|
||||
@@ -332,7 +334,7 @@ ORDER BY r.tag_name, r.attribute_name";
|
||||
),
|
||||
candidate AS (
|
||||
SELECT
|
||||
dpc.gobject_id, g.tag_name, da.attribute_name, dpc.depth, 0 AS src_pri
|
||||
dpc.gobject_id, g.tag_name, da.attribute_name, dpc.depth
|
||||
FROM deployed_package_chain dpc
|
||||
INNER JOIN dynamic_attribute da ON da.package_id = dpc.package_id
|
||||
INNER JOIN gobject g ON g.gobject_id = dpc.gobject_id
|
||||
@@ -341,25 +343,10 @@ candidate AS (
|
||||
AND da.attribute_name NOT LIKE '[_]%'
|
||||
AND da.attribute_name NOT LIKE '%.Description'
|
||||
AND da.mx_attribute_category IN (2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 24)
|
||||
UNION ALL
|
||||
SELECT
|
||||
dpc.gobject_id, g.tag_name,
|
||||
CASE WHEN pi.primitive_name IS NULL OR pi.primitive_name = ''
|
||||
THEN ad.attribute_name
|
||||
ELSE pi.primitive_name + '.' + ad.attribute_name END AS attribute_name,
|
||||
dpc.depth, 1 AS src_pri
|
||||
FROM deployed_package_chain dpc
|
||||
INNER JOIN primitive_instance pi ON pi.package_id = dpc.package_id
|
||||
INNER JOIN attribute_definition ad ON ad.primitive_definition_id = pi.primitive_definition_id
|
||||
INNER JOIN gobject g ON g.gobject_id = dpc.gobject_id
|
||||
INNER JOIN template_definition td ON td.template_definition_id = g.template_definition_id
|
||||
WHERE td.category_id IN (1, 3, 4, 10, 11, 13, 17, 24, 26)
|
||||
AND ad.attribute_name NOT LIKE '[_]%'
|
||||
AND ad.attribute_name NOT LIKE '%.Description'
|
||||
),
|
||||
ranked AS (
|
||||
SELECT c.*, ROW_NUMBER() OVER (
|
||||
PARTITION BY c.gobject_id, c.attribute_name ORDER BY c.src_pri, c.depth) AS rn
|
||||
PARTITION BY c.gobject_id, c.attribute_name ORDER BY c.depth) AS rn
|
||||
FROM candidate c
|
||||
)
|
||||
SELECT
|
||||
@@ -370,7 +357,6 @@ FROM ranked r
|
||||
INNER JOIN gobject g ON g.gobject_id = r.gobject_id
|
||||
LEFT JOIN gobject area ON area.gobject_id = g.area_gobject_id
|
||||
WHERE r.rn = 1
|
||||
AND r.src_pri = 0
|
||||
AND EXISTS (
|
||||
SELECT 1 FROM deployed_package_chain dpc2
|
||||
INNER JOIN primitive_instance pi ON pi.package_id = dpc2.package_id AND pi.primitive_name = r.attribute_name
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
namespace ZB.MOM.WW.MxGateway.Server.Metrics;
|
||||
|
||||
/// <summary>
|
||||
/// Bounded classification of an alarm-provider switch, used as the low-cardinality
|
||||
/// <c>reason</c> tag on the <c>mxgateway.alarms.provider_switches</c> counter. The
|
||||
/// worker supplies a free-text reason (e.g. <c>"primary PollOnce failed"</c>) that
|
||||
/// stays in the structured log; only this bounded value reaches the metric tag so the
|
||||
/// time series cannot fan out on operation-specific text.
|
||||
/// </summary>
|
||||
public enum AlarmProviderSwitchReason
|
||||
{
|
||||
/// <summary>The switch direction could not be classified.</summary>
|
||||
Unknown = 0,
|
||||
|
||||
/// <summary>Switched from the primary (alarmmgr) provider to the subtag standby — degraded.</summary>
|
||||
Failover = 1,
|
||||
|
||||
/// <summary>Switched back from the subtag standby to the primary (alarmmgr) provider — recovered.</summary>
|
||||
Failback = 2,
|
||||
}
|
||||
@@ -50,6 +50,7 @@ public sealed class GatewayMetrics : IDisposable
|
||||
private long _heartbeatFailures;
|
||||
private long _streamDisconnects;
|
||||
private long _retryAttempts;
|
||||
private long _alarmProviderSwitches;
|
||||
private bool _disposed;
|
||||
|
||||
/// <summary>
|
||||
@@ -383,25 +384,34 @@ public sealed class GatewayMetrics : IDisposable
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Records that the alarm provider switched modes and updates the current provider mode gauge.
|
||||
/// Records that the alarm provider switched modes, increments the switch count, and updates the
|
||||
/// current provider mode gauge.
|
||||
/// </summary>
|
||||
/// <param name="fromMode">Provider mode before the switch (1=alarmmgr, 2=subtag, 0=unknown).</param>
|
||||
/// <param name="toMode">Provider mode after the switch (1=alarmmgr, 2=subtag, 0=unknown).</param>
|
||||
/// <param name="reason">Human-readable reason for the switch.</param>
|
||||
public void AlarmProviderSwitched(int fromMode, int toMode, string reason)
|
||||
/// <param name="reason">Bounded switch classification used as the counter's <c>reason</c> tag.</param>
|
||||
public void AlarmProviderSwitched(int fromMode, int toMode, AlarmProviderSwitchReason reason)
|
||||
{
|
||||
lock (_syncRoot)
|
||||
{
|
||||
_alarmProviderMode = toMode;
|
||||
_alarmProviderSwitches++;
|
||||
}
|
||||
|
||||
_alarmProviderSwitchesCounter.Add(
|
||||
1,
|
||||
new KeyValuePair<string, object?>("from", fromMode.ToString(CultureInfo.InvariantCulture)),
|
||||
new KeyValuePair<string, object?>("to", toMode.ToString(CultureInfo.InvariantCulture)),
|
||||
new KeyValuePair<string, object?>("reason", reason));
|
||||
new KeyValuePair<string, object?>("reason", ReasonTag(reason)));
|
||||
}
|
||||
|
||||
private static string ReasonTag(AlarmProviderSwitchReason reason) => reason switch
|
||||
{
|
||||
AlarmProviderSwitchReason.Failover => "failover",
|
||||
AlarmProviderSwitchReason.Failback => "failback",
|
||||
_ => "unknown",
|
||||
};
|
||||
|
||||
/// <summary>Sets the current alarm provider-mode gauge without recording a switch (e.g. startup baseline).</summary>
|
||||
public void SetAlarmProviderMode(int mode)
|
||||
{
|
||||
@@ -433,6 +443,7 @@ public sealed class GatewayMetrics : IDisposable
|
||||
HeartbeatFailures: _heartbeatFailures,
|
||||
StreamDisconnects: _streamDisconnects,
|
||||
RetryAttempts: _retryAttempts,
|
||||
AlarmProviderSwitchCount: _alarmProviderSwitches,
|
||||
CommandFailuresByMethod: new Dictionary<string, long>(_commandFailuresByMethod, StringComparer.OrdinalIgnoreCase),
|
||||
EventsByFamily: new Dictionary<string, long>(_eventsByFamily, StringComparer.OrdinalIgnoreCase),
|
||||
EventsBySession: new Dictionary<string, long>(_eventsBySession, StringComparer.Ordinal),
|
||||
|
||||
@@ -18,6 +18,7 @@ public sealed record GatewayMetricsSnapshot(
|
||||
long HeartbeatFailures,
|
||||
long StreamDisconnects,
|
||||
long RetryAttempts,
|
||||
long AlarmProviderSwitchCount,
|
||||
IReadOnlyDictionary<string, long> CommandFailuresByMethod,
|
||||
IReadOnlyDictionary<string, long> EventsByFamily,
|
||||
IReadOnlyDictionary<string, long> EventsBySession,
|
||||
|
||||
@@ -111,12 +111,13 @@ public sealed class GatewayMetricsTests
|
||||
});
|
||||
listener.Start();
|
||||
|
||||
metrics.AlarmProviderSwitched(1, 2, "test");
|
||||
metrics.AlarmProviderSwitched(1, 2, AlarmProviderSwitchReason.Failover);
|
||||
|
||||
Assert.Equal(1, capturedValue);
|
||||
Assert.Equal("1", capturedFrom);
|
||||
Assert.Equal("2", capturedTo);
|
||||
Assert.Equal("test", capturedReason);
|
||||
Assert.Equal("failover", capturedReason);
|
||||
Assert.Equal(1, metrics.GetSnapshot().AlarmProviderSwitchCount);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -150,7 +151,7 @@ public sealed class GatewayMetricsTests
|
||||
});
|
||||
listener.Start();
|
||||
|
||||
metrics.AlarmProviderSwitched(1, 2, "test");
|
||||
metrics.AlarmProviderSwitched(1, 2, AlarmProviderSwitchReason.Failover);
|
||||
listener.RecordObservableInstruments();
|
||||
|
||||
Assert.Equal(2, capturedMode);
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using ZB.MOM.WW.MxGateway.Worker.MxAccess;
|
||||
|
||||
namespace ZB.MOM.WW.MxGateway.Worker.Tests.MxAccess;
|
||||
@@ -133,6 +134,60 @@ public sealed class LmxSubtagAlarmSourceTests
|
||||
Assert.Single(server.Writes);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verifies <see cref="LmxSubtagAlarmSource.Dispose"/> UnAdvises only the
|
||||
/// handles that were actually advised — a write-only item (added by
|
||||
/// <see cref="LmxSubtagAlarmSource.Write"/> but never advised) is removed
|
||||
/// but not unadvised — and unregisters the server exactly once.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public void Dispose_UnAdvisesOnlyAdvisedHandles_RemovesAll_AndUnregistersOnce()
|
||||
{
|
||||
var server = new RecordingMxAccessServer();
|
||||
var source = new LmxSubtagAlarmSource(server, FakeServerHandle);
|
||||
|
||||
source.Advise(new[] { "Tank1.Alarm.Subtag", "Tank2.Alarm.Subtag" });
|
||||
// A write-only subtag: added by Write, never advised.
|
||||
source.Write("Tank1.Alarm.AckComment", "acknowledged");
|
||||
|
||||
int advised1 = server.LastItemHandleFor("Tank1.Alarm.Subtag");
|
||||
int advised2 = server.LastItemHandleFor("Tank2.Alarm.Subtag");
|
||||
int writeOnly = server.LastItemHandleFor("Tank1.Alarm.AckComment");
|
||||
|
||||
source.Dispose();
|
||||
|
||||
// Only the two advised handles are unadvised — never the write-only one.
|
||||
Assert.Equal(new[] { advised1, advised2 }, server.UnAdvisedItemHandles);
|
||||
Assert.DoesNotContain(writeOnly, server.UnAdvisedItemHandles);
|
||||
// Every added item (advised + write-only) is removed.
|
||||
Assert.Equal(
|
||||
new[] { advised1, advised2, writeOnly }.OrderBy(h => h),
|
||||
server.RemovedItemHandles.OrderBy(h => h));
|
||||
Assert.Equal(1, server.UnregisterCount);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verifies <see cref="LmxSubtagAlarmSource.Dispose"/> is idempotent: a
|
||||
/// second call performs no further teardown.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public void Dispose_IsIdempotent()
|
||||
{
|
||||
var server = new RecordingMxAccessServer();
|
||||
var source = new LmxSubtagAlarmSource(server, FakeServerHandle);
|
||||
|
||||
source.Advise(new[] { "Tank1.Alarm.Subtag" });
|
||||
|
||||
source.Dispose();
|
||||
int unadviseAfterFirst = server.UnAdvisedItemHandles.Count;
|
||||
int unregisterAfterFirst = server.UnregisterCount;
|
||||
|
||||
source.Dispose();
|
||||
|
||||
Assert.Equal(unadviseAfterFirst, server.UnAdvisedItemHandles.Count);
|
||||
Assert.Equal(unregisterAfterFirst, server.UnregisterCount);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Recording <see cref="IMxAccessServer"/> test double that captures the
|
||||
/// AddItem/Advise/Write/UnAdvise/RemoveItem/Unregister calls
|
||||
@@ -152,13 +207,17 @@ public sealed class LmxSubtagAlarmSourceTests
|
||||
|
||||
public List<WriteRecord> Writes { get; } = new();
|
||||
|
||||
public List<int> UnAdvisedItemHandles { get; } = new();
|
||||
|
||||
public List<int> RemovedItemHandles { get; } = new();
|
||||
|
||||
public int UnregisterCount { get; private set; }
|
||||
|
||||
public int LastItemHandleFor(string itemAddress) => handlesByAddress[itemAddress];
|
||||
|
||||
public int Register(string clientName) => FakeServerHandle;
|
||||
|
||||
public void Unregister(int serverHandle)
|
||||
{
|
||||
}
|
||||
public void Unregister(int serverHandle) => UnregisterCount++;
|
||||
|
||||
public int AddItem(int serverHandle, string itemDefinition)
|
||||
{
|
||||
@@ -171,9 +230,7 @@ public sealed class LmxSubtagAlarmSourceTests
|
||||
public int AddItem2(int serverHandle, string itemDefinition, string itemContext)
|
||||
=> AddItem(serverHandle, itemDefinition);
|
||||
|
||||
public void RemoveItem(int serverHandle, int itemHandle)
|
||||
{
|
||||
}
|
||||
public void RemoveItem(int serverHandle, int itemHandle) => RemovedItemHandles.Add(itemHandle);
|
||||
|
||||
public void Advise(int serverHandle, int itemHandle)
|
||||
{
|
||||
@@ -181,9 +238,7 @@ public sealed class LmxSubtagAlarmSourceTests
|
||||
AdvisedServerHandles.Add(serverHandle);
|
||||
}
|
||||
|
||||
public void UnAdvise(int serverHandle, int itemHandle)
|
||||
{
|
||||
}
|
||||
public void UnAdvise(int serverHandle, int itemHandle) => UnAdvisedItemHandles.Add(itemHandle);
|
||||
|
||||
public void AdviseSupervisory(int serverHandle, int itemHandle)
|
||||
{
|
||||
|
||||
@@ -16,9 +16,12 @@
|
||||
// 1. On the dev rig with AVEVA System Platform installed and Galaxy running:
|
||||
// $env:MXGATEWAY_RUN_LIVE_MXACCESS_TESTS = "1"
|
||||
// 2. Remove (or set to null) the Skip parameter on the [Fact] below.
|
||||
// 3. Run with an alarm flip script (same one used by AlarmsLiveSmokeTests)
|
||||
// so that TestMachine_001.TestAlarm001 toggles its Active/Acked subtags
|
||||
// on a ~10 s cadence.
|
||||
// 3. Drive a TestMachine alarm so its Active/Acked subtags toggle — either an
|
||||
// alarm flip script (same one used by AlarmsLiveSmokeTests, ~10 s cadence)
|
||||
// or a manual operator/IDE toggle of the alarm attribute. The rig's
|
||||
// TestAlarm attributes are object-driven, so an external MXAccess Write
|
||||
// cannot toggle them (confirmed live 2026-06-14 by toggling TestAlarm002
|
||||
// from the IDE).
|
||||
//
|
||||
// net48/x86 constraints:
|
||||
// - No init-only properties, records, index/range operators, C# 8+ pattern
|
||||
@@ -49,8 +52,9 @@ namespace ZB.MOM.WW.MxGateway.Worker.Tests.Probes;
|
||||
/// <see cref="SubtagAlarmConsumer.AcknowledgeByName"/> writes the
|
||||
/// ack-comment subtag (AckMsg) successfully.
|
||||
///
|
||||
/// Skip-gated; flip <c>Skip=null</c> on the dev rig with the alarm flip
|
||||
/// script running. The remaining live-validation item is confirming that
|
||||
/// Skip-gated; flip <c>Skip=null</c> on the dev rig with an alarm being
|
||||
/// driven (flip script or a manual operator/IDE toggle of the alarm
|
||||
/// attribute). The remaining live-validation item is confirming that
|
||||
/// the runtime MXAccess item reference path requires no intermediate
|
||||
/// alarm-condition segment (i.e. <c><Object>.<AlarmAttr>.InAlarm</c>
|
||||
/// resolves as-is).
|
||||
@@ -117,7 +121,7 @@ public sealed class AlarmSubtagLiveSmokeTests
|
||||
/// the Degraded flag and synthetic GUID are stamped, then
|
||||
/// AcknowledgeByName and verifies the ack-comment write returns 0.
|
||||
/// </summary>
|
||||
[Fact(Skip = "Live dev-rig smoke test — flip Skip=null with AVEVA + an alarm flip script running. Subtag fallback path. Field names confirmed (InAlarm/Acked/AckMsg/Priority); live-validate runtime path resolves without intermediate alarm-condition segment.")]
|
||||
[Fact(Skip = "Live dev-rig smoke test — flip Skip=null with AVEVA + an alarm being driven (flip script or manual operator/IDE toggle of the alarm attribute). Subtag fallback path. Field names confirmed (InAlarm/Acked/AckMsg/Priority); live-validate runtime path resolves without intermediate alarm-condition segment.")]
|
||||
public void SubtagFallback_FullPipelineRoundTrip_SynthesizesRaiseAndAcknowledges()
|
||||
{
|
||||
Exception? threadException = null;
|
||||
@@ -342,9 +346,10 @@ public sealed class AlarmSubtagLiveSmokeTests
|
||||
consumer.Subscribe(subscriptionExpression);
|
||||
Log("Subscribe returned OK.");
|
||||
|
||||
// 1. Wait for a Raise transition. The alarm flip script (same one
|
||||
// used by AlarmsLiveSmokeTests) writes the active subtag on a
|
||||
// ~10 s cadence. LmxSubtagAlarmSource delivers OnDataChange via
|
||||
// 1. Wait for a Raise transition. Whatever is driving the alarm — a
|
||||
// flip script (same one used by AlarmsLiveSmokeTests, ~10 s cadence)
|
||||
// or a manual operator/IDE toggle — writes the active subtag.
|
||||
// LmxSubtagAlarmSource delivers OnDataChange via
|
||||
// the Windows message pump on the STA, so we must pump messages
|
||||
// here while we wait — mirroring how AlarmsLiveSmokeTests drives
|
||||
// its WnWrapAlarmConsumer.PollOnce() from the STA in a tight loop.
|
||||
|
||||
@@ -65,13 +65,6 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer
|
||||
private bool disposed;
|
||||
private DateTime lastProbeAtUtc = DateTime.MinValue;
|
||||
|
||||
/// <summary>
|
||||
/// The subscription expression passed to <see cref="Subscribe"/>.
|
||||
/// Stored for documentation and potential future full re-subscribe
|
||||
/// scenarios; the primary is NOT re-subscribed during probing.
|
||||
/// </summary>
|
||||
private string subscriptionExpression = string.Empty;
|
||||
|
||||
/// <summary>
|
||||
/// Composes the failover consumer over its two children.
|
||||
/// </summary>
|
||||
@@ -119,9 +112,9 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer
|
||||
{
|
||||
if (disposed) throw new ObjectDisposedException(nameof(FailoverAlarmConsumer));
|
||||
|
||||
// Store for documentation; the primary is not torn down on failover
|
||||
// and is therefore not re-subscribed during ProbeOnce.
|
||||
subscriptionExpression = subscription;
|
||||
// The primary is not torn down on failover and is therefore never
|
||||
// re-subscribed during ProbeOnce, so the subscription expression does
|
||||
// not need to be retained here.
|
||||
|
||||
// Arm the standby first so it is warm regardless of primary outcome.
|
||||
// A standby subscribe failure is a hard fault (the fallback itself is
|
||||
|
||||
@@ -46,6 +46,12 @@ public sealed class LmxSubtagAlarmSource : ISubtagAlarmSource
|
||||
private readonly Dictionary<int, string> addressesByItemHandle =
|
||||
new Dictionary<int, string>();
|
||||
|
||||
// Handles that were actually Advise()d, tracked separately from the added
|
||||
// set so Dispose only UnAdvises advised items. Write() can AddItem a
|
||||
// write-only subtag (e.g. an ack-comment that was never advised); calling
|
||||
// UnAdvise on such a handle would be an unbalanced teardown.
|
||||
private readonly HashSet<int> advisedItemHandles = new HashSet<int>();
|
||||
|
||||
private object? mxAccessComObject;
|
||||
private IMxAccessServer? server;
|
||||
private LMXProxyServerClass? comEventSource;
|
||||
@@ -134,6 +140,7 @@ public sealed class LmxSubtagAlarmSource : ISubtagAlarmSource
|
||||
mxServer.Advise(serverHandle, itemHandle);
|
||||
itemHandlesByAddress[itemAddress!] = itemHandle;
|
||||
addressesByItemHandle[itemHandle] = itemAddress!;
|
||||
advisedItemHandles.Add(itemHandle);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -225,7 +232,13 @@ public sealed class LmxSubtagAlarmSource : ISubtagAlarmSource
|
||||
{
|
||||
foreach (KeyValuePair<int, string> entry in addressesByItemHandle)
|
||||
{
|
||||
try { mxServer.UnAdvise(serverHandle, entry.Key); } catch { /* swallow — best effort */ }
|
||||
// Only UnAdvise handles that were actually advised; a write-only
|
||||
// item (added by Write but never Advise'd) was never advised.
|
||||
if (advisedItemHandles.Contains(entry.Key))
|
||||
{
|
||||
try { mxServer.UnAdvise(serverHandle, entry.Key); } catch { /* swallow — best effort */ }
|
||||
}
|
||||
|
||||
try { mxServer.RemoveItem(serverHandle, entry.Key); } catch { /* swallow — best effort */ }
|
||||
}
|
||||
|
||||
@@ -234,6 +247,7 @@ public sealed class LmxSubtagAlarmSource : ISubtagAlarmSource
|
||||
|
||||
itemHandlesByAddress.Clear();
|
||||
addressesByItemHandle.Clear();
|
||||
advisedItemHandles.Clear();
|
||||
|
||||
object? comToRelease = mxAccessComObject;
|
||||
mxAccessComObject = null;
|
||||
|
||||
Reference in New Issue
Block a user