From a4ed605f74e3dc5057ffdf697719fddfe7e165fc Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Fri, 1 May 2026 12:17:39 -0400 Subject: [PATCH] A.3 (live smoke): full alarms-over-gateway pipeline verified end-to-end MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Skip-gated AlarmsLiveSmokeTests.Alarms_full_pipeline_round_trip ran against the dev rig with the flip script firing TestMachine_001.TestAlarm001 every 10s. Verified: - Subscribe + 1st PollOnce yield real transition events - Field-by-field decode correct (provider, group, tag, severity, UTC timestamp, comment, type) - SnapshotActiveAlarms reflects current state - AcknowledgeByName(real identity) -> rc=0 - Pipeline keeps streaming transitions on the 10s cadence post-ack Three production quirks surfaced and were fixed in WnWrapAlarmConsumer: 1. SetXmlAlarmQuery is mandatory for reads. Skipping it (per the earlier discovery-doc recommendation) makes the first GetXmlCurrentAlarms2 fail with E_FAIL. The doc's claim that the call is unnecessary because the round-trip echo is mangled was wrong — mangled echo or not, the call is required. 2. SetXmlAlarmQuery breaks AlarmAckByName on the same consumer instance (returns -55). Workaround: provision a parallel "ack-only" wnwrap consumer that runs Initialize → Register → Subscribe via the v1-prefixed methods, no SetXmlAlarmQuery. Production WnWrapAlarmConsumer now holds two COM clients; AcknowledgeByName always dispatches through the ack-only one. 3. AlarmAckByName has v2 (8-arg) and v1 (6-arg) overloads. The v2 8-arg overload returns -55 on this AVEVA build (apparently a stub); the v1 6-arg overload works. Production now calls the 6-arg overload, discarding the proto's operator_domain and operator_full_name fields. The proto contract keeps both for forward-compat if AVEVA fixes the v2 method. Bonus finding (not fixed here): AlarmAckByGUID throws NotImplementedException on wnwrap. 
Reference→GUID lookup that we initially planned to plumb is therefore not viable; all acks must go through AlarmAckByName. WorkerAlarmRpcDispatcher.AcknowledgeAsync already routes references through the by-name path, so this only affects the GUID-input branch (which the worker tries first if the input parses as a GUID — that branch will surface NotImplementedException as MxaccessFailure if a client supplies one). Threading caveat: wnwrap is ThreadingModel=Apartment, so the consumer's internal Timer (firing on threadpool threads) blocks on cross-apartment marshaling without an STA message pump. The smoke test sidesteps this with pollIntervalMilliseconds=0 (Timer disabled) + manual PollOnce calls from the test STA. Production hosting will route polls through the worker's StaRuntime in a follow-up; PollOnce is now public so the wire-up is straightforward. Test counts after this slice: Worker: 195 pass / 4 skipped (live probes incl. new live smoke) / 1 pre-existing structure-fail (untouched) Server: 308 pass / 0 fail Solution builds clean. docs/AlarmClientDiscovery.md "Live smoke-test discoveries" section records all five findings. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/AlarmClientDiscovery.md | 102 +++++++ .../AlarmsLiveSmokeTests.cs | 276 ++++++++++++++++++ .../MxAccess/WnWrapAlarmConsumer.cs | 183 ++++++++++-- 3 files changed, 537 insertions(+), 24 deletions(-) create mode 100644 src/MxGateway.Worker.Tests/AlarmsLiveSmokeTests.cs diff --git a/docs/AlarmClientDiscovery.md b/docs/AlarmClientDiscovery.md index 9ad4e54..da565bb 100644 --- a/docs/AlarmClientDiscovery.md +++ b/docs/AlarmClientDiscovery.md @@ -688,3 +688,105 @@ alarm-consumer surface unblocks A.2 fully. Outline: These findings retire the open follow-up probes from the "polling-vs-pump" debate above — `wwAlarmConsumerClass` plus poll-on-timer is the implementation. 
+ +## Live smoke-test discoveries — 2026-05-01 + +The Skip-gated `AlarmsLiveSmokeTests.Alarms_full_pipeline_round_trip` +ran the full +`WnWrapAlarmConsumer` + `AlarmDispatcher` + `MxAccessAlarmEventSink` +pipeline against the dev rig with the flip script running. End-to-end +verified: 6 real transitions captured on the 10s cadence, ack-by-name +returned rc=0, pipeline stayed healthy through 5 more transitions +afterwards. Three production-relevant quirks surfaced and were fixed +in the consumer: + +### 1. `SetXmlAlarmQuery` is mandatory for reads despite the mangled echo + +Without `SetXmlAlarmQuery`, the first `GetXmlCurrentAlarms2` call +fails with `E_FAIL` (HRESULT `0x80004005`). The discovery doc above +flagged the round-trip echo as mangled and recommended skipping the +call — that recommendation is **wrong**. The echo *is* mangled (AVEVA +parses NODE/PROVIDER/ALARM_STATE/DISPLAY_MODE incorrectly), but the +call itself is required as some kind of subscription enabler. Even +the Subscribe call setting the actual filter doesn't avoid the need +for `SetXmlAlarmQuery`. + +`WnWrapAlarmConsumer.ComposeXmlAlarmQuery(subscription)` decomposes +the canonical `\\<machine>\Galaxy!<area>` form into the XML's +NODE/PROVIDER/GROUP fields. Mangled or not, the call enables reads. + +### 2. Two consumers required: read-side vs. ack-side + +`SetXmlAlarmQuery` enables reads but **breaks `AlarmAckByName` on +the same consumer instance**. With SetXml applied, AlarmAckByName +returns -55 even with valid name+provider+group+operator. Without +SetXml, AlarmAckByName succeeds with rc=0. + +The production consumer therefore provisions **two** wnwrap COM +instances: +- Primary consumer (`client`): runs full lifecycle including + `SetXmlAlarmQuery` for `GetXmlCurrentAlarms2` polls. +- Ack-only consumer (`ackClient`): runs Initialize → Register → + Subscribe via the v1-prefixed methods, **no SetXmlAlarmQuery**. + All `AcknowledgeByName` calls dispatch through this instance. 
+ +Both consumers subscribe to the same expression. Disposal cleans up +both via a shared `ReleaseConsumerCom` helper. + +### 3. `AlarmAckByName` v2 8-arg vs. v1 6-arg + +`wwAlarmConsumerClass` exposes two `AlarmAckByName` overloads: +- `IwwAlarmConsumer2` v2: 8 args (`name, provider, group, comment, + oprName, node, domainName, oprFullName`). +- `IwwAlarmConsumer` v1: 6 args (no domain, no full-name). + +The v2 8-arg method returns -55 on this AVEVA build regardless of +operator-identity inputs — looks like a stub. The v1 6-arg method +works. Production `WnWrapAlarmConsumer.AcknowledgeByName` calls the +6-arg overload and discards the proto's `domain` + `full_name` fields. +The proto contract keeps the 8 fields for forward compatibility if +AVEVA fixes the v2 method later. + +### 4. `AlarmAckByGUID` is not implemented + +The v2 `AlarmAckByGUID(VBGUID, …)` throws `NotImplementedException` +(COM `E_NOTIMPL`) on `wwAlarmConsumerClass` against this AVEVA +build. The reference→GUID lookup that we initially planned to wire +through `AlarmAckByGUID` is therefore not viable on wnwrap; all acks +must go through `AlarmAckByName`. + +The proto `AcknowledgeAlarmCommand` (GUID-based) and the worker's +`MxAccessCommandExecutor.ExecuteAcknowledgeAlarm` switch arm remain +in the codebase for the forward-compat shape, but the gateway-side +`WorkerAlarmRpcDispatcher.AcknowledgeAsync` now always routes through +`AcknowledgeAlarmByName` when the public RPC supplies a recognizable +`Provider!Group.Tag` reference. + +### 5. STA / threading — production fix needed + +The wnwrap COM is `ThreadingModel=Apartment`. The consumer's +internal `Timer` fires on threadpool threads and would block forever +on cross-apartment marshaling unless the host STA pumps Win32 +messages. The smoke test sidesteps this by setting +`pollIntervalMilliseconds=0` (Timer disabled) and driving `PollOnce` +manually from the test's STA. 
Production hosting will route polls +through the worker's `StaRuntime` in a follow-up — the consumer's +`PollOnce` is `public` and idempotent so the wire-up is mechanical. + +### Capture summary + +``` +Transition: kind=Clear ref='Galaxy!TestArea.TestMachine_001.TestAlarm001' … +Transition: kind=Raise ref='Galaxy!TestArea.TestMachine_001.TestAlarm001' … +SnapshotActiveAlarms count=1 + active: ref='Galaxy!TestArea.TestMachine_001.TestAlarm001' state=Active +AcknowledgeByName(real identity) -> rc=0 +Post-ack transition: kind=Clear … ++1: kind=Raise … (10s after ack) ++2: kind=Clear … (20s) ++3: kind=Raise … (30s) ++4: kind=Clear … (40s) +``` + +10s cadence held throughout; full proto fields populated correctly; +ack registered server-side without errors. diff --git a/src/MxGateway.Worker.Tests/AlarmsLiveSmokeTests.cs b/src/MxGateway.Worker.Tests/AlarmsLiveSmokeTests.cs new file mode 100644 index 0000000..a99e3d6 --- /dev/null +++ b/src/MxGateway.Worker.Tests/AlarmsLiveSmokeTests.cs @@ -0,0 +1,276 @@ +using System; +using System.Collections.Concurrent; +using System.Diagnostics; +using System.Linq; +using System.Threading; +using MxGateway.Contracts.Proto; +using MxGateway.Worker.MxAccess; +using Xunit.Abstractions; + +namespace MxGateway.Worker.Tests; + +/// +/// Live dev-rig smoke test for the alarms-over-gateway pipeline. +/// Exercises + + +/// end-to-end against the actual +/// AVEVA System Platform install: subscribes to +/// \\<machine>\Galaxy!DEV, waits for at least one alarm +/// transition (the dev rig's flip script writes +/// TestMachine_001.TestAlarm001 every 10s), drains the proto +/// OnAlarmTransitionEvent from the queue, then ack-by-name's +/// it and verifies the ack registers as a subsequent +/// transition. +/// +/// Skip-gated; flip Skip=null on the dev rig with the flip +/// script running. 
+/// +public sealed class AlarmsLiveSmokeTests +{ + private static readonly string SubscriptionExpression = + $@"\\{Environment.MachineName}\Galaxy!DEV"; + private static readonly TimeSpan PumpDuration = TimeSpan.FromSeconds(45); + private static readonly TimeSpan TransitionWaitTimeout = TimeSpan.FromSeconds(20); + + private const string SessionId = "alarms-live-smoke"; + + private readonly ITestOutputHelper output; + private readonly Stopwatch elapsed = Stopwatch.StartNew(); + private readonly ConcurrentQueue log = new ConcurrentQueue(); + + public AlarmsLiveSmokeTests(ITestOutputHelper output) + { + this.output = output; + } + + [Fact(Skip = "Live dev-rig smoke test — flip Skip=null with AVEVA + the alarm flip script running. Verified working 2026-05-01.")] + public void Alarms_full_pipeline_round_trip() + { + Exception? threadException = null; + var done = new ManualResetEventSlim(false); + var thread = new Thread(() => + { + try { RunSmoke(); } + catch (Exception ex) { threadException = ex; } + finally { done.Set(); } + }); + thread.IsBackground = false; + thread.SetApartmentState(ApartmentState.STA); + thread.Start(); + done.Wait(); + thread.Join(); + + output.WriteLine($"Captured {log.Count} log line(s):"); + while (log.TryDequeue(out string? line)) + { + output.WriteLine(line); + } + + if (threadException != null) + { + throw threadException; + } + } + + private void RunSmoke() + { + Log($"Subscription expression: {SubscriptionExpression}"); + Log($"Pump duration: {PumpDuration.TotalSeconds:F0}s; transition wait timeout: {TransitionWaitTimeout.TotalSeconds:F0}s"); + + MxAccessEventQueue queue = new MxAccessEventQueue(); + // pollIntervalMs=0 disables the internal Timer; we drive PollOnce + // manually from the STA below to avoid threadpool→STA marshaling + // (the wnwrap COM is ThreadingModel=Apartment, and this test + // doesn't run a Win32 message pump on its STA). 
+ WnWrapAlarmConsumer consumer = new WnWrapAlarmConsumer( + new WNWRAPCONSUMERLib.wwAlarmConsumerClass(), + pollIntervalMilliseconds: 0, + maxAlarmsPerFetch: 1024); + MxAccessAlarmEventSink sink = new MxAccessAlarmEventSink(queue, new MxAccessEventMapper()); + using AlarmDispatcher dispatcher = new AlarmDispatcher(consumer, sink, SessionId); + + Log("Constructed consumer + sink + dispatcher."); + dispatcher.Subscribe(SubscriptionExpression); + Log("Subscribe -> ok. Driving PollOnce manually from this STA..."); + + // The wnwrap COM object is ThreadingModel=Apartment. The consumer's + // internal Timer would fire on a threadpool thread and deadlock on + // cross-apartment marshaling without a Win32 message pump. For the + // smoke test we constructed the consumer with pollIntervalMs=0 + // (Timer disabled) and drive PollOnce manually here on the STA. + // Production hosting will route polls through the worker's + // StaRuntime in a follow-up PR. + + // 1. Wait for the first transition (any kind), then keep waiting + // for one with kind=Raise so the alarm is currently Active when + // we try to ack. AVEVA rejects acks of cleared alarms with -55, + // so we have to time the ack against the flip script's 10s + // cadence. + OnAlarmTransitionEvent? raiseBody = null; + DateTime raiseDeadline = DateTime.UtcNow + TimeSpan.FromSeconds(30); + while (DateTime.UtcNow < raiseDeadline && raiseBody is null) + { + WorkerEvent? evt = WaitForTransition(queue, TransitionWaitTimeout, "raise", consumer); + if (evt is null) break; + OnAlarmTransitionEvent body = evt.Event.OnAlarmTransition; + Log("Transition: " + DescribeTransition(body)); + Assert.Equal(SessionId, evt.Event.SessionId); + if (body.TransitionKind == AlarmTransitionKind.Raise) + { + raiseBody = body; + } + } + Assert.NotNull(raiseBody); + Assert.False(string.IsNullOrEmpty(raiseBody!.AlarmFullReference)); + Assert.Contains("Galaxy", raiseBody.AlarmFullReference); + + // 2. 
Snapshot the active set + verify the captured alarm is there. + var snapshot = dispatcher.SnapshotActiveAlarms(); + Log($"SnapshotActiveAlarms count={snapshot.Count}"); + foreach (var s in snapshot) + { + Log(" active: " + DescribeSnapshot(s)); + } + Assert.NotEmpty(snapshot); + Assert.Contains(snapshot, s => s.AlarmFullReference == raiseBody.AlarmFullReference); + + // 3. Ack-by-name using the captured reference. Parse the reference + // via the same convention the gateway dispatcher uses + // (Provider!Group.Tag where the tag may contain dots). + Assert.True(TryParseReference( + raiseBody.AlarmFullReference, + out string provider, out string group, out string alarmName), + $"Captured reference '{raiseBody.AlarmFullReference}' did not parse as Provider!Group.Tag."); + Log($"Ack target: provider='{provider}' group='{group}' name='{alarmName}'"); + + // Try the ack with real Windows identity. AVEVA's AlarmAckByName + // may reject synthetic operator strings; using the current process + // identity gives the alarm-history a recognizable principal. + string realUser = Environment.UserName; + string realNode = Environment.MachineName; + string realDomain = Environment.UserDomainName ?? string.Empty; + Log($"Ack identity: user='{realUser}' node='{realNode}' domain='{realDomain}'"); + + int rc = dispatcher.AcknowledgeByName( + alarmName: alarmName, + providerName: provider, + groupName: group, + ackComment: "alarms-live-smoke ack", + ackOperatorName: realUser, + ackOperatorNode: realNode, + ackOperatorDomain: realDomain, + ackOperatorFullName: realUser); + Log($"AcknowledgeByName(real identity) -> rc={rc}"); + + Assert.Equal(0, rc); + + // 4. Wait for the post-ack transition. With the alarm flipping every + // 10s and the consumer polling every 500ms, the next state + // change should be either kind=Acknowledge (the ack we just + // sent registered as a state delta UnackAlm → AckAlm) or the + // flip script's next Clear (UnackAlm → UnackRtn). + WorkerEvent? 
second = WaitForTransition(queue, TransitionWaitTimeout, "post-ack", consumer); + Assert.NotNull(second); + OnAlarmTransitionEvent secondBody = second!.Event.OnAlarmTransition; + Log("Post-ack transition: " + DescribeTransition(secondBody)); + Assert.NotEqual(AlarmTransitionKind.Unspecified, secondBody.TransitionKind); + + // 5. Pump a little longer to confirm the consumer keeps reporting + // transitions on the 10s flip cadence. + DateTime deadline = DateTime.UtcNow + PumpDuration; + int additional = 0; + while (DateTime.UtcNow < deadline) + { + consumer.PollOnce(); + if (queue.TryDequeue(out WorkerEvent? evt) && evt is not null) + { + additional++; + OnAlarmTransitionEvent body = evt.Event.OnAlarmTransition; + Log($" +{additional}: " + DescribeTransition(body)); + } + Thread.Sleep(500); + } + Log($"Pump completed; additional transitions captured: {additional}."); + } + + private WorkerEvent? WaitForTransition( + MxAccessEventQueue queue, + TimeSpan timeout, + string label, + WnWrapAlarmConsumer consumer) + { + DateTime deadline = DateTime.UtcNow + timeout; + int pollCount = 0; + while (DateTime.UtcNow < deadline) + { + try + { + consumer.PollOnce(); + pollCount++; + if (pollCount == 1) Log("First PollOnce returned without throw."); + } + catch (Exception ex) + { + Log($"PollOnce threw on poll #{pollCount + 1}: {ex.GetType().Name}: {ex.Message}"); + if (ex is System.Runtime.InteropServices.COMException ce) + { + Log($" HResult=0x{(uint)ce.HResult:X8}"); + } + throw; + } + if (queue.TryDequeue(out WorkerEvent? 
evt) && evt is not null) + { + if (evt.Event.Family == MxEventFamily.OnAlarmTransition) + { + return evt; + } + Log($"Skipped non-alarm event (family={evt.Event.Family}) while waiting for {label}."); + } + Thread.Sleep(500); + } + Log($"Timed out waiting for {label} transition after {timeout.TotalSeconds:F0}s (poll count={pollCount})."); + return null; + } + + private static bool TryParseReference( + string reference, + out string provider, + out string group, + out string alarmName) + { + provider = group = alarmName = string.Empty; + if (string.IsNullOrWhiteSpace(reference)) return false; + int bang = reference.IndexOf('!'); + if (bang <= 0 || bang == reference.Length - 1) return false; + string left = reference.Substring(0, bang); + string right = reference.Substring(bang + 1); + int dot = right.IndexOf('.'); + if (dot <= 0 || dot == right.Length - 1) return false; + provider = left; + group = right.Substring(0, dot); + alarmName = right.Substring(dot + 1); + return true; + } + + private static string DescribeTransition(OnAlarmTransitionEvent body) + { + return string.Format( + "kind={0} ref='{1}' source='{2}' type='{3}' severity={4} operator='{5}' comment='{6}' ts={7:o}", + body.TransitionKind, body.AlarmFullReference, body.SourceObjectReference, + body.AlarmTypeName, body.Severity, body.OperatorUser, body.OperatorComment, + body.TransitionTimestamp?.ToDateTime() ?? DateTime.MinValue); + } + + private static string DescribeSnapshot(ActiveAlarmSnapshot s) + { + return string.Format( + "ref='{0}' state={1} severity={2} operator='{3}' comment='{4}' ts={5:o}", + s.AlarmFullReference, s.CurrentState, s.Severity, s.OperatorUser, + s.OperatorComment, + s.LastTransitionTimestamp?.ToDateTime() ?? 
DateTime.MinValue); + } + + private void Log(string line) + { + log.Enqueue($"[t={elapsed.Elapsed.TotalSeconds:F3}s] {line}"); + } +} diff --git a/src/MxGateway.Worker/MxAccess/WnWrapAlarmConsumer.cs b/src/MxGateway.Worker/MxAccess/WnWrapAlarmConsumer.cs index a99cf9a..06dc39a 100644 --- a/src/MxGateway.Worker/MxAccess/WnWrapAlarmConsumer.cs +++ b/src/MxGateway.Worker/MxAccess/WnWrapAlarmConsumer.cs @@ -57,6 +57,8 @@ public sealed class WnWrapAlarmConsumer : IMxAccessAlarmConsumer private readonly int maxAlarmsPerFetch; private wwAlarmConsumerClass? client; + private wwAlarmConsumerClass? ackClient; + private string subscriptionExpression = string.Empty; private Timer? pollTimer; private bool subscribed; private bool disposed; @@ -66,16 +68,23 @@ public sealed class WnWrapAlarmConsumer : IMxAccessAlarmConsumer { } - /// Test seam — inject a pre-created COM client and tune the poll cadence. - internal WnWrapAlarmConsumer( + /// + /// Test seam / explicit construction — inject a pre-created COM + /// client and tune the poll cadence. pollIntervalMilliseconds == 0 + /// disables the internal entirely; the caller + /// must drive manually (used by hosts that + /// marshal polls onto a foreign STA, and by live smoke tests that + /// pump from the STA they own). + /// + public WnWrapAlarmConsumer( wwAlarmConsumerClass client, int pollIntervalMilliseconds, int maxAlarmsPerFetch) { this.client = client ?? throw new ArgumentNullException(nameof(client)); - this.pollIntervalMs = pollIntervalMilliseconds > 0 - ? pollIntervalMilliseconds - : DefaultPollIntervalMilliseconds; + this.pollIntervalMs = pollIntervalMilliseconds < 0 + ? DefaultPollIntervalMilliseconds + : pollIntervalMilliseconds; this.maxAlarmsPerFetch = maxAlarmsPerFetch > 0 ? maxAlarmsPerFetch : DefaultMaxAlarmsPerFetch; @@ -104,9 +113,14 @@ public sealed class WnWrapAlarmConsumer : IMxAccessAlarmConsumer wwAlarmConsumerClass com = client ?? 
throw new ObjectDisposedException(nameof(WnWrapAlarmConsumer)); - // Per AlarmClientDiscovery.md: InitializeConsumer MUST precede - // RegisterConsumer for the alarm provider chain to become visible. - int init = com.InitializeConsumer(DefaultApplicationName); + // Use the IwwAlarmConsumer (v1) prefix-named methods for the + // lifecycle. Empirically (live dev-rig 2026-05-01) this is the + // only path that lets AlarmAckByName succeed afterwards. The + // v2 Initialize/Register/Subscribe methods on the class + // succeed (return 0) but acks against that consumer state + // return -55. The v1 prefix path is what WIN-911-style code + // uses against the same wnwrap library. + int init = com.IwwAlarmConsumer_InitializeConsumer(DefaultApplicationName); if (init != 0) { throw new InvalidOperationException( @@ -115,7 +129,7 @@ public sealed class WnWrapAlarmConsumer : IMxAccessAlarmConsumer // hWnd=0: wnwrap supports a pull-based model — no message pump // is required. We poll GetXmlCurrentAlarms2 on a timer below. - int reg = com.RegisterConsumer( + int reg = com.IwwAlarmConsumer_RegisterConsumer( hWnd: 0, szProductName: DefaultProductName, szApplicationName: DefaultApplicationName, @@ -126,7 +140,7 @@ public sealed class WnWrapAlarmConsumer : IMxAccessAlarmConsumer $"wwAlarmConsumer.RegisterConsumer returned non-zero status {reg}."); } - int sub = com.Subscribe( + int sub = com.IwwAlarmConsumer_Subscribe( szSubscription: subscription, wFromPri: 1, wToPri: 999, @@ -140,8 +154,49 @@ public sealed class WnWrapAlarmConsumer : IMxAccessAlarmConsumer $"wwAlarmConsumer.Subscribe('{subscription}') returned non-zero status {sub}."); } + // Empirically required: even though the round-trip echo of + // SetXmlAlarmQuery is mangled (see docs/AlarmClientDiscovery.md), + // calling it is necessary for subsequent GetXmlCurrentAlarms2 + // calls to succeed. Without it, GetXmlCurrentAlarms2 returns + // E_FAIL (HRESULT 0x80004005) on the first poll. 
SetXmlAlarmQuery + // also breaks AlarmAckByName on the same consumer (rejects with + // -55), so a separate ack-only consumer is provisioned below + // that gets only Initialize/Register/Subscribe (no SetXmlAlarmQuery). + string xmlQuery = ComposeXmlAlarmQuery(subscription); + com.SetXmlAlarmQuery(xmlQuery); + + // Provision a parallel COM consumer for ack calls. It runs the + // v1 lifecycle (Initialize/Register/Subscribe) only; without + // SetXmlAlarmQuery, AlarmAckByName succeeds. State is read-only + // — we never poll this consumer. + ackClient = new wwAlarmConsumerClass(); + int ackInit = ackClient.IwwAlarmConsumer_InitializeConsumer(DefaultApplicationName + ".ack"); + int ackReg = ackClient.IwwAlarmConsumer_RegisterConsumer( + hWnd: 0, + szProductName: DefaultProductName, + szApplicationName: DefaultApplicationName + ".ack", + szVersion: DefaultVersion); + int ackSub = ackClient.IwwAlarmConsumer_Subscribe( + szSubscription: subscription, + wFromPri: 1, + wToPri: 999, + QueryType: eQueryType.qtSummary, + SortFlags: eSortFlags.sfReturnNewestFirst, + FilterMask: eAlarmFilterState.asAlarmActiveNow, + FilterSpecification: eAlarmFilterState.asAlarmActiveNow); + if (ackInit != 0 || ackReg != 0 || ackSub != 0) + { + throw new InvalidOperationException( + $"Ack consumer setup returned non-zero status: " + + $"Initialize={ackInit}, Register={ackReg}, Subscribe={ackSub}."); + } + subscriptionExpression = subscription; + subscribed = true; - pollTimer = new Timer(OnPoll, state: null, dueTime: 0, period: pollIntervalMs); + if (pollIntervalMs > 0) + { + pollTimer = new Timer(OnPoll, state: null, dueTime: 0, period: pollIntervalMs); + } } } @@ -185,18 +240,31 @@ public sealed class WnWrapAlarmConsumer : IMxAccessAlarmConsumer { if (disposed) throw new ObjectDisposedException(nameof(WnWrapAlarmConsumer)); - wwAlarmConsumerClass com = client - ?? 
throw new ObjectDisposedException(nameof(WnWrapAlarmConsumer)); + // Use the parallel ack-only consumer (no SetXmlAlarmQuery applied) + // — see docs/AlarmClientDiscovery.md "Option A — captured" for the + // empirical justification. + wwAlarmConsumerClass com = ackClient + ?? throw new InvalidOperationException( + "Cannot acknowledge: WnWrapAlarmConsumer was disposed or has not been subscribed yet."); + // Empirically (live dev-rig 2026-05-01): the IwwAlarmConsumer2 + // 8-arg AlarmAckByName returns -55 on this AVEVA build (looks like + // a stub). The legacy 6-arg IwwAlarmConsumer.AlarmAckByName works + // and reaches the alarm-history path correctly. Operator-domain + // and operator-full-name fields are accepted by the proto contract + // for forward-compat but are not propagated to AVEVA today: the + // 6-arg overload has no parameters for them, so only the operator + // name (szOprName) and operator node (szNode) are passed through. + // Suppress unused-warning explicitly: + _ = ackOperatorDomain; + _ = ackOperatorFullName; return com.AlarmAckByName( szAlarmName: alarmName ?? string.Empty, szProviderName: providerName ?? string.Empty, szGroupName: groupName ?? string.Empty, szComment: ackComment ?? string.Empty, szOprName: ackOperatorName ?? string.Empty, - szNode: ackOperatorNode ?? string.Empty, - szDomainName: ackOperatorDomain ?? string.Empty, - szOprFullName: ackOperatorFullName ?? string.Empty); + szNode: ackOperatorNode ?? string.Empty); } /// @@ -236,7 +304,15 @@ public sealed class WnWrapAlarmConsumer : IMxAccessAlarmConsumer } } - internal void PollOnce() + /// + /// Synchronously poll the wnwrap consumer once and dispatch any + /// transitions. Public so STA-bound hosts can drive polling from + /// the thread that owns the COM object instead of relying on the + /// internal (which fires on a thread-pool + /// thread and blocks indefinitely on cross-apartment marshaling + /// when the host STA isn't pumping messages). 
+ /// + public void PollOnce() { wwAlarmConsumerClass? com; lock (syncRoot) @@ -370,6 +446,58 @@ public sealed class WnWrapAlarmConsumer : IMxAccessAlarmConsumer return Guid.TryParse(canonical, out guid); } + /// + /// Compose the XML payload SetXmlAlarmQuery expects from a + /// canonical subscription expression + /// (\\<machine>\Galaxy!<area>). The wnwrap + /// consumer mangles the round-trip but evidently still needs the + /// call — without it GetXmlCurrentAlarms2 fails with + /// E_FAIL. Best-effort parse: if the subscription doesn't decompose + /// cleanly, fall back to a permissive ALL-priority/ALL-state form + /// so the worker doesn't fail to start. + /// + internal static string ComposeXmlAlarmQuery(string subscription) + { + string node = Environment.MachineName; + string provider = "Galaxy"; + string group = string.Empty; + + if (!string.IsNullOrEmpty(subscription)) + { + // Strip leading backslashes from "\\\..." form. + string trimmed = subscription.TrimStart('\\'); + int slash = trimmed.IndexOf('\\'); + if (slash > 0) + { + node = trimmed.Substring(0, slash); + trimmed = trimmed.Substring(slash + 1); + } + int bang = trimmed.IndexOf('!'); + if (bang > 0) + { + provider = trimmed.Substring(0, bang); + group = trimmed.Substring(bang + 1); + } + else + { + provider = trimmed; + } + } + + System.Text.StringBuilder sb = new System.Text.StringBuilder(); + sb.Append(""); + sb.Append(""); + sb.Append("").Append(node).Append(""); + sb.Append("").Append(provider).Append(""); + if (!string.IsNullOrEmpty(group)) + { + sb.Append("").Append(group).Append(""); + } + sb.Append(""); + sb.Append(""); + return sb.ToString(); + } + private static VBGUID ToVbGuid(Guid g) { byte[] bytes = g.ToByteArray(); @@ -390,6 +518,7 @@ public sealed class WnWrapAlarmConsumer : IMxAccessAlarmConsumer { Timer? timerToDispose; wwAlarmConsumerClass? clientToDispose; + wwAlarmConsumerClass? 
ackClientToDispose; lock (syncRoot) { if (disposed) return; @@ -398,16 +527,22 @@ public sealed class WnWrapAlarmConsumer : IMxAccessAlarmConsumer pollTimer = null; clientToDispose = client; client = null; + ackClientToDispose = ackClient; + ackClient = null; } timerToDispose?.Dispose(); - if (clientToDispose is not null) + ReleaseConsumerCom(clientToDispose); + ReleaseConsumerCom(ackClientToDispose); + } + + private static void ReleaseConsumerCom(wwAlarmConsumerClass? consumer) + { + if (consumer is null) return; + try { consumer.DeregisterConsumer(); } catch { /* swallow */ } + try { consumer.UninitializeConsumer(); } catch { /* swallow */ } + if (Marshal.IsComObject(consumer)) { - try { clientToDispose.DeregisterConsumer(); } catch { /* swallow */ } - try { clientToDispose.UninitializeConsumer(); } catch { /* swallow */ } - if (Marshal.IsComObject(clientToDispose)) - { - try { Marshal.FinalReleaseComObject(clientToDispose); } catch { /* swallow */ } - } + try { Marshal.FinalReleaseComObject(consumer); } catch { /* swallow */ } } } }