diff --git a/docs/AlarmClientDiscovery.md b/docs/AlarmClientDiscovery.md index 39e4484..0a05b7f 100644 --- a/docs/AlarmClientDiscovery.md +++ b/docs/AlarmClientDiscovery.md @@ -335,6 +335,69 @@ alarms truly only flow through the historian event-storage path, A.2 needs to consume from `aahEventStorage` instead — a fundamental architecture pivot. +## BREAKTHROUGH — seventh probe run, 2026-05-01 + +Two changes finally produced a signal: + +1. **Subscription scope:** `\\<Node>\Galaxy!<Area>` is the + canonical AlarmClient subscription format (per ArchestrA Alarm + Client docs at `archestra6.rssing.com/chan-12008125/article13.html`): + `\\Node\Provider!Area!Filter`, where Node is the *machine* name, + Provider is **literally `Galaxy`**, and Area is a hosted area + object. For this rig (`\\DESKTOP-6JL3KKO\Galaxy!DEV`) the DEV + area — the platform's primary area — is the right scope. Earlier + `\Galaxy!`, `\Galaxy!TestArea`, `\\.\Galaxy!`, etc., all returned + rc=0 but matched no traffic — they were not the canonical form. +2. **`InitializeConsumer` before `RegisterConsumer`** — already + discovered earlier; bug-fix for PR A.5's `AlarmClientConsumer`. + +With both in place, `GetHighPriAlarm` found a record on every +poll for 60s straight (117/117 calls), but each call threw +`ArgumentOutOfRangeException: Not a valid Win32 FileTime` instead +of returning the record — the AlarmRecord struct contains five +DateTime fields (`ar_Time`, `ar_OrigTime`, `ar_AckTime`, +`ar_RtnTime`, `ar_SubTime`) and AVEVA writes sentinel/invalid +FILETIME values for unset ones (e.g., `ar_AckTime` for an +unacknowledged alarm). The .NET interop that AVEVA ships +(`aaAlarmManagedClient.dll`) auto-converts FILETIME→DateTime and +rejects out-of-range values. + +`GetStatistics` continues to report `total=0 active=0` even with +`GetHighPriAlarm` finding records — those two API surfaces have +genuinely different views in AVEVA's data model. 
+ +So: **alarms flow through `aaAlarmManagedClient.AlarmClient` once +the subscription expression is canonical**. The blocking issue is +extracting the payload past the .NET interop's DateTime +auto-marshaling. + +## Remaining work to capture alarm payloads + +Define a custom COM interop that uses `long` (FILETIME-as-int64) +instead of `DateTime` for the timestamp fields. Approach options: + +1. **Patch the AVEVA-shipped `aaAlarmManagedClient.dll`** — ildasm + the assembly, replace `DateTime` with `long` on AlarmRecord's + timestamp fields, ilasm back. Brittle across AVEVA upgrades. +2. **Write our own `[ComImport]` interface** — declare + `IRawAlarmConsumer` ourselves with safe-blittable types, + discover the underlying COM IID (via reflection on + `AlarmClient`'s `[Guid]` attribute), and `(IRawAlarmConsumer) + alarmClient` cast. Cleaner; requires the IID. +3. **Use `IDispatch` late binding** — dispatch-Invoke bypasses + strong-typed marshaling. Verbose but doesn't need IIDs. + +For PR A.2's worker integration, option 2 is the least +disruptive. Once the interop is custom, `AlarmClient.Subscribe` + +`GetHighPriAlarm` + `GetAlarmExtendedRec` form a viable +polling-style alarm consumer. + +The polling-vs-WM_APP-callback question from earlier is now +moot: `GetStatistics`'s `positions[]/handles[]` arrays remained +empty even when alarms were demonstrably present. The active +read API for current alarms is `GetHighPriAlarm`, not +`GetStatistics`'s change array. 
+ ### Implications for A.2 implementation The A.2 PR's value is unmeasurable until at least one alarm diff --git a/src/MxGateway.Worker.Tests/AlarmClientWmProbeTests.cs b/src/MxGateway.Worker.Tests/AlarmClientWmProbeTests.cs index 44ac80e..6e156c4 100644 --- a/src/MxGateway.Worker.Tests/AlarmClientWmProbeTests.cs +++ b/src/MxGateway.Worker.Tests/AlarmClientWmProbeTests.cs @@ -35,13 +35,20 @@ public sealed class AlarmClientWmProbeTests : IDisposable // Try multiple subscription expressions sequentially (each Subscribe call // adds to the consumer's scope). The "everything" form varies by AVEVA // version — we shotgun common forms. + // Canonical AlarmClient subscription format (per ArchestrA docs): + // \\Node\Provider!Area!Filter + // - Node: machine name (NOT galaxy name; "Galaxy" is the literal provider) + // - Provider: literal "Galaxy" + // - Area: area object the engine hosts the alarm under + // Note: each Subscribe call REPLACES the prior subscription on the + // consumer, so we test exactly one expression per probe run. + private static readonly string MachineName = Environment.MachineName; private static readonly string[] SubscriptionExpressions = { - @"\Galaxy!", // documented "all groups under Galaxy provider" - @"\Galaxy!*", // wildcard variant - @"\\Galaxy!", // double-backslash UNC-style - @"\Galaxy!TestArea", // explicit area where TestMachine_001 lives - @"\\.\Galaxy!", // local-host prefix + // DEV is the top-level area on the Platform (TestArea is contained + // within DEV). Alarms typically publish at the platform's primary + // area. If TestArea-only doesn't catch them, DEV should. 
+ $@"\\{MachineName}\Galaxy!DEV", }; private const string SubscriptionExpression = @"\Galaxy!"; private static readonly TimeSpan PumpDuration = TimeSpan.FromSeconds(60); @@ -352,6 +359,8 @@ public sealed class AlarmClientWmProbeTests : IDisposable } Log($"Pump duration {PumpDuration.TotalSeconds:F0}s elapsed; deregistering."); + Log($"GetHighPriAlarm tally: ok-with-record={getHighPriOk} threw={getHighPriThrow} " + + $"(throws indicate alarm-record marshaling failure; ok=empty record)."); try { int dereg = client.DeregisterConsumer(); Log($"DeregisterConsumer -> {dereg}"); } catch (Exception ex) { Log($"DeregisterConsumer threw: {ex.GetType().Name}: {ex.Message}"); } @@ -375,6 +384,8 @@ public sealed class AlarmClientWmProbeTests : IDisposable private string lastProvidersSummary = string.Empty; private string lastHighPriSummary = string.Empty; private string lastSfStatsSummary = string.Empty; + private int getHighPriOk = 0; + private int getHighPriThrow = 0; /// /// Try every read API the AlarmClient exposes and log when its @@ -406,25 +417,32 @@ public sealed class AlarmClientWmProbeTests : IDisposable private void PollAllChannels(AlarmClient client, int seq) { - // Channel A: GetHighPriAlarm — direct peek of highest-priority alarm. + // Channel A: GetHighPriAlarm — peek highest-priority alarm. Track + // outcome state (record/empty/throw) and log every transition AND + // total counts at end. The throw correlates with an alarm being + // present (AVEVA fills timestamps with sentinel FILETIME values + // that crash the .NET marshaler) — useful as a presence signal + // even if we can't read the record. try { AlarmRecord rec = NewAlarmRecord(); int rc = client.GetHighPriAlarm(ref rec); string desc = rc == 0 ? 
DescribeAlarmRecord(rec) : ""; string summary = $"rc={rc} {desc}"; + getHighPriOk++; if (summary != lastHighPriSummary) { - Log($"GetHighPriAlarm #{seq}: {summary} (changed)"); + Log($"GetHighPriAlarm #{seq}: {summary} (changed; ok={getHighPriOk}, throw={getHighPriThrow})"); lastHighPriSummary = summary; } } catch (Exception ex) { - string es = $"{ex.GetType().Name}: {ex.Message}"; + string es = $"{ex.GetType().Name}"; + getHighPriThrow++; if (es != lastHighPriSummary) { - Log($"GetHighPriAlarm #{seq}: threw {es}"); + Log($"GetHighPriAlarm #{seq}: threw {es} (changed; ok={getHighPriOk}, throw={getHighPriThrow})"); lastHighPriSummary = es; } }