diff --git a/docs/AlarmClientDiscovery.md b/docs/AlarmClientDiscovery.md index fe1f42c..537856f 100644 --- a/docs/AlarmClientDiscovery.md +++ b/docs/AlarmClientDiscovery.md @@ -157,6 +157,57 @@ AVEVA's own message-pump thread — confirmable by extending the probe to fire `GetStatistics` on its own thread and check the result. +## Alarm-provider visibility — third probe run, 2026-05-01 + +Extended the probe to call `AlarmClient.GetProviders` after +`RegisterConsumer`. Result on this rig: + +``` +GetProviders -> rc=0 count=0 list=[] +``` + +**Zero alarm providers visible to the consumer process.** This +explains every preceding probe run: no providers means no alarm +events, regardless of how many times any value (including a +bool with an `$Alarm` extension) flips. `Subscribe(@"\Galaxy!")` +returns 0 (success) but matches nothing because the alarm-manager +chain that provides the matching feed doesn't expose any provider +to this consumer. + +A System Platform script flipping `TestMachine_001.TestAlarm001` +every 10s during this probe run produced no observable +`GetStatistics` transitions, no `positions[]` / `handles[]` +entries, no change in any field — confirms the silence is not +about subscription-scope / message-pump but about provider +absence. + +### Possible causes + +1. **No `$Alarm` extension on the test bool.** If + `TestMachine_001.TestAlarm001` is a regular UDA without a + `BoolAlarm` extension wired to it, flipping the value just + writes a new value — no alarm fires. +2. **Alarm manager service not running.** AVEVA's `aaAlarmMgr` + (or the equivalent on this rig's Platform version) needs to + be running for providers to register. +3. **Process security context.** A consumer running under a + normal user account may not see providers that registered + under `LocalSystem` / a Platform service identity. The + gateway-worker installation runs under a service account + that may have access where `dotnet test` doesn't. 
+ +### Implications for A.2 implementation + +The A.2 PR's value is unmeasurable until at least one alarm +provider is visible. The choice between polling-via-`GetStatistics` +and the callback path can only be decided by observing what +populates first when a real alarm fires. Without a provider, +both paths return the same "nothing happening" answer. + +Until that's resolved, A.2 implementation work is genuinely +blocked on a dev-rig configuration issue — not on architectural +choice or code structure. + ## GetStatistics polling — second probe run, 2026-05-01 Extended the probe to call `GetStatistics` every ~2s alongside the diff --git a/src/MxGateway.Worker.Tests/AlarmClientWmProbeTests.cs b/src/MxGateway.Worker.Tests/AlarmClientWmProbeTests.cs index 5659d6c..303b220 100644 --- a/src/MxGateway.Worker.Tests/AlarmClientWmProbeTests.cs +++ b/src/MxGateway.Worker.Tests/AlarmClientWmProbeTests.cs @@ -4,8 +4,11 @@ using System.Collections.Generic; using System.Diagnostics; using System.Runtime.InteropServices; using System.Threading; +using System.Linq; +using System.Reflection; using AlarmMgrDataProviderCOM; using aaAlarmManagedClient; +using ArchestrA.MxAccess; using Xunit.Abstractions; namespace MxGateway.Worker.Tests; @@ -30,7 +33,13 @@ public sealed class AlarmClientWmProbeTests : IDisposable { // Probe configuration. Override in the constructor below if needed. private const string SubscriptionExpression = @"\Galaxy!"; - private static readonly TimeSpan PumpDuration = TimeSpan.FromSeconds(20); + private static readonly TimeSpan PumpDuration = TimeSpan.FromSeconds(60); + private static readonly TimeSpan PollInterval = TimeSpan.FromMilliseconds(500); + private static readonly TimeSpan FireMarkerAt = TimeSpan.FromSeconds(10); + private static readonly TimeSpan ClearMarkerAt = TimeSpan.FromSeconds(35); + // Tag the operator should flip while the probe is pumping. Default + // matches the dev rig's known alarmable boolean. 
+ private const string TriggerTagReference = "TestMachine_001.TestAlarm001"; [DllImport("user32.dll", SetLastError = true, CharSet = CharSet.Unicode, EntryPoint = "CreateWindowExW")] private static extern IntPtr CreateWindowEx( @@ -113,7 +122,7 @@ public sealed class AlarmClientWmProbeTests : IDisposable this.output = output; } - [Fact(Skip = "Runtime probe — flip Skip=null on the dev rig (with live Galaxy) to capture AVEVA WM_APP message IDs + GetStatistics polling results")] + [Fact(Skip = "Runtime probe — flip Skip=null on the dev rig (AVEVA installed) to capture alarm-path behavior")] public void ProbeAlarmClientWmMessages() { // 1. Pre-resolve a few candidate RegisterWindowMessage strings so any @@ -230,6 +239,19 @@ public sealed class AlarmClientWmProbeTests : IDisposable bRetainHiddenAlarms: false); Log($"RegisterConsumer -> {register}"); + // Discover what providers AVEVA sees before subscribing, so we + // can spot a wrong subscription expression up front. + try + { + var providers = new System.Collections.Generic.List(); + int gp = client.GetProviders(providers); + Log($"GetProviders -> rc={gp} count={providers.Count} list=[{string.Join(", ", providers)}]"); + } + catch (Exception ex) + { + Log($"GetProviders threw: {ex.GetType().Name}: {ex.Message}"); + } + int subscribe = client.Subscribe( szSubscription: SubscriptionExpression, wFromPri: 1, wToPri: 999, @@ -241,13 +263,19 @@ public sealed class AlarmClientWmProbeTests : IDisposable // 3c. Pump for the configured duration. Log every message we see // (filtered light to avoid noise from WM_PAINT / WM_TIMER / - // WM_GETICON spam from typical pumps). Every ~2s also call - // GetStatistics and snapshot up to N records, to test the - // polling design — if Galaxy has any active alarms or any - // have changed since Subscribe, we'll see them here. - DateTime deadline = DateTime.UtcNow + PumpDuration; - DateTime nextPoll = DateTime.UtcNow + TimeSpan.FromSeconds(2); + // WM_GETICON spam from typical pumps). 
Poll GetStatistics on + // a tight cadence so any alarm transition is captured. The + // trigger tag is flipped externally by a System Platform script, + // so this loop only observes and does not drive the alarm itself. + Log($"Probe running for {PumpDuration.TotalSeconds:F0}s. " + + $"Observing {TriggerTagReference} alarm transitions. " + + "External trigger expected from System Platform script (10s flip cadence)."); + + DateTime probeStart = DateTime.UtcNow; + DateTime deadline = probeStart + PumpDuration; + DateTime nextPoll = probeStart + PollInterval; int pollCount = 0; + while (DateTime.UtcNow < deadline) { while (PeekMessage(out MSG msg, IntPtr.Zero, 0, 0, PM_REMOVE)) @@ -256,10 +284,13 @@ public sealed class AlarmClientWmProbeTests : IDisposable TranslateMessage(ref msg); DispatchMessage(ref msg); } + // Trigger is supplied externally — a System Platform script + // flips TestMachine_001.TestAlarm001 every 10s. The probe + // observes only. if (DateTime.UtcNow >= nextPoll) { PollGetStatistics(client, ++pollCount); - nextPoll = DateTime.UtcNow + TimeSpan.FromSeconds(2); + nextPoll = DateTime.UtcNow + PollInterval; } Thread.Sleep(10); } @@ -284,6 +315,81 @@ public sealed class AlarmClientWmProbeTests : IDisposable } } + private string lastStatsSummary = string.Empty; + + /// <summary> + /// Drive an MxAccess write to <see cref="TriggerTagReference"/> with the + /// supplied boolean value. Creates a fresh `LMXProxyServer` COM object, + /// registers, adds the item, writes the value, and tears down. Runs on + /// the same STA thread the probe uses for the AlarmClient — both COM + /// objects share the apartment, which matches the worker's runtime. + /// </summary> + private void TriggerWriteValue(bool value, int sequence) + { + object? lmx = null; + ILMXProxyServer? 
srv = null; + int handle = 0, itemHandle = 0; + try + { + lmx = new LMXProxyServerClass(); + srv = (ILMXProxyServer)lmx; + handle = srv.Register($"AlarmProbe.Trigger.{sequence}"); + Log($"Trigger write #{sequence}: Register -> handle={handle}"); + itemHandle = srv.AddItem(handle, TriggerTagReference); + Log($"Trigger write #{sequence}: AddItem('{TriggerTagReference}') -> itemHandle={itemHandle}"); + + // First time only: dump every Write* method's signature so we know + // which to call. The first attempt hit TargetParameterCountException — + // the LMX server has multiple Write variants and we picked wrong. + if (sequence == 1) + { + Log($"Trigger write #{sequence}: enumerating Write* methods on {lmx.GetType().FullName}:"); + foreach (var m in lmx.GetType().GetMethods(BindingFlags.Public | BindingFlags.Instance)) + { + if (m.IsSpecialName) continue; + if (!m.Name.StartsWith("Write", StringComparison.OrdinalIgnoreCase)) continue; + string ps = string.Join(", ", m.GetParameters().Select(p => $"{p.ParameterType.Name} {p.Name}")); + Log($" {m.ReturnType.Name} {m.Name}({ps})"); + } + } + + // Late-bind Write — it isn't on ILMXProxyServer's interface but is + // exposed by the COM coclass. + object[] writeArgs = new object[] { handle, itemHandle, value }; + object? 
rv = lmx.GetType().InvokeMember( + "Write", + BindingFlags.InvokeMethod | BindingFlags.Public | BindingFlags.Instance, + binder: null, target: lmx, args: writeArgs); + Log($"Trigger write #{sequence}: Write({TriggerTagReference}={value}) -> rv={rv}"); + } + catch (Exception ex) + { + Log($"Trigger write #{sequence}: FAILED: {ex.GetType().Name}: {ex.Message}"); + if (ex.InnerException != null) + { + Log($" inner: {ex.InnerException.GetType().Name}: {ex.InnerException.Message}"); + } + } + finally + { + try + { + if (srv != null && itemHandle != 0) { srv.RemoveItem(handle, itemHandle); } + if (srv != null && handle != 0) { srv.Unregister(handle); } + } + catch (Exception ex) + { + Log($"Trigger write #{sequence}: cleanup failure: {ex.GetType().Name}: {ex.Message}"); + } + if (lmx != null && System.Runtime.InteropServices.Marshal.IsComObject(lmx)) + { + try { System.Runtime.InteropServices.Marshal.FinalReleaseComObject(lmx); } + catch { /* swallow */ } + } + } + } + + private void PollGetStatistics(AlarmClient client, int seq) { try @@ -301,19 +407,31 @@ public sealed class AlarmClientWmProbeTests : IDisposable string posStr = positions != null ? string.Join(",", positions) : ""; string handlesStr = handles != null ? string.Join(",", handles) : ""; int posLen = positions?.Length ?? 0; - Log($"GetStatistics #{seq} rc={rc} pct={percent} total={total} active={active} " + - $"suppressed={suppressed} suppressedFilters={suppressedFilters} new={newAlarms} changes={changes} " + - $"codes=[{codesStr}] positions=[{posStr}] handles=[{handlesStr}]"); - // If positions has entries, fetch one record so we see the - // record-shape AVEVA exposes for a real alarm. + // Suppress duplicate-summary spam — only log when interesting + // state-change is observed. The "interesting" digest excludes + // percent (always 100 at steady state). 
+ string summary = $"total={total} active={active} suppressed={suppressed} " + + $"new={newAlarms} changes={changes} codes=[{codesStr}] " + + $"positions=[{posStr}] handles=[{handlesStr}]"; + if (summary != lastStatsSummary) + { + Log($"GetStatistics #{seq} rc={rc} pct={percent} {summary} (changed)"); + lastStatsSummary = summary; + } + + // Always fetch records when positions has entries — records + // change content even when count stays the same. if (posLen > 0 && positions != null) { - int idx = positions[0]; - AlarmRecord rec = new AlarmRecord(); - int recRc = client.GetAlarmExtendedRec(idx, ref rec); - Log($" GetAlarmExtendedRec(idx={idx}) rc={recRc} -> " + - DescribeAlarmRecord(rec)); + for (int i = 0; i < Math.Min(posLen, 4); i++) + { + int idx = positions[i]; + AlarmRecord rec = new AlarmRecord(); + int recRc = client.GetAlarmExtendedRec(idx, ref rec); + Log($" GetAlarmExtendedRec(idx={idx}) rc={recRc} -> " + + DescribeAlarmRecord(rec)); + } } } catch (Exception ex) diff --git a/src/MxGateway.Worker.Tests/MxGateway.Worker.Tests.csproj b/src/MxGateway.Worker.Tests/MxGateway.Worker.Tests.csproj index bb2f949..a3fdd4e 100644 --- a/src/MxGateway.Worker.Tests/MxGateway.Worker.Tests.csproj +++ b/src/MxGateway.Worker.Tests/MxGateway.Worker.Tests.csproj @@ -26,6 +26,11 @@ + + C:\Program Files (x86)\ArchestrA\Framework\Bin\ArchestrA.MXAccess.dll + true + false + C:\Program Files (x86)\ArchestrA\Framework\Bin\ViewAppFramework\Content\MA\aaAlarmManagedClient.dll true