diff --git a/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/AlarmActor.cs b/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/AlarmActor.cs index e3020b23..ca9e2db6 100644 --- a/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/AlarmActor.cs +++ b/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/AlarmActor.cs @@ -41,6 +41,17 @@ public class AlarmActor : ReceiveActor private readonly ISiteHealthCollector? _healthCollector; private readonly IServiceProvider? _serviceProvider; + /// + /// M1.5: the optional site operational-event log, resolved once from + /// at construction and cached. The + /// registration is process-lifetime (a singleton), so resolving once on + /// the actor's own thread is both correct and cheaper than a per-event + /// GetService on the hot path. null when no provider was + /// supplied (the test/no-logging path) — then + /// no-ops. + /// + private readonly ISiteEventLogger? _siteEventLogger; + /// /// M1.5: priority at or above which a computed-alarm raise is logged as /// Error to the site event log; below it, raises log as Warning. @@ -118,6 +129,9 @@ public class AlarmActor : ReceiveActor _logger = logger; _healthCollector = healthCollector; _serviceProvider = serviceProvider; + // M1.5: resolve the optional site event logger once and cache it, + // rather than calling GetService on every alarm transition. + _siteEventLogger = serviceProvider?.GetService(); _priority = alarmConfig.PriorityLevel; _onTriggerScriptName = alarmConfig.OnTriggerScriptCanonicalName; _onTriggerCompiledScript = onTriggerCompiledScript; @@ -323,13 +337,14 @@ public class AlarmActor : ReceiveActor /// /// M1.5: fire-and-forget an alarm operational event to the optional - /// . Resolved optionally and never awaited so a - /// logging failure cannot affect alarm evaluation (matching the established + /// (resolved once at construction and cached + /// in ). Never awaited so a logging failure + /// cannot affect alarm evaluation (matching the established /// ScriptActor/ScriptExecutionActor pattern). /// private void LogAlarmEvent(string severity, string message) { - _ = _serviceProvider?.GetService()?.LogEventAsync( + _ = _siteEventLogger?.LogEventAsync( "alarm", severity, _instanceName, $"AlarmActor:{_alarmName}", message); } diff --git a/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/DeploymentManagerActor.cs b/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/DeploymentManagerActor.cs index 1866e878..10a6aae6 100644 --- a/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/DeploymentManagerActor.cs +++ b/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/DeploymentManagerActor.cs @@ -643,9 +643,18 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers /// on a deploy/enable/disable/delete outcome. /// Resolved optionally and never awaited so a logging failure cannot affect the /// deployment pipeline (matching the established ScriptActor/ScriptExecutionActor - /// pattern). Only reads the immutable _serviceProvider field, so it is - /// safe to call from the PipeTo continuations that report disable/delete - /// outcomes off the actor thread. + /// pattern). + /// + /// Thread-safety: the disable () and delete + /// () paths call this from a + /// + /// continuation that runs on a thread-pool thread, NOT on the actor thread — + /// so it must touch only immutable, thread-safe state. It does: the only + /// field it reads is the readonly _serviceProvider captured at + /// construction (the resolved is a process + /// singleton). No actor-private mutable state is referenced, which is what + /// makes calling it off the actor thread safe. + /// /// private void LogDeploymentEvent(string severity, string instanceName, string message, string? details = null) { diff --git a/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/NativeAlarmActor.cs b/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/NativeAlarmActor.cs index bca89d4d..63a354ed 100644 --- a/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/NativeAlarmActor.cs +++ b/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/NativeAlarmActor.cs @@ -212,7 +212,14 @@ public class NativeAlarmActor : ReceiveActor { _alarms[sourceRef] = t; PersistUpsert(t); - Emit(t, t.Condition); + // M1.5: a snapshot replay is a re-sync of the source's current + // active set on (re)subscribe, NOT a live transition — surface it + // upward for the DebugView but do NOT re-log an `alarm` operational + // event. Otherwise every DCL reconnect would re-emit an `alarm` + // event for every already-active native condition (the + // synthesised return-to-normal above IS a real state change and + // keeps logSiteEvent: true). + Emit(t, t.Condition, logSiteEvent: false); } _snapshotBuffer.Clear(); diff --git a/src/ZB.MOM.WW.ScadaBridge.StoreAndForward/StoreAndForwardService.cs b/src/ZB.MOM.WW.ScadaBridge.StoreAndForward/StoreAndForwardService.cs index e335ea01..b1c3b9e2 100644 --- a/src/ZB.MOM.WW.ScadaBridge.StoreAndForward/StoreAndForwardService.cs +++ b/src/ZB.MOM.WW.ScadaBridge.StoreAndForward/StoreAndForwardService.cs @@ -82,6 +82,18 @@ public class StoreAndForwardService /// recognisable instead of an unattributable empty string. /// public const string UnknownSiteSentinel = "$unknown-site"; + + /// + /// M1.7: the detail-string prefix written by + /// when an immediate forward attempt throws and the message is buffered for + /// the retry sweep. matches on this same prefix + /// to distinguish a forward failure (logged) from a routine + /// no-handler enqueue (not logged), so both the construction site and the + /// check reference this single constant rather than duplicating the + /// literal — keeping the two ends from drifting apart. + /// + private const string BufferedForRetryDetailPrefix = "Buffered for retry"; + private Timer? _retryTimer; private int _retryInProgress; @@ -261,11 +273,12 @@ public class StoreAndForwardService if (category == StoreAndForwardCategory.Notification) { // Spec: log only forward-failure (the immediate forward threw and the - // notification was buffered for retry — detail "Buffered for retry:") - // and park. A routine "No handler registered, buffered" enqueue and a - // forward-success "Delivered" are deliberately NOT logged. + // notification was buffered for retry — detail prefixed + // BufferedForRetryDetailPrefix) and park. A routine "No handler + // registered, buffered" enqueue and a forward-success "Delivered" + // are deliberately NOT logged. var isForwardFailure = action == "Queued" - && detail.StartsWith("Buffered for retry", StringComparison.Ordinal); + && detail.StartsWith(BufferedForRetryDetailPrefix, StringComparison.Ordinal); if (!isForwardFailure && action != "Parked") { return; @@ -536,7 +549,7 @@ public class StoreAndForwardService message.LastError = ex.Message; await BufferAsync(message); - RaiseActivity("Queued", category, $"Buffered for retry: {target} ({ex.Message})"); + RaiseActivity("Queued", category, $"{BufferedForRetryDetailPrefix}: {target} ({ex.Message})"); return new StoreAndForwardResult(true, message.Id, true); } } @@ -553,7 +566,7 @@ public class StoreAndForwardService await BufferAsync(message); RaiseActivity("Queued", category, attemptImmediateDelivery ? $"No handler registered, buffered: {target}" - : $"Buffered for retry: {target}"); + : $"{BufferedForRetryDetailPrefix}: {target}"); return new StoreAndForwardResult(true, message.Id, true); } diff --git a/tests/ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests/Actors/DeploymentManagerActorTests.cs b/tests/ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests/Actors/DeploymentManagerActorTests.cs index d2a4613e..6b57e936 100644 --- a/tests/ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests/Actors/DeploymentManagerActorTests.cs +++ b/tests/ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests/Actors/DeploymentManagerActorTests.cs @@ -219,24 +219,27 @@ public class DeploymentManagerActorTests : TestKit, IDisposable ExpectMsg(TimeSpan.FromSeconds(5)); await Task.Delay(1000); + // The deployment site events are emitted fire-and-forget off the actor + // thread (LogDeploymentEvent runs in a ContinueWith continuation), so + // poll for each event with AwaitAssert rather than a bare Task.Delay — + // a fixed sleep is racy under CI load. actor.Tell(new DisableInstanceCommand("cmd-de1", "EvtPump", DateTimeOffset.UtcNow)); Assert.True(ExpectMsg(TimeSpan.FromSeconds(5)).Success); - await Task.Delay(300); + AwaitAssert(() => Assert.Contains(siteLog.OfType("deployment"), + r => r.Message.Contains("disabled", StringComparison.OrdinalIgnoreCase)), + TimeSpan.FromSeconds(2)); actor.Tell(new EnableInstanceCommand("cmd-en1", "EvtPump", DateTimeOffset.UtcNow)); Assert.True(ExpectMsg(TimeSpan.FromSeconds(5)).Success); - await Task.Delay(300); + AwaitAssert(() => Assert.Contains(siteLog.OfType("deployment"), + r => r.Message.Contains("enabled", StringComparison.OrdinalIgnoreCase)), + TimeSpan.FromSeconds(2)); actor.Tell(new DeleteInstanceCommand("cmd-del-evt", "EvtPump", DateTimeOffset.UtcNow)); Assert.True(ExpectMsg(TimeSpan.FromSeconds(5)).Success); - - AwaitAssert(() => - { - var rows = siteLog.OfType("deployment"); - Assert.Contains(rows, r => r.Message.Contains("disabled", StringComparison.OrdinalIgnoreCase)); - Assert.Contains(rows, r => r.Message.Contains("enabled", StringComparison.OrdinalIgnoreCase)); - Assert.Contains(rows, r => r.Message.Contains("deleted", StringComparison.OrdinalIgnoreCase)); - }, TimeSpan.FromSeconds(2)); + AwaitAssert(() => Assert.Contains(siteLog.OfType("deployment"), + r => r.Message.Contains("deleted", StringComparison.OrdinalIgnoreCase)), + TimeSpan.FromSeconds(2)); } [Fact] diff --git a/tests/ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests/Actors/NativeAlarmActorTests.cs b/tests/ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests/Actors/NativeAlarmActorTests.cs index 4cc5f594..abcd0766 100644 --- a/tests/ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests/Actors/NativeAlarmActorTests.cs +++ b/tests/ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests/Actors/NativeAlarmActorTests.cs @@ -1,3 +1,4 @@ +using System.Text.Json; using Akka.Actor; using Akka.TestKit.Xunit2; using Microsoft.Extensions.Logging.Abstractions; @@ -180,6 +181,101 @@ public class NativeAlarmActorTests : TestKit, IDisposable }, TimeSpan.FromSeconds(2)); } + [Fact] + public async Task Rehydration_DoesNotEmitSiteEvent() + { + // Pre-populate SQLite with an active condition so the actor rehydrates + // it on PreStart. Rehydration replays last-known state — it is NOT a + // live transition, so it must surface upward (for the DebugView) but + // must NOT re-log an `alarm` operational event. + var condition = new AlarmConditionState(true, false, null, AlarmShelveState.Unshelved, false, 800); + await _storage.UpsertNativeAlarmAsync( + "inst", "Pressure", "T01.Hi", + JsonSerializer.Serialize(condition), DateTimeOffset.UtcNow); + + var siteLog = new FakeSiteEventLogger(); + var instance = CreateTestProbe(); + var dcl = CreateTestProbe(); + Spawn(instance.Ref, dcl.Ref, new SingleServiceProvider(siteLog)); + + // The rehydrated condition is surfaced upward... + var emitted = instance.ExpectMsg(TimeSpan.FromSeconds(2)); + Assert.Equal("T01.Hi", emitted.SourceReference); + Assert.Equal(AlarmState.Active, emitted.State); + dcl.ExpectMsg(); + + // ...but no `alarm` operational event is logged for it. + AwaitAssert( + () => Assert.Empty(siteLog.OfType("alarm")), + TimeSpan.FromSeconds(1)); + } + + [Fact] + public void SnapshotSwap_ExistingActiveCondition_DoesNotReEmit() + { + var siteLog = new FakeSiteEventLogger(); + var instance = CreateTestProbe(); + var dcl = CreateTestProbe(); + var actor = Spawn(instance.Ref, dcl.Ref, new SingleServiceProvider(siteLog)); + dcl.ExpectMsg(); + + // Live raise — the one and only `alarm` event we expect. + actor.Tell(new NativeAlarmTransitionUpdate("Opc", Transition( + "T01.Hi", AlarmTransitionKind.Raise, + new AlarmConditionState(true, false, null, AlarmShelveState.Unshelved, false, 800)))); + instance.ExpectMsg(m => m.State == AlarmState.Active); + AwaitAssert(() => Assert.Single(siteLog.OfType("alarm")), TimeSpan.FromSeconds(2)); + + // A reconnect snapshot that RE-INCLUDES the same still-active condition is + // a re-sync, not a live transition. It must NOT re-log a second `alarm` + // event (regression for the spurious-reconnect-event bug). + actor.Tell(new NativeAlarmTransitionUpdate("Opc", Transition( + "T01.Hi", AlarmTransitionKind.Snapshot, + new AlarmConditionState(true, false, null, AlarmShelveState.Unshelved, false, 800)))); + actor.Tell(new NativeAlarmTransitionUpdate("Opc", Transition( + "T01.Hi", AlarmTransitionKind.SnapshotComplete, + new AlarmConditionState(true, false, null, AlarmShelveState.Unshelved, false, 800)))); + + // The snapshot still surfaces the condition upward (DebugView re-sync)... + instance.ExpectMsg(m => m.SourceReference == "T01.Hi" && m.State == AlarmState.Active); + + // ...but the `alarm` event count stays at exactly 1 — no re-emit. + Thread.Sleep(200); // give any spurious fire-and-forget log time to land + Assert.Single(siteLog.OfType("alarm")); + } + + [Fact] + public void Acknowledge_EmitsInfoAlarmSiteEventMentioningAcknowledged() + { + var siteLog = new FakeSiteEventLogger(); + var instance = CreateTestProbe(); + var dcl = CreateTestProbe(); + var actor = Spawn(instance.Ref, dcl.Ref, new SingleServiceProvider(siteLog)); + dcl.ExpectMsg(); + + var t0 = DateTimeOffset.UtcNow; + actor.Tell(new NativeAlarmTransitionUpdate("Opc", Transition( + "T01.Hi", AlarmTransitionKind.Raise, + new AlarmConditionState(true, false, null, AlarmShelveState.Unshelved, false, 800), t0))); + instance.ExpectMsg(m => m.State == AlarmState.Active); + + // Operator acknowledges the still-active condition. The Acknowledge + // branch of LogAlarmEvent logs Info and mentions "acknowledged". + actor.Tell(new NativeAlarmTransitionUpdate("Opc", Transition( + "T01.Hi", AlarmTransitionKind.Acknowledge, + new AlarmConditionState(true, true, null, AlarmShelveState.Unshelved, false, 800), t0.AddSeconds(5)))); + instance.ExpectMsg(); + + AwaitAssert(() => + { + var rows = siteLog.OfType("alarm"); + Assert.Equal(2, rows.Count); // raise + acknowledge + var ack = rows[1]; + Assert.Equal("Info", ack.Severity); + Assert.Contains("acknowledged", ack.Message, StringComparison.OrdinalIgnoreCase); + }, TimeSpan.FromSeconds(2)); + } + void IDisposable.Dispose() { Shutdown();