diff --git a/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/AlarmActor.cs b/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/AlarmActor.cs
index e3020b23..ca9e2db6 100644
--- a/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/AlarmActor.cs
+++ b/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/AlarmActor.cs
@@ -41,6 +41,17 @@ public class AlarmActor : ReceiveActor
private readonly ISiteHealthCollector? _healthCollector;
private readonly IServiceProvider? _serviceProvider;
+ ///
+ /// M1.5: the optional site operational-event log, resolved once from
+ /// at construction and cached. The
+ /// registration is process-lifetime (a singleton), so resolving once on
+ /// the actor's own thread is both correct and cheaper than a per-event
+ /// GetService on the hot path. null when no provider was
+ /// supplied (the test/no-logging path) — then
+ /// no-ops.
+ ///
+ private readonly ISiteEventLogger? _siteEventLogger;
+
///
/// M1.5: priority at or above which a computed-alarm raise is logged as
/// Error to the site event log; below it, raises log as Warning.
@@ -118,6 +129,9 @@ public class AlarmActor : ReceiveActor
_logger = logger;
_healthCollector = healthCollector;
_serviceProvider = serviceProvider;
+ // M1.5: resolve the optional site event logger once and cache it,
+ // rather than calling GetService on every alarm transition.
+ _siteEventLogger = serviceProvider?.GetService();
_priority = alarmConfig.PriorityLevel;
_onTriggerScriptName = alarmConfig.OnTriggerScriptCanonicalName;
_onTriggerCompiledScript = onTriggerCompiledScript;
@@ -323,13 +337,14 @@ public class AlarmActor : ReceiveActor
///
/// M1.5: fire-and-forget an alarm operational event to the optional
- /// . Resolved optionally and never awaited so a
- /// logging failure cannot affect alarm evaluation (matching the established
+ /// (resolved once at construction and cached
+ /// in ). Never awaited so a logging failure
+ /// cannot affect alarm evaluation (matching the established
/// ScriptActor/ScriptExecutionActor pattern).
///
private void LogAlarmEvent(string severity, string message)
{
- _ = _serviceProvider?.GetService()?.LogEventAsync(
+ _ = _siteEventLogger?.LogEventAsync(
"alarm", severity, _instanceName, $"AlarmActor:{_alarmName}", message);
}
diff --git a/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/DeploymentManagerActor.cs b/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/DeploymentManagerActor.cs
index 1866e878..10a6aae6 100644
--- a/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/DeploymentManagerActor.cs
+++ b/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/DeploymentManagerActor.cs
@@ -643,9 +643,18 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
/// on a deploy/enable/disable/delete outcome.
/// Resolved optionally and never awaited so a logging failure cannot affect the
/// deployment pipeline (matching the established ScriptActor/ScriptExecutionActor
- /// pattern). Only reads the immutable _serviceProvider field, so it is
- /// safe to call from the PipeTo continuations that report disable/delete
- /// outcomes off the actor thread.
+ /// pattern).
+ ///
+ /// Thread-safety: the disable () and delete
+ /// () paths call this from a
+ ///
+ /// continuation that runs on a thread-pool thread, NOT on the actor thread —
+ /// so it must touch only immutable, thread-safe state. It does: the only
+ /// field it reads is the readonly _serviceProvider captured at
+ /// construction (the resolved is a process
+ /// singleton). No actor-private mutable state is referenced, which is what
+ /// makes calling it off the actor thread safe.
+ ///
///
private void LogDeploymentEvent(string severity, string instanceName, string message, string? details = null)
{
diff --git a/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/NativeAlarmActor.cs b/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/NativeAlarmActor.cs
index bca89d4d..63a354ed 100644
--- a/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/NativeAlarmActor.cs
+++ b/src/ZB.MOM.WW.ScadaBridge.SiteRuntime/Actors/NativeAlarmActor.cs
@@ -212,7 +212,14 @@ public class NativeAlarmActor : ReceiveActor
{
_alarms[sourceRef] = t;
PersistUpsert(t);
- Emit(t, t.Condition);
+ // M1.5: a snapshot replay is a re-sync of the source's current
+ // active set on (re)subscribe, NOT a live transition — surface it
+ // upward for the DebugView but do NOT re-log an `alarm` operational
+ // event. Otherwise every DCL reconnect would re-emit an `alarm`
+ // event for every already-active native condition (the
+ // synthesised return-to-normal above IS a real state change and
+ // keeps logSiteEvent: true).
+ Emit(t, t.Condition, logSiteEvent: false);
}
_snapshotBuffer.Clear();
diff --git a/src/ZB.MOM.WW.ScadaBridge.StoreAndForward/StoreAndForwardService.cs b/src/ZB.MOM.WW.ScadaBridge.StoreAndForward/StoreAndForwardService.cs
index e335ea01..b1c3b9e2 100644
--- a/src/ZB.MOM.WW.ScadaBridge.StoreAndForward/StoreAndForwardService.cs
+++ b/src/ZB.MOM.WW.ScadaBridge.StoreAndForward/StoreAndForwardService.cs
@@ -82,6 +82,18 @@ public class StoreAndForwardService
/// recognisable instead of an unattributable empty string.
///
public const string UnknownSiteSentinel = "$unknown-site";
+
+ ///
+ /// M1.7: the detail-string prefix written by
+ /// when an immediate forward attempt throws and the message is buffered for
+ /// the retry sweep. matches on this same prefix
+ /// to distinguish a forward failure (logged) from a routine
+ /// no-handler enqueue (not logged), so both the construction site and the
+ /// check reference this single constant rather than duplicating the
+ /// literal — keeping the two ends from drifting apart.
+ ///
+ private const string BufferedForRetryDetailPrefix = "Buffered for retry";
+
private Timer? _retryTimer;
private int _retryInProgress;
@@ -261,11 +273,12 @@ public class StoreAndForwardService
if (category == StoreAndForwardCategory.Notification)
{
// Spec: log only forward-failure (the immediate forward threw and the
- // notification was buffered for retry — detail "Buffered for retry:")
- // and park. A routine "No handler registered, buffered" enqueue and a
- // forward-success "Delivered" are deliberately NOT logged.
+ // notification was buffered for retry — detail prefixed
+ // BufferedForRetryDetailPrefix) and park. A routine "No handler
+ // registered, buffered" enqueue and a forward-success "Delivered"
+ // are deliberately NOT logged.
var isForwardFailure = action == "Queued"
- && detail.StartsWith("Buffered for retry", StringComparison.Ordinal);
+ && detail.StartsWith(BufferedForRetryDetailPrefix, StringComparison.Ordinal);
if (!isForwardFailure && action != "Parked")
{
return;
@@ -536,7 +549,7 @@ public class StoreAndForwardService
message.LastError = ex.Message;
await BufferAsync(message);
- RaiseActivity("Queued", category, $"Buffered for retry: {target} ({ex.Message})");
+ RaiseActivity("Queued", category, $"{BufferedForRetryDetailPrefix}: {target} ({ex.Message})");
return new StoreAndForwardResult(true, message.Id, true);
}
}
@@ -553,7 +566,7 @@ public class StoreAndForwardService
await BufferAsync(message);
RaiseActivity("Queued", category, attemptImmediateDelivery
? $"No handler registered, buffered: {target}"
- : $"Buffered for retry: {target}");
+ : $"{BufferedForRetryDetailPrefix}: {target}");
return new StoreAndForwardResult(true, message.Id, true);
}
diff --git a/tests/ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests/Actors/DeploymentManagerActorTests.cs b/tests/ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests/Actors/DeploymentManagerActorTests.cs
index d2a4613e..6b57e936 100644
--- a/tests/ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests/Actors/DeploymentManagerActorTests.cs
+++ b/tests/ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests/Actors/DeploymentManagerActorTests.cs
@@ -219,24 +219,27 @@ public class DeploymentManagerActorTests : TestKit, IDisposable
ExpectMsg(TimeSpan.FromSeconds(5));
await Task.Delay(1000);
+ // The deployment site events are emitted fire-and-forget off the actor
+ // thread (LogDeploymentEvent runs in a ContinueWith continuation), so
+ // poll for each event with AwaitAssert rather than a bare Task.Delay —
+ // a fixed sleep is racy under CI load.
actor.Tell(new DisableInstanceCommand("cmd-de1", "EvtPump", DateTimeOffset.UtcNow));
Assert.True(ExpectMsg(TimeSpan.FromSeconds(5)).Success);
- await Task.Delay(300);
+ AwaitAssert(() => Assert.Contains(siteLog.OfType("deployment"),
+ r => r.Message.Contains("disabled", StringComparison.OrdinalIgnoreCase)),
+ TimeSpan.FromSeconds(2));
actor.Tell(new EnableInstanceCommand("cmd-en1", "EvtPump", DateTimeOffset.UtcNow));
Assert.True(ExpectMsg(TimeSpan.FromSeconds(5)).Success);
- await Task.Delay(300);
+ AwaitAssert(() => Assert.Contains(siteLog.OfType("deployment"),
+ r => r.Message.Contains("enabled", StringComparison.OrdinalIgnoreCase)),
+ TimeSpan.FromSeconds(2));
actor.Tell(new DeleteInstanceCommand("cmd-del-evt", "EvtPump", DateTimeOffset.UtcNow));
Assert.True(ExpectMsg(TimeSpan.FromSeconds(5)).Success);
-
- AwaitAssert(() =>
- {
- var rows = siteLog.OfType("deployment");
- Assert.Contains(rows, r => r.Message.Contains("disabled", StringComparison.OrdinalIgnoreCase));
- Assert.Contains(rows, r => r.Message.Contains("enabled", StringComparison.OrdinalIgnoreCase));
- Assert.Contains(rows, r => r.Message.Contains("deleted", StringComparison.OrdinalIgnoreCase));
- }, TimeSpan.FromSeconds(2));
+ AwaitAssert(() => Assert.Contains(siteLog.OfType("deployment"),
+ r => r.Message.Contains("deleted", StringComparison.OrdinalIgnoreCase)),
+ TimeSpan.FromSeconds(2));
}
[Fact]
diff --git a/tests/ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests/Actors/NativeAlarmActorTests.cs b/tests/ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests/Actors/NativeAlarmActorTests.cs
index 4cc5f594..abcd0766 100644
--- a/tests/ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests/Actors/NativeAlarmActorTests.cs
+++ b/tests/ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests/Actors/NativeAlarmActorTests.cs
@@ -1,3 +1,4 @@
+using System.Text.Json;
using Akka.Actor;
using Akka.TestKit.Xunit2;
using Microsoft.Extensions.Logging.Abstractions;
@@ -180,6 +181,101 @@ public class NativeAlarmActorTests : TestKit, IDisposable
}, TimeSpan.FromSeconds(2));
}
+ [Fact]
+ public async Task Rehydration_DoesNotEmitSiteEvent()
+ {
+ // Pre-populate SQLite with an active condition so the actor rehydrates
+ // it on PreStart. Rehydration replays last-known state — it is NOT a
+ // live transition, so it must surface upward (for the DebugView) but
+ // must NOT re-log an `alarm` operational event.
+ var condition = new AlarmConditionState(true, false, null, AlarmShelveState.Unshelved, false, 800);
+ await _storage.UpsertNativeAlarmAsync(
+ "inst", "Pressure", "T01.Hi",
+ JsonSerializer.Serialize(condition), DateTimeOffset.UtcNow);
+
+ var siteLog = new FakeSiteEventLogger();
+ var instance = CreateTestProbe();
+ var dcl = CreateTestProbe();
+ Spawn(instance.Ref, dcl.Ref, new SingleServiceProvider(siteLog));
+
+ // The rehydrated condition is surfaced upward...
+ var emitted = instance.ExpectMsg(TimeSpan.FromSeconds(2));
+ Assert.Equal("T01.Hi", emitted.SourceReference);
+ Assert.Equal(AlarmState.Active, emitted.State);
+ dcl.ExpectMsg();
+
+ // ...but no `alarm` operational event is logged for it.
+ AwaitAssert(
+ () => Assert.Empty(siteLog.OfType("alarm")),
+ TimeSpan.FromSeconds(1));
+ }
+
+ [Fact]
+ public void SnapshotSwap_ExistingActiveCondition_DoesNotReEmit()
+ {
+ var siteLog = new FakeSiteEventLogger();
+ var instance = CreateTestProbe();
+ var dcl = CreateTestProbe();
+ var actor = Spawn(instance.Ref, dcl.Ref, new SingleServiceProvider(siteLog));
+ dcl.ExpectMsg();
+
+ // Live raise — the one and only `alarm` event we expect.
+ actor.Tell(new NativeAlarmTransitionUpdate("Opc", Transition(
+ "T01.Hi", AlarmTransitionKind.Raise,
+ new AlarmConditionState(true, false, null, AlarmShelveState.Unshelved, false, 800))));
+ instance.ExpectMsg(m => m.State == AlarmState.Active);
+ AwaitAssert(() => Assert.Single(siteLog.OfType("alarm")), TimeSpan.FromSeconds(2));
+
+ // A reconnect snapshot that RE-INCLUDES the same still-active condition is
+ // a re-sync, not a live transition. It must NOT re-log a second `alarm`
+ // event (regression for the spurious-reconnect-event bug).
+ actor.Tell(new NativeAlarmTransitionUpdate("Opc", Transition(
+ "T01.Hi", AlarmTransitionKind.Snapshot,
+ new AlarmConditionState(true, false, null, AlarmShelveState.Unshelved, false, 800))));
+ actor.Tell(new NativeAlarmTransitionUpdate("Opc", Transition(
+ "T01.Hi", AlarmTransitionKind.SnapshotComplete,
+ new AlarmConditionState(true, false, null, AlarmShelveState.Unshelved, false, 800))));
+
+ // The snapshot still surfaces the condition upward (DebugView re-sync)...
+ instance.ExpectMsg(m => m.SourceReference == "T01.Hi" && m.State == AlarmState.Active);
+
+ // ...but the `alarm` event count stays at exactly 1 — no re-emit.
+ Thread.Sleep(200); // give any spurious fire-and-forget log time to land
+ Assert.Single(siteLog.OfType("alarm"));
+ }
+
+ [Fact]
+ public void Acknowledge_EmitsInfoAlarmSiteEventMentioningAcknowledged()
+ {
+ var siteLog = new FakeSiteEventLogger();
+ var instance = CreateTestProbe();
+ var dcl = CreateTestProbe();
+ var actor = Spawn(instance.Ref, dcl.Ref, new SingleServiceProvider(siteLog));
+ dcl.ExpectMsg();
+
+ var t0 = DateTimeOffset.UtcNow;
+ actor.Tell(new NativeAlarmTransitionUpdate("Opc", Transition(
+ "T01.Hi", AlarmTransitionKind.Raise,
+ new AlarmConditionState(true, false, null, AlarmShelveState.Unshelved, false, 800), t0)));
+ instance.ExpectMsg(m => m.State == AlarmState.Active);
+
+ // Operator acknowledges the still-active condition. The Acknowledge
+ // branch of LogAlarmEvent logs Info and mentions "acknowledged".
+ actor.Tell(new NativeAlarmTransitionUpdate("Opc", Transition(
+ "T01.Hi", AlarmTransitionKind.Acknowledge,
+ new AlarmConditionState(true, true, null, AlarmShelveState.Unshelved, false, 800), t0.AddSeconds(5))));
+ instance.ExpectMsg();
+
+ AwaitAssert(() =>
+ {
+ var rows = siteLog.OfType("alarm");
+ Assert.Equal(2, rows.Count); // raise + acknowledge
+ var ack = rows[1];
+ Assert.Equal("Info", ack.Severity);
+ Assert.Contains("acknowledged", ack.Message, StringComparison.OrdinalIgnoreCase);
+ }, TimeSpan.FromSeconds(2));
+ }
+
void IDisposable.Dispose()
{
Shutdown();