feat(runtime): #112 ScriptedAlarmActor state persistence via IAlarmActorStateStore
Some checks failed
v2-ci / build (push) Failing after 42s
v2-ci / unit-tests (tests/Core/ZB.MOM.WW.OtOpcUa.Cluster.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests) (push) Has been skipped
v2-ci / integration (push) Has been skipped
Some checks failed
v2-ci / build (push) Failing after 42s
v2-ci / unit-tests (tests/Core/ZB.MOM.WW.OtOpcUa.Cluster.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests) (push) Has been skipped
v2-ci / integration (push) Has been skipped
ScriptedAlarmActor now survives actor restart: PreStart loads from the configured store + restores in-memory state; every Transition() fires a fire-and-forget save. ActiveState still re-derives from the evaluator on first tick (Phase 7 decision #14), but Acked state + lastAckUser persist verbatim so operators don't re-ack across an outage.

Three pieces:
- IAlarmActorStateStore seam in Commons.Engines, with the AlarmActorStateSnapshot record (alarmId / state / lastTransitionUtc / lastAckUser) and NullAlarmActorStateStore default.
- EfAlarmActorStateStore in Runtime.ScriptedAlarms — production adapter over the existing ScriptedAlarmState table in ConfigDb. Maps the actor's 3-state enum to the table's AckedState column (Active⇒Unacknowledged, Acknowledged⇒Acknowledged, Inactive⇒Acknowledged). Concurrency conflicts are logged + dropped — the next transition writes again.
- ScriptedAlarmActor PreStart load (async, piped back as StateRestored) + Transition save. New Props overload takes the store; default is NullAlarmActorStateStore so tests stay quiet.

Tests: Runtime 52 -> 57 (+5):
- Transition writes Active then Acknowledged snapshots with lastAckUser populated
- PreStart with persisted Active state restores so a subsequent AcknowledgeAlarm fires (not ignored as it would be from Inactive)
- Empty store boots Inactive (AcknowledgeAlarm correctly ignored)
- EfAlarmActorStateStore Save + Load round-trips via in-memory EF
- Load for unknown alarmId returns null

All 6 v2 test suites green: 157 tests passing.

Closes #112. F9 (#80) remaining residual is predicate binding to Core.ScriptedAlarms.ScriptedAlarmEngine — split as F9b in tasks JSON.
This commit is contained in:
@@ -0,0 +1,41 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Commons.Engines;
|
||||
|
||||
/// <summary>
/// Storage abstraction that lets <c>ScriptedAlarmActor</c> carry its in-memory state across
/// actor restarts. Only the fields the actor's three-state machine needs are exchanged
/// (Inactive / Active / Acknowledged, the last transition time and the last-ack user).
/// The fuller GxP audit data on
/// <see cref="Configuration.Entities.ScriptedAlarmState"/> (Comments / Confirmed / Shelving)
/// stays with the production engine binding; this interface is the narrow surface the
/// actor consumes directly.
/// </summary>
public interface IAlarmActorStateStore
{
    /// <summary>Looks up the persisted snapshot for a single alarm.</summary>
    /// <param name="alarmId">Identifier of the alarm whose state is requested.</param>
    /// <param name="ct">Token that cancels the lookup.</param>
    /// <returns>The stored snapshot, or <c>null</c> when the store has nothing for the alarm.</returns>
    Task<AlarmActorStateSnapshot?> LoadAsync(string alarmId, CancellationToken ct);

    /// <summary>Writes the given snapshot to the store.</summary>
    /// <param name="snapshot">State to persist; its <c>AlarmId</c> identifies the alarm.</param>
    /// <param name="ct">Token that cancels the write.</param>
    /// <returns>A task that completes when the store has handled the snapshot.</returns>
    Task SaveAsync(AlarmActorStateSnapshot snapshot, CancellationToken ct);
}
|
||||
|
||||
/// <summary>
/// The slice of <c>ScriptedAlarmActor</c> state exchanged with an alarm-actor state store.
/// <c>State</c> carries the actor's full three-state value, <c>Active</c> included: the actor
/// writes its current enum value with <c>ToString()</c> and parses it back
/// case-insensitively when restoring. A restored <c>Active</c> is provisional — per Phase 7
/// decision #14 the active condition is re-derived from the evaluator after startup — while
/// the acknowledged flag and the last-ack user are meant to survive a restart so operators
/// do not have to acknowledge again.
/// </summary>
/// <param name="AlarmId">Matches <c>ScriptedAlarm.ScriptedAlarmId</c>.</param>
/// <param name="State">Inactive / Active / Acknowledged — the actor's 3-state enum, projected to string.</param>
/// <param name="LastTransitionUtc">When the actor last transitioned.</param>
/// <param name="LastAckUser">Who acknowledged most recently. Null when never acked.</param>
public sealed record AlarmActorStateSnapshot(
    string AlarmId,
    string State,
    DateTime LastTransitionUtc,
    string? LastAckUser);
|
||||
|
||||
/// <summary>
/// Do-nothing <see cref="IAlarmActorStateStore"/> used when no production store is configured
/// (tests, smoke runs). Loading always yields <c>null</c>, so the actor boots Inactive;
/// saving discards the snapshot, so no state leaks between runs.
/// </summary>
public sealed class NullAlarmActorStateStore : IAlarmActorStateStore
{
    /// <summary>The single shared instance; the constructor is private.</summary>
    public static readonly NullAlarmActorStateStore Instance = new NullAlarmActorStateStore();

    private NullAlarmActorStateStore()
    {
    }

    /// <summary>Always completes immediately with <c>null</c>.</summary>
    public Task<AlarmActorStateSnapshot?> LoadAsync(string alarmId, CancellationToken ct)
    {
        AlarmActorStateSnapshot? nothing = null;
        return Task.FromResult(nothing);
    }

    /// <summary>Discards the snapshot and returns an already-completed task.</summary>
    public Task SaveAsync(AlarmActorStateSnapshot snapshot, CancellationToken ct)
    {
        return Task.CompletedTask;
    }
}
|
||||
@@ -0,0 +1,111 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Engines;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Runtime.ScriptedAlarms;
|
||||
|
||||
/// <summary>
/// Production-side <see cref="IAlarmActorStateStore"/> backed by the
/// <see cref="ScriptedAlarmState"/> table in the central config DB.
/// <para>
/// The table has no active-state column (ActiveState is deliberately not persisted — it
/// re-derives from the evaluator on startup, Phase 7 decision #14), so the actor's 3-state
/// enum is projected onto <c>AckedState</c> when saving: Active ⇒ Unacknowledged,
/// Acknowledged ⇒ Acknowledged, Inactive ⇒ Acknowledged.
/// </para>
/// <para>
/// Because Inactive and Acknowledged share the same <c>AckedState</c>, loading tells them
/// apart by timestamps: a save of the Acknowledged state stamps <c>LastAckUtc</c> and
/// <c>UpdatedAtUtc</c> with the same instant, whereas a later clear only advances
/// <c>UpdatedAtUtc</c>. Without this, a cleared alarm would come back as Acknowledged and
/// the actor would ignore the next <c>ConditionMet</c>, which it only accepts from Inactive.
/// </para>
/// </summary>
public sealed class EfAlarmActorStateStore : IAlarmActorStateStore
{
    // Values stored in ScriptedAlarmState.AckedState.
    private const string AckedColumnAcknowledged = "Acknowledged";
    private const string AckedColumnUnacknowledged = "Unacknowledged";

    // String projections of the actor's state enum, as carried by AlarmActorStateSnapshot.State.
    private const string ActorInactive = "Inactive";
    private const string ActorActive = "Active";
    private const string ActorAcknowledged = "Acknowledged";

    private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbFactory;
    private readonly ILogger<EfAlarmActorStateStore> _logger;

    /// <summary>Creates the store.</summary>
    /// <param name="dbFactory">Factory for short-lived config-DB contexts (one per call).</param>
    /// <param name="logger">Receives the debug entry written when a save is dropped.</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public EfAlarmActorStateStore(
        IDbContextFactory<OtOpcUaConfigDbContext> dbFactory,
        ILogger<EfAlarmActorStateStore> logger)
    {
        _dbFactory = dbFactory ?? throw new ArgumentNullException(nameof(dbFactory));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>Reads the row for <paramref name="alarmId"/> and projects it to a snapshot.</summary>
    /// <param name="alarmId">Value matched against <c>ScriptedAlarmId</c>.</param>
    /// <param name="ct">Cancels context creation and the query.</param>
    /// <returns>The projected snapshot, or <c>null</c> when no row exists.</returns>
    public async Task<AlarmActorStateSnapshot?> LoadAsync(string alarmId, CancellationToken ct)
    {
        await using var db = await _dbFactory.CreateDbContextAsync(ct).ConfigureAwait(false);

        // Read-only lookup: no change tracking needed.
        var row = await db.ScriptedAlarmStates.AsNoTracking()
            .FirstOrDefaultAsync(r => r.ScriptedAlarmId == alarmId, ct)
            .ConfigureAwait(false);
        if (row is null)
        {
            return null;
        }

        return new AlarmActorStateSnapshot(
            AlarmId: alarmId,
            State: MapRowToActorState(row),
            LastTransitionUtc: row.UpdatedAtUtc,
            LastAckUser: row.LastAckUser);
    }

    /// <summary>
    /// Inserts or updates the row for <c>snapshot.AlarmId</c>. A
    /// <see cref="DbUpdateConcurrencyException"/> is logged at debug level and swallowed;
    /// any other failure propagates to the caller.
    /// </summary>
    /// <param name="snapshot">State to persist.</param>
    /// <param name="ct">Cancels context creation, the lookup and the write.</param>
    public async Task SaveAsync(AlarmActorStateSnapshot snapshot, CancellationToken ct)
    {
        await using var db = await _dbFactory.CreateDbContextAsync(ct).ConfigureAwait(false);
        var row = await db.ScriptedAlarmStates
            .FirstOrDefaultAsync(r => r.ScriptedAlarmId == snapshot.AlarmId, ct)
            .ConfigureAwait(false);

        var ackedState = MapActorStateToAcked(snapshot.State);
        var isAcknowledgement = string.Equals(snapshot.State, ActorAcknowledged, StringComparison.Ordinal);

        if (row is null)
        {
            // First save for this alarm: the columns the actor does not own get fixed defaults.
            db.ScriptedAlarmStates.Add(new ScriptedAlarmState
            {
                ScriptedAlarmId = snapshot.AlarmId,
                EnabledState = "Enabled",
                AckedState = ackedState,
                ConfirmedState = "Confirmed",
                ShelvingState = "Unshelved",
                LastAckUser = snapshot.LastAckUser,
                LastAckUtc = isAcknowledgement ? snapshot.LastTransitionUtc : null,
                UpdatedAtUtc = snapshot.LastTransitionUtc,
                CommentsJson = "[]",
            });
        }
        else
        {
            row.AckedState = ackedState;

            // A snapshot without an ack user never erases a previously recorded one.
            row.LastAckUser = snapshot.LastAckUser ?? row.LastAckUser;

            // LastAckUtc moves only on an acknowledgement; LoadAsync relies on it then
            // being equal to UpdatedAtUtc.
            if (isAcknowledgement)
            {
                row.LastAckUtc = snapshot.LastTransitionUtc;
            }

            row.UpdatedAtUtc = snapshot.LastTransitionUtc;
        }

        try
        {
            await db.SaveChangesAsync(ct).ConfigureAwait(false);
        }
        catch (DbUpdateConcurrencyException ex)
        {
            // Two actors racing to save the same alarm is benign — the last writer wins on
            // UpdatedAtUtc, and the next transition on either side will write again. Log
            // + drop so a race doesn't crash the dispatcher.
            _logger.LogDebug(ex, "EfAlarmActorStateStore: concurrency conflict for {AlarmId}; dropping save",
                snapshot.AlarmId);
        }
    }

    /// <summary>Projects the actor state string onto the <c>AckedState</c> column value.</summary>
    private static string MapActorStateToAcked(string actorState) => actorState switch
    {
        ActorActive => AckedColumnUnacknowledged,
        ActorAcknowledged => AckedColumnAcknowledged,
        // Inactive (and anything unrecognised) maps to Acknowledged — when an alarm clears,
        // nothing is left to ack.
        _ => AckedColumnAcknowledged,
    };

    /// <summary>Reconstructs the actor state string from a persisted row.</summary>
    private static string MapRowToActorState(ScriptedAlarmState row)
    {
        // Persisted "Unacknowledged" implies the actor was last Active + un-acked; restore
        // Active so a restart doesn't drop pending operator work.
        if (string.Equals(row.AckedState, AckedColumnUnacknowledged, StringComparison.Ordinal))
        {
            return ActorActive;
        }

        // SaveAsync writes LastAckUtc == UpdatedAtUtc only when the saved state was
        // Acknowledged; a later clear advances UpdatedAtUtc alone.
        // NOTE(review): assumes both columns keep the same precision and that no other
        // writer advances UpdatedAtUtc on its own — confirm against the engine binding.
        var lastWriteWasAck = row.LastAckUtc.HasValue && row.LastAckUtc.Value == row.UpdatedAtUtc;
        return lastWriteWasAck ? ActorAcknowledged : ActorInactive;
    }
}
|
||||
@@ -38,36 +38,84 @@ public sealed class ScriptedAlarmActor : ReceiveActor
|
||||
|
||||
private readonly AlarmConfig _config;
|
||||
private readonly IScriptedAlarmEvaluator _evaluator;
|
||||
private readonly IAlarmActorStateStore _stateStore;
|
||||
private readonly Func<DPSPublisher>? _publisherFactory;
|
||||
private readonly ILoggingAdapter _log = Context.GetLogger();
|
||||
private readonly Dictionary<string, object?> _dependencies = new(StringComparer.Ordinal);
|
||||
|
||||
private ScriptedAlarmActorState _state = ScriptedAlarmActorState.Inactive;
|
||||
private string? _lastAckUser;
|
||||
|
||||
/// <summary>
/// Message the actor sends to itself from <c>PreStart</c> once a persisted snapshot has
/// been loaded and its state string parsed.
/// </summary>
/// <param name="State">Actor state parsed from the snapshot.</param>
/// <param name="LastAckUser">Last acknowledging user taken from the snapshot; may be null.</param>
public sealed record StateRestored(ScriptedAlarmActorState State, string? LastAckUser);
|
||||
|
||||
/// <summary>Builds the <c>Props</c> used to spawn a <c>ScriptedAlarmActor</c>.</summary>
/// <param name="config">Alarm definition handed to the actor.</param>
/// <param name="evaluator">Evaluator to use; <c>NullScriptedAlarmEvaluator.Instance</c> when omitted.</param>
/// <param name="publisherFactory">Optional publisher factory, passed through unchanged.</param>
/// <param name="stateStore">State store to use; <c>NullAlarmActorStateStore.Instance</c> when omitted.</param>
public static Props Props(
    AlarmConfig config,
    IScriptedAlarmEvaluator? evaluator = null,
    Func<DPSPublisher>? publisherFactory = null,
    IAlarmActorStateStore? stateStore = null) =>
    Akka.Actor.Props.Create(() => new ScriptedAlarmActor(
        config,
        evaluator ?? NullScriptedAlarmEvaluator.Instance,
        publisherFactory,
        stateStore ?? NullAlarmActorStateStore.Instance));
|
||||
|
||||
/// <summary>Legacy single-arg ctor kept for callers that only care about the state machine
/// (no engine evaluation, no DPS fan-out, no persistence). Equivalent to <c>Props(new AlarmConfig(...))</c>.</summary>
/// <param name="alarmId">Used for both of the first two <c>AlarmConfig</c> arguments.</param>
public static Props Props(string alarmId) =>
    Props(new AlarmConfig(alarmId, alarmId, EquipmentPath: "", Severity: 500, Predicate: null));
|
||||
|
||||
/// <summary>Stores the collaborators and registers the message handlers of the state machine.</summary>
/// <param name="config">Alarm definition.</param>
/// <param name="evaluator">Evaluator kept for dependency-change handling.</param>
/// <param name="publisherFactory">Optional publisher factory.</param>
/// <param name="stateStore">Store that is read in <c>PreStart</c> and written on every transition.</param>
public ScriptedAlarmActor(
    AlarmConfig config,
    IScriptedAlarmEvaluator evaluator,
    Func<DPSPublisher>? publisherFactory,
    IAlarmActorStateStore stateStore)
{
    _config = config;
    _evaluator = evaluator;
    _publisherFactory = publisherFactory;
    _stateStore = stateStore;

    Receive<DependencyValueChanged>(OnDependencyChanged);

    // Inactive -> Active; a ConditionMet in any other state is ignored.
    Receive<ConditionMet>(_ =>
    {
        if (_state == ScriptedAlarmActorState.Inactive)
        {
            Transition(ScriptedAlarmActorState.Active, user: "system");
        }
    });

    // Active -> Acknowledged, attributed to the acknowledging actor; ignored otherwise.
    Receive<AcknowledgeAlarm>(msg =>
    {
        if (_state == ScriptedAlarmActorState.Active)
        {
            Transition(ScriptedAlarmActorState.Acknowledged, user: msg.Actor);
        }
    });

    // Active or Acknowledged -> Inactive; ignored when already Inactive.
    Receive<ConditionCleared>(_ =>
    {
        if (_state != ScriptedAlarmActorState.Inactive)
        {
            Transition(ScriptedAlarmActorState.Inactive, user: "system");
        }
    });

    // Result of the asynchronous load started in PreStart.
    Receive<StateRestored>(OnStateRestored);
}
|
||||
|
||||
/// <summary>
/// Starts an asynchronous load of the persisted snapshot. When the store has a usable
/// snapshot it is piped back to the actor as <c>StateRestored</c>; until that message is
/// processed the actor stays at its default Inactive state. A missing snapshot, an
/// unrecognised state value or a failing store all leave the actor Inactive.
/// </summary>
protected override void PreStart()
{
    // Capture everything the background task needs up front so it never reaches into
    // actor fields (or the actor context) from a thread-pool thread.
    var self = Self;
    var store = _stateStore;
    var log = _log;
    var alarmId = _config.AlarmId;

    // Async I/O runs off the message-loop thread; the outcome comes back as a message.
    _ = Task.Run(async () =>
    {
        try
        {
            var snapshot = await store.LoadAsync(alarmId, CancellationToken.None)
                .ConfigureAwait(false);
            if (snapshot is null)
            {
                return;
            }

            // Enum.TryParse also accepts numeric text such as "7", which would produce an
            // undefined enum value; IsDefined rejects those.
            var recognised =
                Enum.TryParse<ScriptedAlarmActorState>(snapshot.State, ignoreCase: true, out var parsed)
                && Enum.IsDefined(typeof(ScriptedAlarmActorState), parsed);
            if (!recognised)
            {
                log.Warning("ScriptedAlarm {Id}: ignoring unrecognised persisted state '{State}'; booting Inactive",
                    alarmId, snapshot.State);
                return;
            }

            self.Tell(new StateRestored(parsed, snapshot.LastAckUser));
        }
        catch (Exception ex)
        {
            log.Warning(ex, "ScriptedAlarm {Id}: state-store load failed; booting Inactive",
                alarmId);
        }
    });
}
|
||||
|
||||
/// <summary>
/// Applies a snapshot loaded by <c>PreStart</c>: overwrites the in-memory state and the
/// last-ack user, then logs what was restored.
/// </summary>
/// <param name="msg">Restored state and last-ack user.</param>
private void OnStateRestored(StateRestored msg)
{
    // A restored Active is provisional: the evaluator re-derives the condition on the next
    // DependencyValueChanged and corrects it if the condition has cleared. It is restored
    // anyway so an in-flight, un-acked alarm is not lost when a restart gets ahead of the
    // next evaluation.
    // NOTE(review): the overwrite is unconditional, so a transition handled before this
    // message arrives is replaced by the persisted value — confirm that is acceptable.
    var (restoredState, restoredAckUser) = msg;
    _lastAckUser = restoredAckUser;
    _state = restoredState;

    _log.Info("ScriptedAlarm {Id}: restored persisted state {State} (lastAck={User})",
        _config.AlarmId, _state, _lastAckUser ?? "(none)");
}
|
||||
|
||||
private void OnDependencyChanged(DependencyValueChanged msg)
|
||||
@@ -110,10 +158,12 @@ public sealed class ScriptedAlarmActor : ReceiveActor
|
||||
{
|
||||
var prev = _state;
|
||||
_state = next;
|
||||
if (next == ScriptedAlarmActorState.Acknowledged) _lastAckUser = user;
|
||||
_log.Info("ScriptedAlarm {Id}: {From} → {To}", _config.AlarmId, prev, next);
|
||||
|
||||
var nowUtc = DateTime.UtcNow;
|
||||
Context.Parent.Tell(new StateChanged(_config.AlarmId, next, nowUtc));
|
||||
PersistStateAsync(nowUtc);
|
||||
|
||||
var kind = next switch
|
||||
{
|
||||
@@ -159,4 +209,28 @@ public sealed class ScriptedAlarmActor : ReceiveActor
|
||||
}
|
||||
DistributedPubSub.Get(Context.System).Mediator.Tell(new Publish(topic, payload));
|
||||
}
|
||||
|
||||
/// <summary>
/// Tail of this actor's chain of queued saves. Read and replaced only inside
/// <c>PersistStateAsync</c>, which the visible code calls from the message loop.
/// </summary>
private Task _pendingSave = Task.CompletedTask;

/// <summary>
/// Queues a fire-and-forget save of the current state. Saves issued by one actor instance
/// run strictly one after another, so two quick transitions cannot reach the store out of
/// order and leave the older snapshot persisted.
/// </summary>
/// <param name="nowUtc">Timestamp of the transition being persisted.</param>
private void PersistStateAsync(DateTime nowUtc)
{
    var snapshot = new AlarmActorStateSnapshot(
        AlarmId: _config.AlarmId,
        State: _state.ToString(),
        LastTransitionUtc: nowUtc,
        LastAckUser: _lastAckUser);

    // Captured up front so the background work never touches actor fields.
    var store = _stateStore;
    var log = _log;
    var alarmId = _config.AlarmId;

    // Save failures get logged but don't block the message loop — the worst case is a
    // restart loses one transition, which then re-derives from the evaluator's next tick.
    async Task SaveSnapshotAsync()
    {
        try
        {
            await store.SaveAsync(snapshot, CancellationToken.None).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            log.Warning(ex, "ScriptedAlarm {Id}: state-store save failed", alarmId);
        }
    }

    // ContinueWith runs whatever the outcome of the previous save, so one failure cannot
    // stall the chain; TaskScheduler.Default keeps the work on the thread pool regardless
    // of the scheduler that is current when this method is called.
    _pendingSave = _pendingSave
        .ContinueWith(
            _ => SaveSnapshotAsync(),
            CancellationToken.None,
            TaskContinuationOptions.None,
            TaskScheduler.Default)
        .Unwrap();
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user