feat(historian): subscribe to alerts topic + translate to AlarmHistorianEvent (Primary-gated, exactly-once)

This commit is contained in:
Joseph Doherty
2026-06-11 11:18:26 -04:00
parent d2cc4a1222
commit bb42e5834a
2 changed files with 195 additions and 20 deletions
@@ -1,10 +1,13 @@
using Akka.Actor;
using Akka.Cluster.Tools.PublishSubscribe;
using Akka.Event;
using ZB.MOM.WW.OtOpcUa.Commons.Messages.Alerts;
using ZB.MOM.WW.OtOpcUa.Commons.Messages.Redundancy;
using ZB.MOM.WW.OtOpcUa.Commons.Types;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian;
using ZB.MOM.WW.OtOpcUa.Runtime.OpcUa;
using ZB.MOM.WW.OtOpcUa.Runtime.ScriptedAlarms;
namespace ZB.MOM.WW.OtOpcUa.Runtime.Historian;
@@ -57,39 +60,83 @@ public sealed class HistorianAdapterActor : ReceiveActor
_sink = sink;
_localNode = localNode;
Receive<AlarmHistorianEvent>(evt =>
{
// Warm-standby dedup (forward-looking): only the Primary historizes to the durable sink so a
// future per-node feeder writes exactly once. Default-write until told Secondary/Detached so
// single-node deploys + the boot window never drop historization. (Currently the actor has no
// production feeder — this is a defensive guard for when engine→historian wiring lands.)
if (_localRole is RedundancyRole.Secondary or RedundancyRole.Detached)
{
return;
}
// A direct AlarmHistorianEvent source (kept for a future engine→historian path) goes through the
// same Primary gate as the alerts-topic feed below.
Receive<AlarmHistorianEvent>(Historize);
// Fire-and-forget: SqliteStoreAndForwardSink persists to local SQLite synchronously
// inside EnqueueAsync (it returns once the row is committed), so we don't block on
// network/pipe latency. Failures are surfaced via GetStatus's LastError + drain state.
_ = EnqueueAsync(evt);
});
// Live alarm transitions arrive off the cluster `alerts` DPS topic (subscribed in PreStart). The
// Primary ScriptedAlarmHostActor publishes each transition ONCE, but DistributedPubSub fans that
// single message to EVERY node's subscriber — including BOTH central nodes' historian adapters. The
// Primary gate in Historize keeps only the Primary writing ⇒ exactly-once across the warm pair.
Receive<AlarmTransitionEvent>(t => Historize(Translate(t)));
Receive<GetStatus>(_ => Sender.Tell(_sink.GetStatus()));
// Cluster redundancy snapshots (published on the `redundancy-state` topic, subscribed in PreStart)
// cache this node's role so the AlarmHistorianEvent handler can gate the durable sink enqueue to the
// Primary. The PubSub Subscribe is acked back to Self (no-op below).
// cache this node's role so the historize gate can scope the durable sink enqueue to the Primary.
// The PubSub SubscribeAcks (redundancy-state + alerts) are delivered back to Self (no-op below).
Receive<RedundancyStateChanged>(OnRedundancyStateChanged);
Receive<SubscribeAck>(_ => { });
}
/// <summary>Subscribes to the <c>redundancy-state</c> topic so cluster role changes land as
/// <see cref="RedundancyStateChanged"/> and cache this node's role — the historian enqueue is gated to
/// the Primary so a future per-node feeder doesn't double-write across the warm-redundant pair.</summary>
/// <summary>Primary-gated, fire-and-forget enqueue of a historian event. Warm-standby dedup: a node
/// whose cached role is Secondary or Detached drops the event, so the per-node alerts feed is written
/// exactly once across the pair. Any other role state (including "not told yet") writes, so single-node
/// deploys and the boot window never lose historization. The enqueue task is discarded because
/// <see cref="SqliteStoreAndForwardSink"/> commits the row to local SQLite synchronously inside
/// <see cref="EnqueueAsync"/>, so the mailbox never waits on network/pipe latency; failures are
/// reported through <see cref="GetStatus"/>.</summary>
/// <param name="evt">The historian event to gate + enqueue.</param>
private void Historize(AlarmHistorianEvent evt)
{
    // Only an explicit standby role suppresses the write; everything else is treated as "may write".
    var isStandby = _localRole is RedundancyRole.Secondary or RedundancyRole.Detached;
    if (!isStandby)
    {
        // Deliberately not awaited — see the summary for why fire-and-forget is safe here.
        _ = EnqueueAsync(evt);
    }
}
/// <summary>Converts a live <see cref="AlarmTransitionEvent"/> (the shape published on the alerts
/// topic) into the <see cref="AlarmHistorianEvent"/> the historian stores. A null or empty
/// <c>AlarmTypeName</c> is replaced with "AlarmCondition": during a rolling restart Akka's JSON
/// serializer fills an old-format message's <c>AlarmTypeName</c> with the CLR default (null) instead of
/// the record's call-site default, and the historian must never store a null alarm type.</summary>
/// <param name="t">The live transition published on the <c>alerts</c> topic.</param>
/// <returns>The translated historian event.</returns>
private static AlarmHistorianEvent Translate(AlarmTransitionEvent t)
{
    // Resolve the fallback first so the constructor call below is a plain field-by-field copy.
    var alarmTypeName = string.IsNullOrEmpty(t.AlarmTypeName) ? "AlarmCondition" : t.AlarmTypeName;

    return new AlarmHistorianEvent(
        AlarmId: t.AlarmId,
        EquipmentPath: t.EquipmentPath,
        AlarmName: t.AlarmName,
        AlarmTypeName: alarmTypeName,
        Severity: ToSeverity(t.Severity),
        EventKind: t.TransitionKind,
        Message: t.Message,
        User: t.User,
        Comment: t.Comment,
        TimestampUtc: t.TimestampUtc);
}
/// <summary>Collapses the numeric severity carried on a transition (bucket ceilings top out at 1000)
/// back into the coarse <see cref="AlarmSeverity"/>, inverting
/// <c>ScriptedAlarmHostActor.SeverityToInt</c>'s bucket ceilings
/// (Low=250, Medium=500, High=750, Critical=1000).</summary>
/// <param name="severity">The numeric severity from the transition.</param>
/// <returns>The coarse alarm severity bucket; values at or below 250 (including non-positive ones)
/// map to Low and anything above 750 maps to Critical.</returns>
private static AlarmSeverity ToSeverity(int severity)
{
    // Walk the ceilings from the top down; each guard is the complement of the bucket below it.
    if (severity > 750)
    {
        return AlarmSeverity.Critical;
    }

    if (severity > 500)
    {
        return AlarmSeverity.High;
    }

    if (severity > 250)
    {
        return AlarmSeverity.Medium;
    }

    return AlarmSeverity.Low;
}
/// <summary>Resolves the DistributedPubSub mediator and subscribes this actor to two topics:
/// <c>redundancy-state</c>, so role changes arrive as <see cref="RedundancyStateChanged"/> and refresh
/// the cached local role that gates the historize enqueue to the Primary (no double-write across the
/// warm-redundant pair); and <c>alerts</c>, so live <see cref="AlarmTransitionEvent"/>s are translated
/// and historized.</summary>
protected override void PreStart()
{
    var mediator = DistributedPubSub.Get(Context.System).Mediator;
    _mediator = mediator;

    // Same subscription order as before: redundancy-state first, then alerts.
    var topics = new[] { OpcUaPublishActor.RedundancyStateTopic, ScriptedAlarmHostActor.AlertsTopic };
    foreach (var topic in topics)
    {
        mediator.Tell(new Subscribe(topic, Self));
    }

    base.PreStart();
}