harden(historian): nullable HistorizeToAveva (missing→historize) for rolling-restart-safe deserialize + middle-link test

This commit is contained in:
Joseph Doherty
2026-06-11 13:00:57 -04:00
parent c20d228384
commit 61b230d79a
4 changed files with 65 additions and 11 deletions
@@ -16,7 +16,7 @@ namespace ZB.MOM.WW.OtOpcUa.Commons.Messages.Alerts;
/// <param name="TimestampUtc">When the transition occurred.</param>
/// <param name="AlarmTypeName">OPC UA Part 9 condition subtype name — one of <c>LimitAlarm</c> / <c>DiscreteAlarm</c> / <c>OffNormalAlarm</c> / <c>AlarmCondition</c> (the base type, used as the default). The historian feed maps this onto the durable alarm-type column.</param>
/// <param name="Comment">Operator-supplied comment on ack / confirm / comment transitions; <c>null</c> for engine-driven transitions (Activated / Cleared / Shelved / …) that carry no comment.</param>
/// <param name="HistorizeToAveva">When <c>false</c>, the durable historian sink suppresses this transition (the live <c>alerts</c> fan-out is unaffected). Defaults to <c>true</c>. On a rolling restart an old-format message deserializes this as <c>false</c> (CLR default); that is safe because the writing node is always the same-version publisher — see HistorianAdapterActor.</param>
/// <param name="HistorizeToAveva">When <c>false</c>, the durable historian sink suppresses this transition (the live <c>alerts</c> fan-out is unaffected); <c>null</c> or <c>true</c> historize. <c>null</c> is the cross-version/rolling-restart case: an old-format message missing the field deserializes to <c>null</c> (CLR default for <c>bool?</c>) and is historized (safe default-on), matching the <c>AlarmTypeName</c> null-coalesce in <c>HistorianAdapterActor.Translate</c>. The producer (<c>ScriptedAlarmHostActor</c>) always sets a concrete <c>true</c>/<c>false</c>.</param>
public sealed record AlarmTransitionEvent(
string AlarmId,
string EquipmentPath,
@@ -28,4 +28,4 @@ public sealed record AlarmTransitionEvent(
DateTime TimestampUtc,
string AlarmTypeName = "AlarmCondition",
string? Comment = null,
bool HistorizeToAveva = true);
bool? HistorizeToAveva = null);
@@ -71,13 +71,12 @@ public sealed class HistorianAdapterActor : ReceiveActor
// ShouldHistorize gate keeps only the Primary writing ⇒ exactly-once across the warm pair.
// NOTE: Translate is intentionally inside the gate so Secondary/Detached nodes never allocate a
// discarded AlarmHistorianEvent.
// t.HistorizeToAveva=false is a per-alarm opt-out of DURABLE historization only — the live `alerts`
// fan-out already happened upstream (the publish is NOT gated on this flag), so we gate the SINK
// write here, not the publish. Rolling-restart-safe: the node that WRITES is always the same-version
// node that PUBLISHED (Primary or boot window), so a cross-version old→new flow only reaches the
// Secondary, which never writes — an old-format message deserializing HistorizeToAveva as the CLR
// default (false) cannot drop a Primary's historization.
Receive<AlarmTransitionEvent>(t => { if (ShouldHistorize() && t.HistorizeToAveva) _ = EnqueueAsync(Translate(t)); });
// t.HistorizeToAveva is not false: only explicit false suppresses the durable sink write. null
// (CLR default for bool?) and true both historize. null is the rolling-restart / cross-version case:
// an old-format message missing the field deserializes to null and is historized (default-on), so no
// audit row is dropped at a handover — same posture as the AlarmTypeName null-coalesce in Translate.
// The producer (ScriptedAlarmHostActor) always sets a concrete true/false.
Receive<AlarmTransitionEvent>(t => { if (ShouldHistorize() && t.HistorizeToAveva is not false) _ = EnqueueAsync(Translate(t)); });
Receive<GetStatus>(_ => Sender.Tell(_sink.GetStatus()));
@@ -178,13 +178,15 @@ public sealed class HistorianAdapterActorTests : RuntimeActorTestBase
}
/// <summary>Builds an <see cref="AlarmTransitionEvent"/> (the shape published on the <c>alerts</c>
/// DPS topic) for the translate tests, with overridable severity / type / comment / kind.</summary>
/// DPS topic) for the translate tests, with overridable severity / type / comment / kind.
/// <paramref name="historizeToAveva"/> is <c>bool?</c> so tests can pass <c>null</c> to simulate the
/// rolling-restart / cross-version case (missing field → CLR default null).</summary>
private static AlarmTransitionEvent SampleTransition(
int severity = 750,
string alarmTypeName = "LimitAlarm",
string? comment = "note",
string transitionKind = "Activated",
bool historizeToAveva = true) => new(
bool? historizeToAveva = true) => new(
AlarmId: "alm-9",
EquipmentPath: "Area/Line/Equip",
AlarmName: "HiHi",
@@ -270,6 +272,23 @@ public sealed class HistorianAdapterActorTests : RuntimeActorTestBase
sink.EnqueueCount.ShouldBe(0);
}
/// <summary>Rolling-restart default-on (T8c): a transition whose <c>HistorizeToAveva</c> is
/// <c>null</c> MUST still be historized by a Primary node. <c>null</c> is what an old-format message
/// missing the field deserializes to (the CLR default for <c>bool?</c>), so treating it as default-on
/// means no audit row is dropped at a handover — the same posture as the <c>AlarmTypeName</c>
/// null-coalesce in <c>HistorianAdapterActor.Translate</c>. Only an explicit <c>false</c> suppresses
/// the durable sink write.</summary>
[Fact]
public void Primary_historizes_when_flag_is_null()
{
    // Arrange: an adapter wired to a recording sink and told it is the Primary.
    var recorder = new RecordingSink();
    var adapter = Sys.ActorOf(HistorianAdapterActor.Props(recorder, LocalNode));
    TellRedundancyRole(adapter, RedundancyRole.Primary);

    // Act: deliver a transition shaped like a cross-version deserialize (flag missing → null).
    var crossVersionTransition = SampleTransition(historizeToAveva: null);
    adapter.Tell(crossVersionTransition);

    // Assert: the transition reached the sink exactly once.
    AwaitAssert(() => recorder.EnqueueCount.ShouldBe(1), Settle);
}
/// <summary>Severity buckets (T9): the OPC UA 1–1000 numeric severity on the transition maps onto
/// the coarse <see cref="AlarmSeverity"/> at the same ceilings <c>ScriptedAlarmHostActor.SeverityToInt</c>
/// emits (Low≤250, Medium≤500, High≤750, Critical otherwise). Driven end-to-end through the enqueue.</summary>
@@ -576,6 +576,42 @@ public sealed class ScriptedAlarmHostActorTests : RuntimeActorTestBase
alerts.ExpectNoMsg(TimeSpan.FromMilliseconds(500));
}
/// <summary>HistorizeToAveva flag threading (middle-link): the host MUST copy each plan's
/// <c>HistorizeToAveva</c> value onto the <see cref="AlarmTransitionEvent"/> it publishes on the
/// <c>alerts</c> topic. Two alarms are loaded side by side — one built by <see cref="Plan"/> as-is
/// (expected to publish <c>true</c>) and one copied via <c>with { HistorizeToAveva = false }</c> — and
/// each alarm's <c>Activated</c> event is checked, so a regression in
/// <c>ScriptedAlarmHostActor.OnEngineEmission</c>'s flag threading is caught here rather than first
/// showing up at <c>HistorianAdapterActor</c>'s opt-out gate.</summary>
[Fact]
public void HistorizeToAveva_flag_is_threaded_onto_published_AlarmTransitionEvent()
{
    var publishProbe = CreateTestProbe();
    var muxProbe = CreateTestProbe();
    var alertsProbe = CreateTestProbe();
    SubscribeToAlerts(alertsProbe);

    // Distinct ids and dependency refs let both plans load in the same apply without colliding.
    var historizedPlan = Plan(id: "alm-hist-true", depRef: "H.T", severity: 800);
    var suppressedPlan = Plan(id: "alm-hist-false", depRef: "H.F", severity: 800) with { HistorizeToAveva = false };
    var plans = new[] { historizedPlan, suppressedPlan };

    var (host, _) = Spawn(publishProbe, muxProbe);
    host.Tell(new ScriptedAlarmHostActor.ApplyScriptedAlarms(plans));
    // The host registering interest with the mux is the signal that the load has completed.
    muxProbe.ExpectMsg<DependencyMuxActor.RegisterInterest>(Timeout);

    // Pushes a new dependency value at the host, then waits for that alarm's Activated event
    // on the alerts probe (unrelated messages are skipped by the fish predicate).
    AlarmTransitionEvent ActivateAndCapture(string depRef, string alarmId)
    {
        host.Tell(new VirtualTagActor.DependencyValueChanged(depRef, 99, DateTime.UtcNow));
        return alertsProbe.FishForMessage<AlarmTransitionEvent>(
            e => e.AlarmId == alarmId && e.TransitionKind == "Activated", Timeout);
    }

    // Unmodified plan: the published event must carry true.
    var historizedEvent = ActivateAndCapture("H.T", "alm-hist-true");
    historizedEvent.HistorizeToAveva.ShouldBe(true);

    // Opted-out plan: the published event must carry false.
    var suppressedEvent = ActivateAndCapture("H.F", "alm-hist-false");
    suppressedEvent.HistorizeToAveva.ShouldBe(false);
}
/// <summary>Absent-node default-emit (A1): a <see cref="RedundancyStateChanged"/> snapshot that
/// contains ONLY other nodes (the host's own <see cref="LocalNode"/> is absent) must leave the
/// cached local role unchanged (null/unknown) — the host therefore defaults to emit, publishing