fix(worker): resilient failover switch; FIPS-safe synthetic GUID; dup-reference guard + tests (Worker-026..028, Worker.Tests-031..033)
This commit is contained in:
@@ -313,12 +313,20 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer
|
||||
consecutiveFailures = 0;
|
||||
cleanProbes = 0;
|
||||
|
||||
// Emit the mode-changed notification FIRST and in a guarded block, so
|
||||
// the gateway always learns the feed went degraded even if the priming
|
||||
// snapshot below throws. A handler exception here must never escape the
|
||||
// switch — escaping would (a) leave `active` flipped with no
|
||||
// notification and (b) unwind into RunAlarmPollLoopAsync's trailing
|
||||
// catch, which permanently stops alarm polling (Worker-026).
|
||||
RaiseModeChanged(AlarmProviderMode.Subtag, reason, hresult);
|
||||
|
||||
// Warm the standby snapshot for the gateway hand-off. The gateway
|
||||
// reconciles state from this snapshot, so the return value is not
|
||||
// consumed here — the call exists for its priming side effect.
|
||||
_ = standby.SnapshotActiveAlarms();
|
||||
|
||||
RaiseModeChanged(AlarmProviderMode.Subtag, reason, hresult);
|
||||
// consumed here — the call exists for its priming side effect. A
|
||||
// failure to prime is non-fatal: the switch has already completed and
|
||||
// been announced, and the standby's live transitions will still flow.
|
||||
TryPrimeStandbySnapshot();
|
||||
}
|
||||
|
||||
private void SwitchToPrimary(string reason, int hresult)
|
||||
@@ -327,14 +335,49 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer
|
||||
mode = AlarmProviderMode.Alarmmgr;
|
||||
consecutiveFailures = 0;
|
||||
cleanProbes = 0;
|
||||
|
||||
// Guarded so a ProviderModeChanged handler exception cannot escape into
|
||||
// the STA poll loop and kill alarm delivery (Worker-026).
|
||||
RaiseModeChanged(AlarmProviderMode.Alarmmgr, reason, hresult);
|
||||
}
|
||||
|
||||
/// <summary>
/// Best-effort warm-up of the standby provider's active-alarm snapshot ahead
/// of the gateway hand-off. By the time this runs the switch has completed
/// and the mode change has been announced, so a priming failure is contained
/// here instead of aborting the switch or propagating into the poll loop.
/// </summary>
private void TryPrimeStandbySnapshot()
{
    try
    {
        // Invoked purely for its priming side effect; the returned snapshot
        // is intentionally dropped.
        standby.SnapshotActiveAlarms();
    }
    catch (Exception failure) when (!(failure is OutOfMemoryException))
    {
        // Deliberately ignored: the standby is already the active provider
        // and its live transitions keep flowing, so the gateway can
        // reconcile from subsequent records. A transient snapshot failure
        // must not escape and stop the poll loop. OutOfMemoryException is
        // excluded by the filter and still propagates.
    }
}
|
||||
|
||||
/// <summary>
/// Raises <c>ProviderModeChanged</c> exactly once for a provider switch and
/// contains any exception thrown by a subscriber.
/// </summary>
/// <param name="newMode">Provider mode reported to subscribers.</param>
/// <param name="reason">Reason text carried in the notification.</param>
/// <param name="hresult">HRESULT carried in the notification.</param>
private void RaiseModeChanged(AlarmProviderMode newMode, string reason, int hresult)
{
    // The event is invoked only inside the guarded block. An additional
    // unguarded invocation would both deliver the notification twice and let
    // a handler exception escape before the guard is ever reached.
    try
    {
        ProviderModeChanged?.Invoke(
            this,
            new AlarmProviderModeChange(newMode, reason, hresult, DateTime.UtcNow));
    }
    catch (Exception ex) when (ex is not OutOfMemoryException)
    {
        // A subscriber's OnProviderModeChanged handler threw (e.g. the
        // AlarmCommandHandler's eventQueue.Enqueue hitting capacity). The
        // switch itself has already taken effect; swallow so the failure
        // cannot unwind into RunAlarmPollLoopAsync and permanently stop
        // alarm polling (Worker-026). The event-queue overflow it most
        // likely signals is already surfaced as a fault on the IPC path.
    }
}
|
||||
|
||||
private void OnChildTransition(object? sender, MxAlarmTransitionEvent e)
|
||||
|
||||
@@ -42,8 +42,22 @@ public sealed class SubtagAlarmStateMachine
|
||||
|
||||
foreach (AlarmSubtagTarget target in targets)
|
||||
{
|
||||
// Guard duplicate references symmetrically with the dup-address guard
|
||||
// in Bind: two watch-list entries that share an AlarmFullReference but
|
||||
// differ in subtag addresses would otherwise silently overwrite the
|
||||
// earlier _statesByReference entry while its addresses stay bound to an
|
||||
// orphaned (and therefore invisible) AlarmState, producing silently
|
||||
// inconsistent synthesized state. Fail fast at subscribe time instead.
|
||||
string reference = target.AlarmFullReference ?? string.Empty;
|
||||
if (_statesByReference.ContainsKey(reference))
|
||||
{
|
||||
throw new ArgumentException(
|
||||
$"Duplicate alarm full reference '{reference}' is bound to more than one alarm target.",
|
||||
nameof(targets));
|
||||
}
|
||||
|
||||
var state = new AlarmState(target);
|
||||
_statesByReference[target.AlarmFullReference] = state;
|
||||
_statesByReference[reference] = state;
|
||||
|
||||
Bind(target.ActiveSubtag, state, SubtagRole.Active);
|
||||
Bind(target.AckedSubtag, state, SubtagRole.Acked);
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
using System;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
|
||||
namespace ZB.MOM.WW.MxGateway.Worker.MxAccess;
|
||||
@@ -11,8 +10,29 @@ namespace ZB.MOM.WW.MxGateway.Worker.MxAccess;
|
||||
/// repeated transitions for the same alarm reference correlate downstream
|
||||
/// (acknowledge, snapshot, OPC UA mapping) without an alarmmgr-supplied GUID.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// The 128-bit value is computed with a fixed FNV-1a hash over the UTF-8
|
||||
/// bytes of the reference, deliberately <strong>not</strong> via
|
||||
/// <c>System.Security.Cryptography</c>. On .NET Framework 4.8
|
||||
/// <c>MD5.Create()</c> returns the non-validated
|
||||
/// <c>MD5CryptoServiceProvider</c>, whose constructor throws under the
|
||||
/// Windows FIPS-compliance policy ("not part of the Windows Platform FIPS
|
||||
/// validated cryptographic algorithms"). Because this derivation needs only
|
||||
/// determinism and distinctness — never cryptographic strength — a plain
|
||||
/// non-crypto hash avoids the FIPS gate entirely, so the subtag fallback
|
||||
/// keeps working on regulated (FIPS-enabled) hosts exactly when it is needed.
|
||||
/// </remarks>
|
||||
internal static class SyntheticAlarmGuid
|
||||
{
|
||||
// 64-bit FNV-1a constants (RFC-style; widely used reference values).
|
||||
private const ulong FnvOffsetBasis = 14695981039346656037UL;
|
||||
private const ulong FnvPrime = 1099511628211UL;
|
||||
|
||||
// A second independent seed for the high 8 bytes so the full 128-bit value
|
||||
// is well-distributed across distinct references rather than two correlated
|
||||
// halves of the same single-pass hash.
|
||||
private const ulong FnvSecondSeed = 1469598103934665603UL;
|
||||
|
||||
/// <summary>
|
||||
/// Produces a stable <see cref="Guid"/> for the given alarm reference.
|
||||
/// The same reference always maps to the same GUID; distinct references
|
||||
@@ -32,11 +52,39 @@ internal static class SyntheticAlarmGuid
|
||||
|
||||
byte[] bytes = Encoding.UTF8.GetBytes(reference);
|
||||
|
||||
// MD5 is used purely for a stable, non-cryptographic identity mapping
|
||||
// (reference -> 16-byte GUID), never for security. Its 128-bit output
|
||||
// fits a GUID exactly, which is why it is preferred here.
|
||||
using MD5 md5 = MD5.Create();
|
||||
byte[] hash = md5.ComputeHash(bytes);
|
||||
return new Guid(hash);
|
||||
// Two independent FNV-1a passes fill the low and high 64 bits of the
|
||||
// 128-bit value. The second pass starts from its own seed and adds each
|
||||
// byte's index to the byte before mixing it in, so single-character
|
||||
// differences and re-orderings still diverge in both halves, avoiding the
// correlated-half collisions that reusing a single pass would risk.
|
||||
ulong low = FnvOffsetBasis;
|
||||
ulong high = FnvSecondSeed;
|
||||
for (int i = 0; i < bytes.Length; i++)
|
||||
{
|
||||
byte b = bytes[i];
|
||||
|
||||
low ^= b;
|
||||
low *= FnvPrime;
|
||||
|
||||
high ^= unchecked(b + (ulong)i);
|
||||
high *= FnvPrime;
|
||||
}
|
||||
|
||||
// Fold the length in so the empty string and other short inputs are not
|
||||
// degenerate (an all-zero / Guid.Empty result is undesirable downstream).
|
||||
low ^= (ulong)bytes.Length;
|
||||
low *= FnvPrime;
|
||||
|
||||
byte[] guidBytes = new byte[16];
|
||||
WriteUInt64(guidBytes, 0, low);
|
||||
WriteUInt64(guidBytes, 8, high);
|
||||
return new Guid(guidBytes);
|
||||
}
|
||||
|
||||
/// <summary>
/// Stores <paramref name="value"/> into <paramref name="buffer"/> as eight
/// bytes in little-endian order (least-significant byte first), beginning
/// at <paramref name="offset"/>.
/// </summary>
/// <param name="buffer">Destination array; must have at least eight bytes available from <paramref name="offset"/>.</param>
/// <param name="offset">Index of the first byte to write.</param>
/// <param name="value">The 64-bit value to serialize.</param>
private static void WriteUInt64(byte[] buffer, int offset, ulong value)
{
    // Peel off the lowest byte each round, then shift the next one down.
    ulong remaining = value;
    int position = 0;
    while (position < 8)
    {
        buffer[offset + position] = (byte)(remaining & 0xFF);
        remaining >>= 8;
        position++;
    }
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user