Compare commits

..

2 Commits

Author SHA1 Message Date
Joseph Doherty
483f55557c Phase 6.3 Stream B + Stream D (core) — ServiceLevelCalculator + RecoveryStateManager + ApplyLeaseRegistry
Lands the pure-logic heart of Phase 6.3. OPC UA node wiring (Stream C),
RedundancyCoordinator topology loader (Stream A), Admin UI + metrics (Stream E),
and client interop tests (Stream F) are follow-up work — tracked as
tasks #145-150.

New Server.Redundancy sub-namespace:

- ServiceLevelCalculator — pure 8-state matrix per decision #154. Inputs:
  role, selfHealthy, peerUa/HttpHealthy, applyInProgress, recoveryDwellMet,
  topologyValid, operatorMaintenance. Output: OPC UA Part 5 §6.3.34 Byte.
  Reserved bands (0=Maintenance, 1=NoData, 2=InvalidTopology) override
  everything; operational bands occupy 30..255.
  Key invariants:
    * Authoritative-Primary = 255, Authoritative-Backup = 100.
    * Isolated-Primary = 230 (retains authority with peer down).
    * Isolated-Backup = 80 (does NOT auto-promote — non-transparent model).
    * Primary-Mid-Apply = 200, Backup-Mid-Apply = 50; apply dominates
      peer-unreachable per Stream C.4 integration expectation.
    * Recovering-Primary = 180, Recovering-Backup = 30.
    * Standalone treats healthy as Authoritative-Primary (no peer concept).
- ServiceLevelBand enum — labels every numeric band for logs + Admin UI.
  Values match the calculator table exactly; compliance script asserts
  drift detection.
- RecoveryStateManager — holds Recovering band until (dwell ≥ 60s default)
  AND (one publish witness observed). Re-fault resets both gates so a
  flapping node doesn't shortcut through recovery twice.
- ApplyLeaseRegistry — keyed on (ConfigGenerationId, PublishRequestId) per
  decision #162. BeginApplyLease returns an IAsyncDisposable so every exit
  path (success, exception, cancellation, dispose-twice) closes the lease.
  ApplyMaxDuration watchdog (10 min default) via PruneStale tick forces
  close after a crashed publisher so ServiceLevel can't stick at mid-apply.

Tests (40 new, all pass):
- ServiceLevelCalculatorTests (27): reserved bands override; self-unhealthy
  → NoData; invalid topology demotes both nodes to 2; authoritative primary
  255; backup 100; isolated primary 230 retains authority; isolated backup
  80 does not promote; http-only unreachable triggers isolated; mid-apply
  primary 200; mid-apply backup 50; apply dominates peer-unreachable; recovering
  primary 180; recovering backup 30; standalone treats healthy as 255;
  classify round-trips every band including Unknown sentinel.
- RecoveryStateManagerTests (6): never-faulted auto-meets dwell; faulted-only
  returns true (semantics-doc test — coordinator short-circuits on
  selfHealthy=false); recovered without witness never meets; witness without
  dwell never meets; witness + dwell-elapsed meets; re-fault resets.
- ApplyLeaseRegistryTests (7): empty registry not-in-progress; begin+dispose
  closes; dispose on exception still closes; dispose twice safe; concurrent
  leases isolated; watchdog closes stale; watchdog leaves recent alone.

Full solution dotnet test: 1137 passing (Phase 6.2 shipped at 1097, Phase 6.3
B + D core = +40 = 1137). Pre-existing Client.CLI Subscribe flake unchanged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-19 09:56:34 -04:00
d269dcaa1b Merge pull request (#88) - Phase 6.2 exit gate 2026-04-19 09:47:58 -04:00
6 changed files with 708 additions and 0 deletions

View File

@@ -0,0 +1,85 @@
using System.Collections.Concurrent;
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
/// <summary>
/// Tracks in-progress publish-generation apply leases keyed on
/// <c>(ConfigGenerationId, PublishRequestId)</c>. Per decision #162 a sealed lease pattern
/// ensures <see cref="IsApplyInProgress"/> reflects every exit path (success / exception /
/// cancellation) because the IAsyncDisposable returned by <see cref="BeginApplyLease"/>
/// decrements unconditionally.
/// </summary>
/// <remarks>
/// A watchdog loop calls <see cref="PruneStale"/> periodically with the configured
/// <see cref="ApplyMaxDuration"/>; any lease older than that is force-closed so a crashed
/// publisher can't pin the node at <see cref="ServiceLevelBand.PrimaryMidApply"/>.
/// </remarks>
public sealed class ApplyLeaseRegistry
{
private readonly ConcurrentDictionary<LeaseKey, DateTime> _leases = new();
private readonly TimeProvider _timeProvider;
public TimeSpan ApplyMaxDuration { get; }
public ApplyLeaseRegistry(TimeSpan? applyMaxDuration = null, TimeProvider? timeProvider = null)
{
ApplyMaxDuration = applyMaxDuration ?? TimeSpan.FromMinutes(10);
_timeProvider = timeProvider ?? TimeProvider.System;
}
/// <summary>
/// Register a new lease. Returns an <see cref="IAsyncDisposable"/> whose disposal
/// decrements the registry; use <c>await using</c> in the caller so every exit path
/// closes the lease.
/// </summary>
public IAsyncDisposable BeginApplyLease(long generationId, Guid publishRequestId)
{
var key = new LeaseKey(generationId, publishRequestId);
_leases[key] = _timeProvider.GetUtcNow().UtcDateTime;
return new LeaseScope(this, key);
}
/// <summary>True when at least one apply lease is currently open.</summary>
public bool IsApplyInProgress => !_leases.IsEmpty;
/// <summary>Current open-lease count — diagnostics only.</summary>
public int OpenLeaseCount => _leases.Count;
/// <summary>Force-close any lease older than <see cref="ApplyMaxDuration"/>. Watchdog tick.</summary>
/// <returns>Number of leases the watchdog closed on this tick.</returns>
public int PruneStale()
{
var now = _timeProvider.GetUtcNow().UtcDateTime;
var closed = 0;
foreach (var kv in _leases)
{
if (now - kv.Value > ApplyMaxDuration && _leases.TryRemove(kv.Key, out _))
closed++;
}
return closed;
}
private void Release(LeaseKey key) => _leases.TryRemove(key, out _);
private readonly record struct LeaseKey(long GenerationId, Guid PublishRequestId);
private sealed class LeaseScope : IAsyncDisposable
{
private readonly ApplyLeaseRegistry _owner;
private readonly LeaseKey _key;
private int _disposed;
public LeaseScope(ApplyLeaseRegistry owner, LeaseKey key)
{
_owner = owner;
_key = key;
}
public ValueTask DisposeAsync()
{
if (Interlocked.Exchange(ref _disposed, 1) == 0)
_owner.Release(_key);
return ValueTask.CompletedTask;
}
}
}

View File

@@ -0,0 +1,65 @@
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
/// <summary>
/// Tracks the Recovering-band dwell for a node after a <c>Faulted → Healthy</c> transition.
/// Per decision #154 and Phase 6.3 Stream B.4 a node that has just returned to health stays
/// in the Recovering band (180 Primary / 30 Backup) until BOTH: (a) the configured
/// <see cref="DwellTime"/> has elapsed, AND (b) at least one successful publish-witness
/// read has been observed.
/// </summary>
/// <remarks>
/// Purely in-memory, no I/O. The coordinator feeds events into <see cref="MarkFaulted"/>,
/// <see cref="MarkRecovered"/>, and <see cref="RecordPublishWitness"/>; <see cref="IsDwellMet"/>
/// becomes true only after both conditions converge.
/// </remarks>
public sealed class RecoveryStateManager
{
private readonly TimeSpan _dwellTime;
private readonly TimeProvider _timeProvider;
/// <summary>Last time the node transitioned Faulted → Healthy. Null until first recovery.</summary>
private DateTime? _recoveredUtc;
/// <summary>True once a publish-witness read has succeeded after the last recovery.</summary>
private bool _witnessed;
public TimeSpan DwellTime => _dwellTime;
public RecoveryStateManager(TimeSpan? dwellTime = null, TimeProvider? timeProvider = null)
{
_dwellTime = dwellTime ?? TimeSpan.FromSeconds(60);
_timeProvider = timeProvider ?? TimeProvider.System;
}
/// <summary>Report that the node has entered the Faulted state.</summary>
public void MarkFaulted()
{
_recoveredUtc = null;
_witnessed = false;
}
/// <summary>Report that the node has transitioned Faulted → Healthy; dwell clock starts now.</summary>
public void MarkRecovered()
{
_recoveredUtc = _timeProvider.GetUtcNow().UtcDateTime;
_witnessed = false;
}
/// <summary>Report a successful publish-witness read.</summary>
public void RecordPublishWitness() => _witnessed = true;
/// <summary>
/// True when the dwell is considered met: either the node never faulted in the first
/// place, or both (dwell time elapsed + publish witness recorded) since the last
/// recovery. False means the coordinator should report Recovering-band ServiceLevel.
/// </summary>
public bool IsDwellMet()
{
if (_recoveredUtc is null) return true; // never faulted → dwell N/A
if (!_witnessed) return false;
var elapsed = _timeProvider.GetUtcNow().UtcDateTime - _recoveredUtc.Value;
return elapsed >= _dwellTime;
}
}

View File

@@ -0,0 +1,131 @@
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
/// <summary>
/// Pure-function translator from the redundancy-state inputs (role, self health, peer
/// reachability via HTTP + UA probes, apply-in-progress flag, recovery dwell, topology
/// validity) to the OPC UA Part 5 §6.3.34 <see cref="byte"/> ServiceLevel value.
/// </summary>
/// <remarks>
/// <para>Per decision #154 the 8-state matrix avoids the reserved bands (0=Maintenance,
/// 1=NoData) for operational states. Operational values occupy 2..255 so a spec-compliant
/// client that cuts over on "&lt;3 = unhealthy" keeps working without its vendor treating
/// the server as "under maintenance" during normal runtime.</para>
///
/// <para>This class is pure — no threads, no I/O. The coordinator that owns it re-evaluates
/// on every input change and pushes the new byte through an <c>IObserver&lt;byte&gt;</c> to
/// the OPC UA ServiceLevel variable. Tests exercise the full matrix without touching a UA
/// stack.</para>
/// </remarks>
public static class ServiceLevelCalculator
{
/// <summary>Compute the ServiceLevel for the given inputs.</summary>
/// <param name="role">Role declared for this node in the shared config DB.</param>
/// <param name="selfHealthy">This node's own health (from Phase 6.1 /healthz).</param>
/// <param name="peerUaHealthy">Peer node reachable via OPC UA probe.</param>
/// <param name="peerHttpHealthy">Peer node reachable via HTTP /healthz probe.</param>
/// <param name="applyInProgress">True while this node is inside a publish-generation apply window.</param>
/// <param name="recoveryDwellMet">True once the post-fault dwell + publish-witness conditions are met.</param>
/// <param name="topologyValid">False when the cluster has detected &gt;1 Primary (InvalidTopology demotes both nodes).</param>
/// <param name="operatorMaintenance">True when operator has declared the node in maintenance.</param>
public static byte Compute(
RedundancyRole role,
bool selfHealthy,
bool peerUaHealthy,
bool peerHttpHealthy,
bool applyInProgress,
bool recoveryDwellMet,
bool topologyValid,
bool operatorMaintenance = false)
{
// Reserved bands first — they override everything per OPC UA Part 5 §6.3.34.
if (operatorMaintenance) return (byte)ServiceLevelBand.Maintenance; // 0
if (!selfHealthy) return (byte)ServiceLevelBand.NoData; // 1
if (!topologyValid) return (byte)ServiceLevelBand.InvalidTopology; // 2
// Standalone nodes have no peer — treat as authoritative when healthy.
if (role == RedundancyRole.Standalone)
return (byte)(applyInProgress ? ServiceLevelBand.PrimaryMidApply : ServiceLevelBand.AuthoritativePrimary);
var isPrimary = role == RedundancyRole.Primary;
// Apply-in-progress band dominates recovery + isolation (client should cut to peer).
if (applyInProgress)
return (byte)(isPrimary ? ServiceLevelBand.PrimaryMidApply : ServiceLevelBand.BackupMidApply);
// Post-fault recovering — hold until dwell + witness satisfied.
if (!recoveryDwellMet)
return (byte)(isPrimary ? ServiceLevelBand.RecoveringPrimary : ServiceLevelBand.RecoveringBackup);
// Peer unreachable (either probe fails) → isolated band. Per decision #154 Primary
// retains authority at 230 when isolated; Backup signals 80 "take over if asked" and
// does NOT auto-promote (non-transparent model).
var peerReachable = peerUaHealthy && peerHttpHealthy;
if (!peerReachable)
return (byte)(isPrimary ? ServiceLevelBand.IsolatedPrimary : ServiceLevelBand.IsolatedBackup);
return (byte)(isPrimary ? ServiceLevelBand.AuthoritativePrimary : ServiceLevelBand.AuthoritativeBackup);
}
/// <summary>Labels a ServiceLevel byte with its matrix band name — for logs + Admin UI.</summary>
public static ServiceLevelBand Classify(byte value) => value switch
{
(byte)ServiceLevelBand.Maintenance => ServiceLevelBand.Maintenance,
(byte)ServiceLevelBand.NoData => ServiceLevelBand.NoData,
(byte)ServiceLevelBand.InvalidTopology => ServiceLevelBand.InvalidTopology,
(byte)ServiceLevelBand.RecoveringBackup => ServiceLevelBand.RecoveringBackup,
(byte)ServiceLevelBand.BackupMidApply => ServiceLevelBand.BackupMidApply,
(byte)ServiceLevelBand.IsolatedBackup => ServiceLevelBand.IsolatedBackup,
(byte)ServiceLevelBand.AuthoritativeBackup => ServiceLevelBand.AuthoritativeBackup,
(byte)ServiceLevelBand.RecoveringPrimary => ServiceLevelBand.RecoveringPrimary,
(byte)ServiceLevelBand.PrimaryMidApply => ServiceLevelBand.PrimaryMidApply,
(byte)ServiceLevelBand.IsolatedPrimary => ServiceLevelBand.IsolatedPrimary,
(byte)ServiceLevelBand.AuthoritativePrimary => ServiceLevelBand.AuthoritativePrimary,
_ => ServiceLevelBand.Unknown,
};
}
/// <summary>
/// Named bands of the 8-state ServiceLevel matrix. Numeric values match the
/// <see cref="ServiceLevelCalculator"/> table exactly; any drift will be caught by the
/// Phase 6.3 compliance script.
/// </summary>
public enum ServiceLevelBand : byte
{
/// <summary>Operator-declared maintenance. Reserved per OPC UA Part 5 §6.3.34.</summary>
Maintenance = 0,
/// <summary>Unreachable / Faulted. Reserved per OPC UA Part 5 §6.3.34.</summary>
NoData = 1,
/// <summary>Detected-inconsistency band — &gt;1 Primary observed runtime; both nodes self-demote.</summary>
InvalidTopology = 2,
/// <summary>Backup post-fault, dwell not met.</summary>
RecoveringBackup = 30,
/// <summary>Backup inside a publish-apply window.</summary>
BackupMidApply = 50,
/// <summary>Backup with unreachable Primary — "take over if asked"; does NOT auto-promote.</summary>
IsolatedBackup = 80,
/// <summary>Backup nominal operation.</summary>
AuthoritativeBackup = 100,
/// <summary>Primary post-fault, dwell not met.</summary>
RecoveringPrimary = 180,
/// <summary>Primary inside a publish-apply window.</summary>
PrimaryMidApply = 200,
/// <summary>Primary with unreachable peer, self serving — retains authority.</summary>
IsolatedPrimary = 230,
/// <summary>Primary nominal operation.</summary>
AuthoritativePrimary = 255,
/// <summary>Sentinel for unrecognised byte values.</summary>
Unknown = 254,
}

View File

@@ -0,0 +1,118 @@
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Server.Redundancy;
namespace ZB.MOM.WW.OtOpcUa.Server.Tests;
[Trait("Category", "Unit")]
public sealed class ApplyLeaseRegistryTests
{
private static readonly DateTime T0 = new(2026, 4, 19, 12, 0, 0, DateTimeKind.Utc);
private sealed class FakeTimeProvider : TimeProvider
{
public DateTime Utc { get; set; } = T0;
public override DateTimeOffset GetUtcNow() => new(Utc, TimeSpan.Zero);
}
[Fact]
public async Task EmptyRegistry_NotInProgress()
{
var reg = new ApplyLeaseRegistry();
reg.IsApplyInProgress.ShouldBeFalse();
await Task.Yield();
}
[Fact]
public async Task BeginAndDispose_ClosesLease()
{
var reg = new ApplyLeaseRegistry();
await using (reg.BeginApplyLease(1, Guid.NewGuid()))
{
reg.IsApplyInProgress.ShouldBeTrue();
reg.OpenLeaseCount.ShouldBe(1);
}
reg.IsApplyInProgress.ShouldBeFalse();
}
[Fact]
public async Task Dispose_OnException_StillCloses()
{
var reg = new ApplyLeaseRegistry();
var publishId = Guid.NewGuid();
await Should.ThrowAsync<InvalidOperationException>(async () =>
{
await using var lease = reg.BeginApplyLease(1, publishId);
throw new InvalidOperationException("publish failed");
});
reg.IsApplyInProgress.ShouldBeFalse("await-using semantics must close the lease on exception");
}
[Fact]
public async Task Dispose_TwiceIsSafe()
{
var reg = new ApplyLeaseRegistry();
var lease = reg.BeginApplyLease(1, Guid.NewGuid());
await lease.DisposeAsync();
await lease.DisposeAsync();
reg.IsApplyInProgress.ShouldBeFalse();
}
[Fact]
public async Task MultipleLeases_Concurrent_StayIsolated()
{
var reg = new ApplyLeaseRegistry();
var id1 = Guid.NewGuid();
var id2 = Guid.NewGuid();
await using var lease1 = reg.BeginApplyLease(1, id1);
await using var lease2 = reg.BeginApplyLease(2, id2);
reg.OpenLeaseCount.ShouldBe(2);
await lease1.DisposeAsync();
reg.IsApplyInProgress.ShouldBeTrue("lease2 still open");
await lease2.DisposeAsync();
reg.IsApplyInProgress.ShouldBeFalse();
}
[Fact]
public async Task Watchdog_ClosesStaleLeases()
{
var clock = new FakeTimeProvider();
var reg = new ApplyLeaseRegistry(applyMaxDuration: TimeSpan.FromMinutes(10), timeProvider: clock);
_ = reg.BeginApplyLease(1, Guid.NewGuid()); // intentional leak; not awaited / disposed
// Lease still young → no-op.
clock.Utc = T0.AddMinutes(5);
reg.PruneStale().ShouldBe(0);
reg.IsApplyInProgress.ShouldBeTrue();
// Past the watchdog horizon → force-close.
clock.Utc = T0.AddMinutes(11);
var closed = reg.PruneStale();
closed.ShouldBe(1);
reg.IsApplyInProgress.ShouldBeFalse("ServiceLevel can't stick at mid-apply after a crashed publisher");
await Task.Yield();
}
[Fact]
public async Task Watchdog_LeavesRecentLeaseAlone()
{
var clock = new FakeTimeProvider();
var reg = new ApplyLeaseRegistry(applyMaxDuration: TimeSpan.FromMinutes(10), timeProvider: clock);
await using var lease = reg.BeginApplyLease(1, Guid.NewGuid());
clock.Utc = T0.AddMinutes(3);
reg.PruneStale().ShouldBe(0);
reg.IsApplyInProgress.ShouldBeTrue();
}
}

View File

@@ -0,0 +1,92 @@
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Server.Redundancy;
namespace ZB.MOM.WW.OtOpcUa.Server.Tests;
[Trait("Category", "Unit")]
public sealed class RecoveryStateManagerTests
{
private static readonly DateTime T0 = new(2026, 4, 19, 12, 0, 0, DateTimeKind.Utc);
private sealed class FakeTimeProvider : TimeProvider
{
public DateTime Utc { get; set; } = T0;
public override DateTimeOffset GetUtcNow() => new(Utc, TimeSpan.Zero);
}
[Fact]
public void NeverFaulted_DwellIsAutomaticallyMet()
{
var mgr = new RecoveryStateManager();
mgr.IsDwellMet().ShouldBeTrue();
}
[Fact]
public void AfterFault_Only_IsDwellMet_Returns_True_ButCallerDoesntQueryDuringFaulted()
{
// Documented semantics: IsDwellMet is only consulted when selfHealthy=true (i.e. the
// node has recovered into Healthy). During Faulted the coordinator short-circuits on
// the self-health check and never calls IsDwellMet. So returning true here is harmless;
// the test captures the intent so a future "return false during Faulted" tweak has to
// deliberately change this test first.
var mgr = new RecoveryStateManager();
mgr.MarkFaulted();
mgr.IsDwellMet().ShouldBeTrue();
}
[Fact]
public void AfterRecovery_NoWitness_DwellNotMet_EvenAfterElapsed()
{
var clock = new FakeTimeProvider();
var mgr = new RecoveryStateManager(dwellTime: TimeSpan.FromSeconds(60), timeProvider: clock);
mgr.MarkFaulted();
mgr.MarkRecovered();
clock.Utc = T0.AddSeconds(120);
mgr.IsDwellMet().ShouldBeFalse("dwell elapsed but no publish witness — must NOT escape Recovering band");
}
[Fact]
public void AfterRecovery_WitnessButTooSoon_DwellNotMet()
{
var clock = new FakeTimeProvider();
var mgr = new RecoveryStateManager(dwellTime: TimeSpan.FromSeconds(60), timeProvider: clock);
mgr.MarkFaulted();
mgr.MarkRecovered();
mgr.RecordPublishWitness();
clock.Utc = T0.AddSeconds(30);
mgr.IsDwellMet().ShouldBeFalse("witness ok but dwell 30s < 60s");
}
[Fact]
public void AfterRecovery_Witness_And_DwellElapsed_Met()
{
var clock = new FakeTimeProvider();
var mgr = new RecoveryStateManager(dwellTime: TimeSpan.FromSeconds(60), timeProvider: clock);
mgr.MarkFaulted();
mgr.MarkRecovered();
mgr.RecordPublishWitness();
clock.Utc = T0.AddSeconds(61);
mgr.IsDwellMet().ShouldBeTrue();
}
[Fact]
public void ReFault_ResetsWitness_AndDwellClock()
{
var clock = new FakeTimeProvider();
var mgr = new RecoveryStateManager(dwellTime: TimeSpan.FromSeconds(60), timeProvider: clock);
mgr.MarkFaulted();
mgr.MarkRecovered();
mgr.RecordPublishWitness();
clock.Utc = T0.AddSeconds(61);
mgr.IsDwellMet().ShouldBeTrue();
mgr.MarkFaulted();
mgr.MarkRecovered();
clock.Utc = T0.AddSeconds(100); // re-entered Recovering, no new witness
mgr.IsDwellMet().ShouldBeFalse("new recovery needs its own witness");
}
}

View File

@@ -0,0 +1,217 @@
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
using ZB.MOM.WW.OtOpcUa.Server.Redundancy;
namespace ZB.MOM.WW.OtOpcUa.Server.Tests;
[Trait("Category", "Unit")]
public sealed class ServiceLevelCalculatorTests
{
// --- Reserved bands (0, 1, 2) ---
[Fact]
public void OperatorMaintenance_Overrides_Everything()
{
var v = ServiceLevelCalculator.Compute(
RedundancyRole.Primary,
selfHealthy: true, peerUaHealthy: true, peerHttpHealthy: true,
applyInProgress: false, recoveryDwellMet: true, topologyValid: true,
operatorMaintenance: true);
v.ShouldBe((byte)ServiceLevelBand.Maintenance);
}
[Fact]
public void UnhealthySelf_ReturnsNoData()
{
var v = ServiceLevelCalculator.Compute(
RedundancyRole.Primary,
selfHealthy: false, peerUaHealthy: true, peerHttpHealthy: true,
applyInProgress: false, recoveryDwellMet: true, topologyValid: true);
v.ShouldBe((byte)ServiceLevelBand.NoData);
}
[Fact]
public void InvalidTopology_Demotes_BothNodes_To_2()
{
var primary = ServiceLevelCalculator.Compute(
RedundancyRole.Primary,
selfHealthy: true, peerUaHealthy: true, peerHttpHealthy: true,
applyInProgress: false, recoveryDwellMet: true, topologyValid: false);
var secondary = ServiceLevelCalculator.Compute(
RedundancyRole.Secondary,
selfHealthy: true, peerUaHealthy: true, peerHttpHealthy: true,
applyInProgress: false, recoveryDwellMet: true, topologyValid: false);
primary.ShouldBe((byte)ServiceLevelBand.InvalidTopology);
secondary.ShouldBe((byte)ServiceLevelBand.InvalidTopology);
}
// --- Operational bands (authoritative) ---
[Fact]
public void Authoritative_Primary_Is_255()
{
var v = ServiceLevelCalculator.Compute(
RedundancyRole.Primary,
selfHealthy: true, peerUaHealthy: true, peerHttpHealthy: true,
applyInProgress: false, recoveryDwellMet: true, topologyValid: true);
v.ShouldBe((byte)ServiceLevelBand.AuthoritativePrimary);
v.ShouldBe((byte)255);
}
[Fact]
public void Authoritative_Backup_Is_100()
{
var v = ServiceLevelCalculator.Compute(
RedundancyRole.Secondary,
selfHealthy: true, peerUaHealthy: true, peerHttpHealthy: true,
applyInProgress: false, recoveryDwellMet: true, topologyValid: true);
v.ShouldBe((byte)100);
}
// --- Isolated bands ---
[Fact]
public void IsolatedPrimary_PeerUnreachable_Is_230_RetainsAuthority()
{
var v = ServiceLevelCalculator.Compute(
RedundancyRole.Primary,
selfHealthy: true, peerUaHealthy: false, peerHttpHealthy: true,
applyInProgress: false, recoveryDwellMet: true, topologyValid: true);
v.ShouldBe((byte)230);
}
[Fact]
public void IsolatedBackup_PrimaryUnreachable_Is_80_DoesNotPromote()
{
var v = ServiceLevelCalculator.Compute(
RedundancyRole.Secondary,
selfHealthy: true, peerUaHealthy: false, peerHttpHealthy: false,
applyInProgress: false, recoveryDwellMet: true, topologyValid: true);
v.ShouldBe((byte)80, "Backup isolates at 80 — doesn't auto-promote to 255");
}
[Fact]
public void HttpOnly_Unreachable_TriggersIsolated()
{
// Either probe failing marks peer unreachable — UA probe is authoritative but HTTP is
// the fast-fail short-circuit; either missing means "not a valid peer right now".
var v = ServiceLevelCalculator.Compute(
RedundancyRole.Primary,
selfHealthy: true, peerUaHealthy: true, peerHttpHealthy: false,
applyInProgress: false, recoveryDwellMet: true, topologyValid: true);
v.ShouldBe((byte)230);
}
// --- Apply-mid bands ---
[Fact]
public void PrimaryMidApply_Is_200()
{
var v = ServiceLevelCalculator.Compute(
RedundancyRole.Primary,
selfHealthy: true, peerUaHealthy: true, peerHttpHealthy: true,
applyInProgress: true, recoveryDwellMet: true, topologyValid: true);
v.ShouldBe((byte)200);
}
[Fact]
public void BackupMidApply_Is_50()
{
var v = ServiceLevelCalculator.Compute(
RedundancyRole.Secondary,
selfHealthy: true, peerUaHealthy: true, peerHttpHealthy: true,
applyInProgress: true, recoveryDwellMet: true, topologyValid: true);
v.ShouldBe((byte)50);
}
[Fact]
public void ApplyInProgress_Dominates_PeerUnreachable()
{
// Per Stream C.4 integration-test expectation: mid-apply + peer down → apply wins (200).
var v = ServiceLevelCalculator.Compute(
RedundancyRole.Primary,
selfHealthy: true, peerUaHealthy: false, peerHttpHealthy: false,
applyInProgress: true, recoveryDwellMet: true, topologyValid: true);
v.ShouldBe((byte)200);
}
// --- Recovering bands ---
[Fact]
public void RecoveringPrimary_Is_180()
{
var v = ServiceLevelCalculator.Compute(
RedundancyRole.Primary,
selfHealthy: true, peerUaHealthy: true, peerHttpHealthy: true,
applyInProgress: false, recoveryDwellMet: false, topologyValid: true);
v.ShouldBe((byte)180);
}
[Fact]
public void RecoveringBackup_Is_30()
{
var v = ServiceLevelCalculator.Compute(
RedundancyRole.Secondary,
selfHealthy: true, peerUaHealthy: true, peerHttpHealthy: true,
applyInProgress: false, recoveryDwellMet: false, topologyValid: true);
v.ShouldBe((byte)30);
}
// --- Standalone node (no peer) ---
[Fact]
public void Standalone_IsAuthoritativePrimary_WhenHealthy()
{
var v = ServiceLevelCalculator.Compute(
RedundancyRole.Standalone,
selfHealthy: true, peerUaHealthy: false, peerHttpHealthy: false,
applyInProgress: false, recoveryDwellMet: true, topologyValid: true);
v.ShouldBe((byte)255, "Standalone has no peer — treat healthy as authoritative");
}
[Fact]
public void Standalone_MidApply_Is_200()
{
var v = ServiceLevelCalculator.Compute(
RedundancyRole.Standalone,
selfHealthy: true, peerUaHealthy: false, peerHttpHealthy: false,
applyInProgress: true, recoveryDwellMet: true, topologyValid: true);
v.ShouldBe((byte)200);
}
// --- Classify round-trip ---
[Theory]
[InlineData((byte)0, ServiceLevelBand.Maintenance)]
[InlineData((byte)1, ServiceLevelBand.NoData)]
[InlineData((byte)2, ServiceLevelBand.InvalidTopology)]
[InlineData((byte)30, ServiceLevelBand.RecoveringBackup)]
[InlineData((byte)50, ServiceLevelBand.BackupMidApply)]
[InlineData((byte)80, ServiceLevelBand.IsolatedBackup)]
[InlineData((byte)100, ServiceLevelBand.AuthoritativeBackup)]
[InlineData((byte)180, ServiceLevelBand.RecoveringPrimary)]
[InlineData((byte)200, ServiceLevelBand.PrimaryMidApply)]
[InlineData((byte)230, ServiceLevelBand.IsolatedPrimary)]
[InlineData((byte)255, ServiceLevelBand.AuthoritativePrimary)]
[InlineData((byte)123, ServiceLevelBand.Unknown)]
public void Classify_RoundTrips_EveryBand(byte value, ServiceLevelBand expected)
{
ServiceLevelCalculator.Classify(value).ShouldBe(expected);
}
}