Files
lmxopcua/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/WedgeDetectorTests.cs
Joseph Doherty 1d9008e354 Phase 6.1 Stream B.3/B.4/B.5 — MemoryRecycle + ScheduledRecycleScheduler + demand-aware WedgeDetector
Closes out Stream B per docs/v2/implementation/phase-6-1-resilience-and-observability.md.

Core.Abstractions:
- IDriverSupervisor — process-level supervisor contract a Tier C driver's
  out-of-process topology provides (Galaxy Proxy/Supervisor implements this in
  a follow-up Driver.Galaxy wiring PR). Concerns: DriverInstanceId + RecycleAsync.
  Tier A/B drivers don't implement this; Stream B code asserts tier == C before
  ever calling it.

Core.Stability:
- MemoryRecycle — companion to MemoryTracking. On HardBreach, invokes the
  supervisor IFF tier == C AND a supervisor is wired. Tier A/B HardBreach logs
  a promotion-to-Tier-C recommendation and returns false. Soft/None/Warming
  never triggers a recycle at any tier.
- ScheduledRecycleScheduler — Tier C opt-in periodic recycler per decision #67.
  Ctor throws for Tier A/B (structural guard — scheduled recycle on an
  in-process driver would kill every OPC UA session and every co-hosted
  driver). TickAsync(now) advances the schedule by one interval per fire;
  RequestRecycleNowAsync drives an ad-hoc recycle without shifting the cron.
- WedgeDetector — demand-aware per decision #147. Classify(state, demand, now)
  returns:
    * NotApplicable when driver state != Healthy
    * Idle when Healthy + no pending work (bulkhead=0 && monitored=0 && historic=0)
    * Healthy when Healthy + pending work + progress within threshold
    * Faulted when Healthy + pending work + no progress within threshold
  Threshold clamps to min 60 s. DemandSignal.HasPendingWork ORs the three counters.
  The three false-wedge cases the plan calls out all stay Healthy: idle
  subscription-only, slow historian backfill making progress, write-only burst
  with drained bulkhead.

Tests (22 new, all pass):
- MemoryRecycleTests (7): Tier C hard-breach requests recycle; Tier A/B
  hard-breach never requests; Tier C without supervisor no-ops; soft-breach
  at every tier never requests; None/Warming never request.
- ScheduledRecycleSchedulerTests (6): ctor throws for A/B; zero/negative
  interval throws; tick before due no-ops; tick at/after due fires once and
  advances; RequestRecycleNow fires immediately without shifting schedule;
  multiple fires across ticks advance one interval each.
- WedgeDetectorTests (9): threshold clamp to 60 s; unhealthy driver always
  NotApplicable; idle subscription stays Idle; pending+fresh progress stays
  Healthy; pending+stale progress is Faulted; MonitoredItems active but no
  publish is Faulted; MonitoredItems active with fresh publish stays Healthy;
  historian backfill with fresh progress stays Healthy; write-only burst with
  empty bulkhead is Idle; HasPendingWork theory for any non-zero counter.

Full solution dotnet test: 989 passing (baseline 906, +83 for Phase 6.1 so far).
Pre-existing Client.CLI Subscribe flake unchanged.

Stream B complete. Next up: Stream C (health endpoints + structured logging).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-19 08:03:18 -04:00

113 lines
4.7 KiB
C#

using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Core.Stability;
namespace ZB.MOM.WW.OtOpcUa.Core.Tests.Stability;
[Trait("Category", "Unit")]
public sealed class WedgeDetectorTests
{
private static readonly DateTime Now = new(2026, 4, 19, 12, 0, 0, DateTimeKind.Utc);
private static readonly TimeSpan Threshold = TimeSpan.FromSeconds(120);
[Fact]
public void SubSixtySecondThreshold_ClampsToSixty()
{
var detector = new WedgeDetector(TimeSpan.FromSeconds(10));
detector.Threshold.ShouldBe(TimeSpan.FromSeconds(60));
}
[Fact]
public void Unhealthy_Driver_AlwaysNotApplicable()
{
var detector = new WedgeDetector(Threshold);
var demand = new DemandSignal(BulkheadDepth: 5, ActiveMonitoredItems: 10, QueuedHistoryReads: 0, LastProgressUtc: Now.AddMinutes(-10));
detector.Classify(DriverState.Faulted, demand, Now).ShouldBe(WedgeVerdict.NotApplicable);
detector.Classify(DriverState.Degraded, demand, Now).ShouldBe(WedgeVerdict.NotApplicable);
detector.Classify(DriverState.Initializing, demand, Now).ShouldBe(WedgeVerdict.NotApplicable);
}
[Fact]
public void Idle_Subscription_Only_StaysIdle()
{
// Idle driver: bulkhead 0, monitored items 0, no history reads queued.
// Even if LastProgressUtc is ancient, the verdict is Idle, not Faulted.
var detector = new WedgeDetector(Threshold);
var demand = new DemandSignal(0, 0, 0, Now.AddHours(-12));
detector.Classify(DriverState.Healthy, demand, Now).ShouldBe(WedgeVerdict.Idle);
}
[Fact]
public void PendingWork_WithRecentProgress_StaysHealthy()
{
var detector = new WedgeDetector(Threshold);
var demand = new DemandSignal(BulkheadDepth: 2, ActiveMonitoredItems: 0, QueuedHistoryReads: 0, LastProgressUtc: Now.AddSeconds(-30));
detector.Classify(DriverState.Healthy, demand, Now).ShouldBe(WedgeVerdict.Healthy);
}
[Fact]
public void PendingWork_WithStaleProgress_IsFaulted()
{
var detector = new WedgeDetector(Threshold);
var demand = new DemandSignal(BulkheadDepth: 2, ActiveMonitoredItems: 0, QueuedHistoryReads: 0, LastProgressUtc: Now.AddMinutes(-5));
detector.Classify(DriverState.Healthy, demand, Now).ShouldBe(WedgeVerdict.Faulted);
}
[Fact]
public void MonitoredItems_Active_ButNoRecentPublish_IsFaulted()
{
// Subscription-only driver with live MonitoredItems but no publish progress within threshold
// is a real wedge — this is the case the previous "no successful Read" formulation used
// to miss (no reads ever happen).
var detector = new WedgeDetector(Threshold);
var demand = new DemandSignal(BulkheadDepth: 0, ActiveMonitoredItems: 5, QueuedHistoryReads: 0, LastProgressUtc: Now.AddMinutes(-10));
detector.Classify(DriverState.Healthy, demand, Now).ShouldBe(WedgeVerdict.Faulted);
}
[Fact]
public void MonitoredItems_Active_WithFreshPublish_StaysHealthy()
{
var detector = new WedgeDetector(Threshold);
var demand = new DemandSignal(BulkheadDepth: 0, ActiveMonitoredItems: 5, QueuedHistoryReads: 0, LastProgressUtc: Now.AddSeconds(-10));
detector.Classify(DriverState.Healthy, demand, Now).ShouldBe(WedgeVerdict.Healthy);
}
[Fact]
public void HistoryBackfill_SlowButMakingProgress_StaysHealthy()
{
// Slow historian backfill — QueuedHistoryReads > 0 but progress advances within threshold.
var detector = new WedgeDetector(Threshold);
var demand = new DemandSignal(BulkheadDepth: 0, ActiveMonitoredItems: 0, QueuedHistoryReads: 50, LastProgressUtc: Now.AddSeconds(-60));
detector.Classify(DriverState.Healthy, demand, Now).ShouldBe(WedgeVerdict.Healthy);
}
[Fact]
public void WriteOnlyBurst_StaysIdle_WhenBulkheadEmpty()
{
// A write-only driver that just finished a burst: bulkhead drained, no subscriptions, no
// history reads. Idle — the previous formulation would have faulted here because no
// reads were succeeding even though the driver is perfectly healthy.
var detector = new WedgeDetector(Threshold);
var demand = new DemandSignal(0, 0, 0, Now.AddMinutes(-30));
detector.Classify(DriverState.Healthy, demand, Now).ShouldBe(WedgeVerdict.Idle);
}
[Fact]
public void DemandSignal_HasPendingWork_TrueForAnyNonZeroCounter()
{
new DemandSignal(1, 0, 0, Now).HasPendingWork.ShouldBeTrue();
new DemandSignal(0, 1, 0, Now).HasPendingWork.ShouldBeTrue();
new DemandSignal(0, 0, 1, Now).HasPendingWork.ShouldBeTrue();
new DemandSignal(0, 0, 0, Now).HasPendingWork.ShouldBeFalse();
}
}