diff --git a/src/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IDriverSupervisor.cs b/src/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IDriverSupervisor.cs new file mode 100644 index 0000000..0e07271 --- /dev/null +++ b/src/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IDriverSupervisor.cs @@ -0,0 +1,26 @@ +namespace ZB.MOM.WW.OtOpcUa.Core.Abstractions; + +/// +/// Process-level supervisor contract a Tier C driver's out-of-process topology provides +/// (e.g. Driver.Galaxy.Proxy/Supervisor/). Concerns: restart the Host process when a +/// hard fault is detected (memory breach, wedge, scheduled recycle window). +/// +/// +/// Per docs/v2/plan.md decisions #68, #73-74, and #145. Tier A/B drivers do NOT have +/// a supervisor because they run in-process — recycling would kill every OPC UA session and +/// every co-hosted driver. The Core.Stability layer only invokes this interface for Tier C +/// instances after asserting the tier via . +/// +public interface IDriverSupervisor +{ + /// Driver instance this supervisor governs. + string DriverInstanceId { get; } + + /// + /// Request the supervisor to recycle (terminate + restart) the Host process. Implementations + /// are expected to be idempotent under repeat calls during an in-flight recycle. + /// + /// Human-readable reason — flows into the supervisor's logs. + /// Cancels the recycle request; an in-flight restart is not interrupted. + Task RecycleAsync(string reason, CancellationToken cancellationToken); +} diff --git a/src/ZB.MOM.WW.OtOpcUa.Core/Stability/MemoryRecycle.cs b/src/ZB.MOM.WW.OtOpcUa.Core/Stability/MemoryRecycle.cs new file mode 100644 index 0000000..34f97d7 --- /dev/null +++ b/src/ZB.MOM.WW.OtOpcUa.Core/Stability/MemoryRecycle.cs @@ -0,0 +1,65 @@ +using Microsoft.Extensions.Logging; +using ZB.MOM.WW.OtOpcUa.Core.Abstractions; + +namespace ZB.MOM.WW.OtOpcUa.Core.Stability; + +/// +/// Tier C only process-recycle companion to . On a +/// signal, invokes the supplied +/// to restart the out-of-process Host. +/// +/// +/// Per docs/v2/plan.md decisions #74 and #145. Tier A/B hard-breach on an in-process +/// driver would kill every OPC UA session and every co-hosted driver, so for Tier A/B this +/// class logs a promotion-to-Tier-C recommendation and does NOT invoke any supervisor. +/// A future tier-migration workflow acts on the recommendation. +/// +public sealed class MemoryRecycle +{ + private readonly DriverTier _tier; + private readonly IDriverSupervisor? _supervisor; + private readonly ILogger _logger; + + public MemoryRecycle(DriverTier tier, IDriverSupervisor? supervisor, ILogger logger) + { + _tier = tier; + _supervisor = supervisor; + _logger = logger; + } + + /// + /// Handle a classification for the driver. For Tier C with a + /// wired supervisor, HardBreach triggers . + /// All other combinations are no-ops with respect to process state (soft breaches + Tier A/B + /// hard breaches just log). + /// + /// True when a recycle was requested; false otherwise. + public async Task HandleAsync(MemoryTrackingAction action, long footprintBytes, CancellationToken cancellationToken) + { + switch (action) + { + case MemoryTrackingAction.SoftBreach: + _logger.LogWarning( + "Memory soft-breach on driver {DriverId}: footprint={Footprint:N0} bytes, tier={Tier}. Surfaced to Admin; no action.", + _supervisor?.DriverInstanceId ?? "(unknown)", footprintBytes, _tier); + return false; + + case MemoryTrackingAction.HardBreach when _tier == DriverTier.C && _supervisor is not null: + _logger.LogError( + "Memory hard-breach on Tier C driver {DriverId}: footprint={Footprint:N0} bytes. Requesting supervisor recycle.", + _supervisor.DriverInstanceId, footprintBytes); + await _supervisor.RecycleAsync($"Memory hard-breach: {footprintBytes} bytes", cancellationToken).ConfigureAwait(false); + return true; + + case MemoryTrackingAction.HardBreach: + _logger.LogError( + "Memory hard-breach on Tier {Tier} in-process driver {DriverId}: footprint={Footprint:N0} bytes. " + + "Recommending promotion to Tier C; NOT auto-killing (decisions #74, #145).", + _tier, _supervisor?.DriverInstanceId ?? "(unknown)", footprintBytes); + return false; + + default: + return false; + } + } +} diff --git a/src/ZB.MOM.WW.OtOpcUa.Core/Stability/ScheduledRecycleScheduler.cs b/src/ZB.MOM.WW.OtOpcUa.Core/Stability/ScheduledRecycleScheduler.cs new file mode 100644 index 0000000..84174f7 --- /dev/null +++ b/src/ZB.MOM.WW.OtOpcUa.Core/Stability/ScheduledRecycleScheduler.cs @@ -0,0 +1,86 @@ +using Microsoft.Extensions.Logging; +using ZB.MOM.WW.OtOpcUa.Core.Abstractions; + +namespace ZB.MOM.WW.OtOpcUa.Core.Stability; + +/// +/// Tier C opt-in periodic-recycle driver per docs/v2/plan.md decision #67. +/// A tick method advanced by the caller (fed by a background timer in prod; by test clock +/// in unit tests) decides whether the configured interval has elapsed and, if so, drives the +/// supplied to recycle the Host. +/// +/// +/// Tier A/B drivers MUST NOT use this class — scheduled recycle for in-process drivers would +/// kill every OPC UA session and every co-hosted driver. The ctor throws when constructed +/// with any tier other than C to make the misuse structurally impossible. +/// +/// Keeps no background thread of its own — callers invoke on +/// their ambient scheduler tick (Phase 6.1 Stream C's health-endpoint host runs one). That +/// decouples the unit under test from wall-clock time and thread-pool scheduling. +/// +public sealed class ScheduledRecycleScheduler +{ + private readonly TimeSpan _recycleInterval; + private readonly IDriverSupervisor _supervisor; + private readonly ILogger _logger; + private DateTime _nextRecycleUtc; + + /// + /// Construct the scheduler for a Tier C driver. Throws if isn't C. + /// + /// Driver tier; must be . + /// Interval between recycles (e.g. 7 days). + /// Anchor time; next recycle fires at + . + /// Supervisor that performs the actual recycle. + /// Diagnostic sink. + public ScheduledRecycleScheduler( + DriverTier tier, + TimeSpan recycleInterval, + DateTime startUtc, + IDriverSupervisor supervisor, + ILogger logger) + { + if (tier != DriverTier.C) + throw new ArgumentException( + $"ScheduledRecycleScheduler is Tier C only (got {tier}). " + + "In-process drivers must not use scheduled recycle; see decisions #74 and #145.", + nameof(tier)); + + if (recycleInterval <= TimeSpan.Zero) + throw new ArgumentException("RecycleInterval must be positive.", nameof(recycleInterval)); + + _recycleInterval = recycleInterval; + _supervisor = supervisor; + _logger = logger; + _nextRecycleUtc = startUtc + recycleInterval; + } + + /// Next scheduled recycle UTC. Advances by on each fire. + public DateTime NextRecycleUtc => _nextRecycleUtc; + + /// Recycle interval this scheduler was constructed with. + public TimeSpan RecycleInterval => _recycleInterval; + + /// + /// Tick the scheduler forward. If is past + /// , requests a recycle from the supervisor and advances + /// by exactly one interval. Returns true when a recycle fired. + /// + public async Task TickAsync(DateTime utcNow, CancellationToken cancellationToken) + { + if (utcNow < _nextRecycleUtc) + return false; + + _logger.LogInformation( + "Scheduled recycle due for Tier C driver {DriverId} at {Now:o}; advancing next to {Next:o}.", + _supervisor.DriverInstanceId, utcNow, _nextRecycleUtc + _recycleInterval); + + await _supervisor.RecycleAsync("Scheduled periodic recycle", cancellationToken).ConfigureAwait(false); + _nextRecycleUtc += _recycleInterval; + return true; + } + + /// Request an immediate recycle outside the schedule (e.g. MemoryRecycle hard-breach escalation). + public Task RequestRecycleNowAsync(string reason, CancellationToken cancellationToken) => + _supervisor.RecycleAsync(reason, cancellationToken); +} diff --git a/src/ZB.MOM.WW.OtOpcUa.Core/Stability/WedgeDetector.cs b/src/ZB.MOM.WW.OtOpcUa.Core/Stability/WedgeDetector.cs new file mode 100644 index 0000000..f20ab55 --- /dev/null +++ b/src/ZB.MOM.WW.OtOpcUa.Core/Stability/WedgeDetector.cs @@ -0,0 +1,81 @@ +using ZB.MOM.WW.OtOpcUa.Core.Abstractions; + +namespace ZB.MOM.WW.OtOpcUa.Core.Stability; + +/// +/// Demand-aware driver-wedge detector per docs/v2/plan.md decision #147. +/// Flips a driver to only when BOTH of the following hold: +/// (a) there is pending work outstanding, AND (b) no progress has been observed for longer +/// than . Idle drivers, write-only burst drivers, and subscription-only +/// drivers whose signals don't arrive regularly all stay Healthy. +/// +/// +/// Pending work signal is supplied by the caller via : +/// non-zero Polly bulkhead depth, ≥1 active MonitoredItem, or ≥1 queued historian read +/// each qualifies. The detector itself is state-light: all it remembers is the last +/// LastProgressUtc it saw and the last wedge verdict. No history buffer. +/// +/// Default threshold per plan: 5 × PublishingInterval, with a minimum of 60 s. +/// Concrete values are driver-agnostic and configured per-instance by the caller. +/// +public sealed class WedgeDetector +{ + /// Wedge-detection threshold; pass < 60 s and the detector clamps to 60 s. + public TimeSpan Threshold { get; } + + /// Whether the driver reported itself at construction. + public WedgeDetector(TimeSpan threshold) + { + Threshold = threshold < TimeSpan.FromSeconds(60) ? TimeSpan.FromSeconds(60) : threshold; + } + + /// + /// Classify the current state against the demand signal. Does not retain state across + /// calls — each call is self-contained; the caller owns the LastProgressUtc clock. + /// + public WedgeVerdict Classify(DriverState state, DemandSignal demand, DateTime utcNow) + { + if (state != DriverState.Healthy) + return WedgeVerdict.NotApplicable; + + if (!demand.HasPendingWork) + return WedgeVerdict.Idle; + + var sinceProgress = utcNow - demand.LastProgressUtc; + return sinceProgress > Threshold ? WedgeVerdict.Faulted : WedgeVerdict.Healthy; + } +} + +/// +/// Caller-supplied demand snapshot. All three counters are OR'd — any non-zero means work +/// is outstanding, which is the trigger for checking the clock. +/// +/// Polly bulkhead depth (in-flight capability calls). +/// Number of live OPC UA MonitoredItems bound to this driver. +/// Pending historian-read requests the driver owes the server. +/// Last time the driver reported a successful unit of work (read, subscribe-ack, publish). +public readonly record struct DemandSignal( + int BulkheadDepth, + int ActiveMonitoredItems, + int QueuedHistoryReads, + DateTime LastProgressUtc) +{ + /// True when any of the three counters is > 0. + public bool HasPendingWork => BulkheadDepth > 0 || ActiveMonitoredItems > 0 || QueuedHistoryReads > 0; +} + +/// Outcome of a single call. +public enum WedgeVerdict +{ + /// Driver wasn't Healthy to begin with — wedge detection doesn't apply. + NotApplicable, + + /// Driver claims Healthy + no pending work → stays Healthy. + Idle, + + /// Driver claims Healthy + has pending work + has made progress within the threshold → stays Healthy. + Healthy, + + /// Driver claims Healthy + has pending work + has NOT made progress within the threshold → wedged. + Faulted, +} diff --git a/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/MemoryRecycleTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/MemoryRecycleTests.cs new file mode 100644 index 0000000..00dbf43 --- /dev/null +++ b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/MemoryRecycleTests.cs @@ -0,0 +1,91 @@ +using Microsoft.Extensions.Logging.Abstractions; +using Shouldly; +using Xunit; +using ZB.MOM.WW.OtOpcUa.Core.Abstractions; +using ZB.MOM.WW.OtOpcUa.Core.Stability; + +namespace ZB.MOM.WW.OtOpcUa.Core.Tests.Stability; + +[Trait("Category", "Unit")] +public sealed class MemoryRecycleTests +{ + [Fact] + public async Task TierC_HardBreach_RequestsSupervisorRecycle() + { + var supervisor = new FakeSupervisor(); + var recycle = new MemoryRecycle(DriverTier.C, supervisor, NullLogger.Instance); + + var requested = await recycle.HandleAsync(MemoryTrackingAction.HardBreach, 2_000_000_000, CancellationToken.None); + + requested.ShouldBeTrue(); + supervisor.RecycleCount.ShouldBe(1); + supervisor.LastReason.ShouldContain("hard-breach"); + } + + [Theory] + [InlineData(DriverTier.A)] + [InlineData(DriverTier.B)] + public async Task InProcessTier_HardBreach_NeverRequestsRecycle(DriverTier tier) + { + var supervisor = new FakeSupervisor(); + var recycle = new MemoryRecycle(tier, supervisor, NullLogger.Instance); + + var requested = await recycle.HandleAsync(MemoryTrackingAction.HardBreach, 2_000_000_000, CancellationToken.None); + + requested.ShouldBeFalse("Tier A/B hard-breach logs a promotion recommendation only (decisions #74, #145)"); + supervisor.RecycleCount.ShouldBe(0); + } + + [Fact] + public async Task TierC_WithoutSupervisor_HardBreach_NoOp() + { + var recycle = new MemoryRecycle(DriverTier.C, supervisor: null, NullLogger.Instance); + + var requested = await recycle.HandleAsync(MemoryTrackingAction.HardBreach, 2_000_000_000, CancellationToken.None); + + requested.ShouldBeFalse("no supervisor → no recycle path; action logged only"); + } + + [Theory] + [InlineData(DriverTier.A)] + [InlineData(DriverTier.B)] + [InlineData(DriverTier.C)] + public async Task SoftBreach_NeverRequestsRecycle(DriverTier tier) + { + var supervisor = new FakeSupervisor(); + var recycle = new MemoryRecycle(tier, supervisor, NullLogger.Instance); + + var requested = await recycle.HandleAsync(MemoryTrackingAction.SoftBreach, 1_000_000_000, CancellationToken.None); + + requested.ShouldBeFalse("soft-breach is surface-only at every tier"); + supervisor.RecycleCount.ShouldBe(0); + } + + [Theory] + [InlineData(MemoryTrackingAction.None)] + [InlineData(MemoryTrackingAction.Warming)] + public async Task NonBreachActions_NoOp(MemoryTrackingAction action) + { + var supervisor = new FakeSupervisor(); + var recycle = new MemoryRecycle(DriverTier.C, supervisor, NullLogger.Instance); + + var requested = await recycle.HandleAsync(action, 100_000_000, CancellationToken.None); + + requested.ShouldBeFalse(); + supervisor.RecycleCount.ShouldBe(0); + } + + private sealed class FakeSupervisor : IDriverSupervisor + { + public string DriverInstanceId => "fake-tier-c"; + public int RecycleCount { get; private set; } + public string? LastReason { get; private set; } + + public Task RecycleAsync(string reason, CancellationToken cancellationToken) + { + RecycleCount++; + LastReason = reason; + return Task.CompletedTask; + } + } +} diff --git a/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/ScheduledRecycleSchedulerTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/ScheduledRecycleSchedulerTests.cs new file mode 100644 index 0000000..7cb16f4 --- /dev/null +++ b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/ScheduledRecycleSchedulerTests.cs @@ -0,0 +1,101 @@ +using Microsoft.Extensions.Logging.Abstractions; +using Shouldly; +using Xunit; +using ZB.MOM.WW.OtOpcUa.Core.Abstractions; +using ZB.MOM.WW.OtOpcUa.Core.Stability; + +namespace ZB.MOM.WW.OtOpcUa.Core.Tests.Stability; + +[Trait("Category", "Unit")] +public sealed class ScheduledRecycleSchedulerTests +{ + private static readonly DateTime T0 = new(2026, 4, 19, 0, 0, 0, DateTimeKind.Utc); + private static readonly TimeSpan Weekly = TimeSpan.FromDays(7); + + [Theory] + [InlineData(DriverTier.A)] + [InlineData(DriverTier.B)] + public void TierAOrB_Ctor_Throws(DriverTier tier) + { + var supervisor = new FakeSupervisor(); + Should.Throw(() => new ScheduledRecycleScheduler( + tier, Weekly, T0, supervisor, NullLogger.Instance)); + } + + [Fact] + public void ZeroOrNegativeInterval_Throws() + { + var supervisor = new FakeSupervisor(); + Should.Throw(() => new ScheduledRecycleScheduler( + DriverTier.C, TimeSpan.Zero, T0, supervisor, NullLogger.Instance)); + Should.Throw(() => new ScheduledRecycleScheduler( + DriverTier.C, TimeSpan.FromSeconds(-1), T0, supervisor, NullLogger.Instance)); + } + + [Fact] + public async Task Tick_BeforeNextRecycle_NoOp() + { + var supervisor = new FakeSupervisor(); + var sch = new ScheduledRecycleScheduler(DriverTier.C, Weekly, T0, supervisor, NullLogger.Instance); + + var fired = await sch.TickAsync(T0 + TimeSpan.FromDays(6), CancellationToken.None); + + fired.ShouldBeFalse(); + supervisor.RecycleCount.ShouldBe(0); + } + + [Fact] + public async Task Tick_AtOrAfterNextRecycle_FiresOnce_AndAdvances() + { + var supervisor = new FakeSupervisor(); + var sch = new ScheduledRecycleScheduler(DriverTier.C, Weekly, T0, supervisor, NullLogger.Instance); + + var fired = await sch.TickAsync(T0 + Weekly + TimeSpan.FromMinutes(1), CancellationToken.None); + + fired.ShouldBeTrue(); + supervisor.RecycleCount.ShouldBe(1); + sch.NextRecycleUtc.ShouldBe(T0 + Weekly + Weekly); + } + + [Fact] + public async Task RequestRecycleNow_Fires_Immediately_WithoutAdvancingSchedule() + { + var supervisor = new FakeSupervisor(); + var sch = new ScheduledRecycleScheduler(DriverTier.C, Weekly, T0, supervisor, NullLogger.Instance); + var nextBefore = sch.NextRecycleUtc; + + await sch.RequestRecycleNowAsync("memory hard-breach", CancellationToken.None); + + supervisor.RecycleCount.ShouldBe(1); + supervisor.LastReason.ShouldBe("memory hard-breach"); + sch.NextRecycleUtc.ShouldBe(nextBefore, "ad-hoc recycle doesn't shift the cron schedule"); + } + + [Fact] + public async Task MultipleFires_AcrossTicks_AdvanceOneIntervalEach() + { + var supervisor = new FakeSupervisor(); + var sch = new ScheduledRecycleScheduler(DriverTier.C, TimeSpan.FromDays(1), T0, supervisor, NullLogger.Instance); + + await sch.TickAsync(T0 + TimeSpan.FromDays(1) + TimeSpan.FromHours(1), CancellationToken.None); + await sch.TickAsync(T0 + TimeSpan.FromDays(2) + TimeSpan.FromHours(1), CancellationToken.None); + await sch.TickAsync(T0 + TimeSpan.FromDays(3) + TimeSpan.FromHours(1), CancellationToken.None); + + supervisor.RecycleCount.ShouldBe(3); + sch.NextRecycleUtc.ShouldBe(T0 + TimeSpan.FromDays(4)); + } + + private sealed class FakeSupervisor : IDriverSupervisor + { + public string DriverInstanceId => "tier-c-fake"; + public int RecycleCount { get; private set; } + public string? LastReason { get; private set; } + + public Task RecycleAsync(string reason, CancellationToken cancellationToken) + { + RecycleCount++; + LastReason = reason; + return Task.CompletedTask; + } + } +} diff --git a/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/WedgeDetectorTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/WedgeDetectorTests.cs new file mode 100644 index 0000000..c0c2a79 --- /dev/null +++ b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/WedgeDetectorTests.cs @@ -0,0 +1,112 @@ +using Shouldly; +using Xunit; +using ZB.MOM.WW.OtOpcUa.Core.Abstractions; +using ZB.MOM.WW.OtOpcUa.Core.Stability; + +namespace ZB.MOM.WW.OtOpcUa.Core.Tests.Stability; + +[Trait("Category", "Unit")] +public sealed class WedgeDetectorTests +{ + private static readonly DateTime Now = new(2026, 4, 19, 12, 0, 0, DateTimeKind.Utc); + private static readonly TimeSpan Threshold = TimeSpan.FromSeconds(120); + + [Fact] + public void SubSixtySecondThreshold_ClampsToSixty() + { + var detector = new WedgeDetector(TimeSpan.FromSeconds(10)); + detector.Threshold.ShouldBe(TimeSpan.FromSeconds(60)); + } + + [Fact] + public void Unhealthy_Driver_AlwaysNotApplicable() + { + var detector = new WedgeDetector(Threshold); + var demand = new DemandSignal(BulkheadDepth: 5, ActiveMonitoredItems: 10, QueuedHistoryReads: 0, LastProgressUtc: Now.AddMinutes(-10)); + + detector.Classify(DriverState.Faulted, demand, Now).ShouldBe(WedgeVerdict.NotApplicable); + detector.Classify(DriverState.Degraded, demand, Now).ShouldBe(WedgeVerdict.NotApplicable); + detector.Classify(DriverState.Initializing, demand, Now).ShouldBe(WedgeVerdict.NotApplicable); + } + + [Fact] + public void Idle_Subscription_Only_StaysIdle() + { + // Idle driver: bulkhead 0, monitored items 0, no history reads queued. + // Even if LastProgressUtc is ancient, the verdict is Idle, not Faulted. + var detector = new WedgeDetector(Threshold); + var demand = new DemandSignal(0, 0, 0, Now.AddHours(-12)); + + detector.Classify(DriverState.Healthy, demand, Now).ShouldBe(WedgeVerdict.Idle); + } + + [Fact] + public void PendingWork_WithRecentProgress_StaysHealthy() + { + var detector = new WedgeDetector(Threshold); + var demand = new DemandSignal(BulkheadDepth: 2, ActiveMonitoredItems: 0, QueuedHistoryReads: 0, LastProgressUtc: Now.AddSeconds(-30)); + + detector.Classify(DriverState.Healthy, demand, Now).ShouldBe(WedgeVerdict.Healthy); + } + + [Fact] + public void PendingWork_WithStaleProgress_IsFaulted() + { + var detector = new WedgeDetector(Threshold); + var demand = new DemandSignal(BulkheadDepth: 2, ActiveMonitoredItems: 0, QueuedHistoryReads: 0, LastProgressUtc: Now.AddMinutes(-5)); + + detector.Classify(DriverState.Healthy, demand, Now).ShouldBe(WedgeVerdict.Faulted); + } + + [Fact] + public void MonitoredItems_Active_ButNoRecentPublish_IsFaulted() + { + // Subscription-only driver with live MonitoredItems but no publish progress within threshold + // is a real wedge — this is the case the previous "no successful Read" formulation used + // to miss (no reads ever happen). + var detector = new WedgeDetector(Threshold); + var demand = new DemandSignal(BulkheadDepth: 0, ActiveMonitoredItems: 5, QueuedHistoryReads: 0, LastProgressUtc: Now.AddMinutes(-10)); + + detector.Classify(DriverState.Healthy, demand, Now).ShouldBe(WedgeVerdict.Faulted); + } + + [Fact] + public void MonitoredItems_Active_WithFreshPublish_StaysHealthy() + { + var detector = new WedgeDetector(Threshold); + var demand = new DemandSignal(BulkheadDepth: 0, ActiveMonitoredItems: 5, QueuedHistoryReads: 0, LastProgressUtc: Now.AddSeconds(-10)); + + detector.Classify(DriverState.Healthy, demand, Now).ShouldBe(WedgeVerdict.Healthy); + } + + [Fact] + public void HistoryBackfill_SlowButMakingProgress_StaysHealthy() + { + // Slow historian backfill — QueuedHistoryReads > 0 but progress advances within threshold. + var detector = new WedgeDetector(Threshold); + var demand = new DemandSignal(BulkheadDepth: 0, ActiveMonitoredItems: 0, QueuedHistoryReads: 50, LastProgressUtc: Now.AddSeconds(-60)); + + detector.Classify(DriverState.Healthy, demand, Now).ShouldBe(WedgeVerdict.Healthy); + } + + [Fact] + public void WriteOnlyBurst_StaysIdle_WhenBulkheadEmpty() + { + // A write-only driver that just finished a burst: bulkhead drained, no subscriptions, no + // history reads. Idle — the previous formulation would have faulted here because no + // reads were succeeding even though the driver is perfectly healthy. + var detector = new WedgeDetector(Threshold); + var demand = new DemandSignal(0, 0, 0, Now.AddMinutes(-30)); + + detector.Classify(DriverState.Healthy, demand, Now).ShouldBe(WedgeVerdict.Idle); + } + + [Fact] + public void DemandSignal_HasPendingWork_TrueForAnyNonZeroCounter() + { + new DemandSignal(1, 0, 0, Now).HasPendingWork.ShouldBeTrue(); + new DemandSignal(0, 1, 0, Now).HasPendingWork.ShouldBeTrue(); + new DemandSignal(0, 0, 1, Now).HasPendingWork.ShouldBeTrue(); + new DemandSignal(0, 0, 0, Now).HasPendingWork.ShouldBeFalse(); + } +}