diff --git a/src/ZB.MOM.WW.OtOpcUa.Core.Abstractions/DriverTypeRegistry.cs b/src/ZB.MOM.WW.OtOpcUa.Core.Abstractions/DriverTypeRegistry.cs
index 6655886..42e9607 100644
--- a/src/ZB.MOM.WW.OtOpcUa.Core.Abstractions/DriverTypeRegistry.cs
+++ b/src/ZB.MOM.WW.OtOpcUa.Core.Abstractions/DriverTypeRegistry.cs
@@ -69,12 +69,20 @@ public sealed class DriverTypeRegistry
/// JSON Schema (Draft 2020-12) the driver's DriverConfig column must validate against.
/// JSON Schema for DeviceConfig (multi-device drivers); null if the driver has no device layer.
/// JSON Schema for TagConfig; required for every driver since every driver has tags.
+///
+/// Stability tier per docs/v2/driver-stability.md §2-4 and docs/v2/plan.md
+/// decisions #63-74. Drives the shared resilience pipeline defaults
+/// ( × capability → CapabilityPolicy), the MemoryTracking
+/// hybrid-formula constants, and whether process-level MemoryRecycle / scheduled-
+/// recycle protections apply (Tier C only). Every registered driver type must declare one.
+///
public sealed record DriverTypeMetadata(
string TypeName,
NamespaceKindCompatibility AllowedNamespaceKinds,
string DriverConfigJsonSchema,
string? DeviceConfigJsonSchema,
- string TagConfigJsonSchema);
+ string TagConfigJsonSchema,
+ DriverTier Tier);
/// Bitmask of namespace kinds a driver type may populate. Per decision #111.
[Flags]
diff --git a/src/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IDriverSupervisor.cs b/src/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IDriverSupervisor.cs
new file mode 100644
index 0000000..0e07271
--- /dev/null
+++ b/src/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IDriverSupervisor.cs
@@ -0,0 +1,26 @@
+namespace ZB.MOM.WW.OtOpcUa.Core.Abstractions;
+
+///
+/// Process-level supervisor contract a Tier C driver's out-of-process topology provides
+/// (e.g. Driver.Galaxy.Proxy/Supervisor/). Concerns: restart the Host process when a
+/// hard fault is detected (memory breach, wedge, scheduled recycle window).
+///
+///
+/// Per docs/v2/plan.md decisions #68, #73-74, and #145. Tier A/B drivers do NOT have
+/// a supervisor because they run in-process — recycling would kill every OPC UA session and
+/// every co-hosted driver. The Core.Stability layer only invokes this interface for Tier C
+/// instances after asserting the tier via .
+///
+public interface IDriverSupervisor
+{
+ /// Driver instance this supervisor governs.
+ string DriverInstanceId { get; }
+
+ ///
+ /// Request the supervisor to recycle (terminate + restart) the Host process. Implementations
+ /// are expected to be idempotent under repeat calls during an in-flight recycle.
+ ///
+ /// Human-readable reason — flows into the supervisor's logs.
+ /// Cancels the recycle request; an in-flight restart is not interrupted.
+ Task RecycleAsync(string reason, CancellationToken cancellationToken);
+}
diff --git a/src/ZB.MOM.WW.OtOpcUa.Core/Stability/MemoryRecycle.cs b/src/ZB.MOM.WW.OtOpcUa.Core/Stability/MemoryRecycle.cs
new file mode 100644
index 0000000..34f97d7
--- /dev/null
+++ b/src/ZB.MOM.WW.OtOpcUa.Core/Stability/MemoryRecycle.cs
@@ -0,0 +1,65 @@
+using Microsoft.Extensions.Logging;
+using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
+
+namespace ZB.MOM.WW.OtOpcUa.Core.Stability;
+
+///
+/// Tier C only process-recycle companion to . On a
+/// signal, invokes the supplied
+/// to restart the out-of-process Host.
+///
+///
+/// Per docs/v2/plan.md decisions #74 and #145. Tier A/B hard-breach on an in-process
+/// driver would kill every OPC UA session and every co-hosted driver, so for Tier A/B this
+/// class logs a promotion-to-Tier-C recommendation and does NOT invoke any supervisor.
+/// A future tier-migration workflow acts on the recommendation.
+///
+public sealed class MemoryRecycle
+{
+ private readonly DriverTier _tier;
+ private readonly IDriverSupervisor? _supervisor;
+ private readonly ILogger _logger;
+
+ public MemoryRecycle(DriverTier tier, IDriverSupervisor? supervisor, ILogger logger)
+ {
+ _tier = tier;
+ _supervisor = supervisor;
+ _logger = logger;
+ }
+
+ ///
+ /// Handle a classification for the driver. For Tier C with a
+ /// wired supervisor, HardBreach triggers .
+ /// All other combinations are no-ops with respect to process state (soft breaches + Tier A/B
+ /// hard breaches just log).
+ ///
+ /// True when a recycle was requested; false otherwise.
+ public async Task HandleAsync(MemoryTrackingAction action, long footprintBytes, CancellationToken cancellationToken)
+ {
+ switch (action)
+ {
+ case MemoryTrackingAction.SoftBreach:
+ _logger.LogWarning(
+ "Memory soft-breach on driver {DriverId}: footprint={Footprint:N0} bytes, tier={Tier}. Surfaced to Admin; no action.",
+ _supervisor?.DriverInstanceId ?? "(unknown)", footprintBytes, _tier);
+ return false;
+
+ case MemoryTrackingAction.HardBreach when _tier == DriverTier.C && _supervisor is not null:
+ _logger.LogError(
+ "Memory hard-breach on Tier C driver {DriverId}: footprint={Footprint:N0} bytes. Requesting supervisor recycle.",
+ _supervisor.DriverInstanceId, footprintBytes);
+ await _supervisor.RecycleAsync($"Memory hard-breach: {footprintBytes} bytes", cancellationToken).ConfigureAwait(false);
+ return true;
+
+ case MemoryTrackingAction.HardBreach:
+ _logger.LogError(
+ "Memory hard-breach on Tier {Tier} in-process driver {DriverId}: footprint={Footprint:N0} bytes. " +
+ "Recommending promotion to Tier C; NOT auto-killing (decisions #74, #145).",
+ _tier, _supervisor?.DriverInstanceId ?? "(unknown)", footprintBytes);
+ return false;
+
+ default:
+ return false;
+ }
+ }
+}
diff --git a/src/ZB.MOM.WW.OtOpcUa.Core/Stability/MemoryTracking.cs b/src/ZB.MOM.WW.OtOpcUa.Core/Stability/MemoryTracking.cs
new file mode 100644
index 0000000..19dffa2
--- /dev/null
+++ b/src/ZB.MOM.WW.OtOpcUa.Core/Stability/MemoryTracking.cs
@@ -0,0 +1,136 @@
+using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
+
+namespace ZB.MOM.WW.OtOpcUa.Core.Stability;
+
+///
+/// Tier-agnostic memory-footprint tracker. Captures the post-initialize baseline
+/// from the first samples after IDriver.InitializeAsync, then classifies each
+/// subsequent sample against a hybrid soft/hard threshold per
+/// docs/v2/plan.md decision #146 — soft = max(multiplier × baseline, baseline + floor),
+/// hard = 2 × soft.
+///
+///
+/// Per decision #145, this tracker never kills a process. Soft and hard breaches
+/// log + surface to the Admin UI via DriverInstanceResilienceStatus. The matching
+/// process-level recycle protection lives in a separate MemoryRecycle that activates
+/// for Tier C drivers only (where the driver runs out-of-process behind a supervisor that
+/// can safely restart it without tearing down the OPC UA session or co-hosted in-proc
+/// drivers).
+///
+/// Baseline capture: the tracker starts in for
+/// (default 5 min). During that window samples are collected;
+/// the baseline is computed as the median once the window elapses. Before that point every
+/// classification returns .
+///
+public sealed class MemoryTracking
+{
+ private readonly DriverTier _tier;
+ private readonly TimeSpan _baselineWindow;
+ private readonly List _warmupSamples = [];
+ private long _baselineBytes;
+ private TrackingPhase _phase = TrackingPhase.WarmingUp;
+ private DateTime? _warmupStartUtc;
+
+ /// Tier-default multiplier/floor constants per decision #146.
+ public static (int Multiplier, long FloorBytes) GetTierConstants(DriverTier tier) => tier switch
+ {
+ DriverTier.A => (Multiplier: 3, FloorBytes: 50L * 1024 * 1024),
+ DriverTier.B => (Multiplier: 3, FloorBytes: 100L * 1024 * 1024),
+ DriverTier.C => (Multiplier: 2, FloorBytes: 500L * 1024 * 1024),
+ _ => throw new ArgumentOutOfRangeException(nameof(tier), tier, $"No memory-tracking constants defined for tier {tier}."),
+ };
+
+ /// Window over which post-init samples are collected to compute the baseline.
+ public TimeSpan BaselineWindow => _baselineWindow;
+
+ /// Current phase: or .
+ public TrackingPhase Phase => _phase;
+
+ /// Captured baseline; 0 until warmup completes.
+ public long BaselineBytes => _baselineBytes;
+
+ /// Effective soft threshold (zero while warming up).
+ public long SoftThresholdBytes => _baselineBytes == 0 ? 0 : ComputeSoft(_tier, _baselineBytes);
+
+ /// Effective hard threshold = 2 × soft (zero while warming up).
+ public long HardThresholdBytes => _baselineBytes == 0 ? 0 : ComputeSoft(_tier, _baselineBytes) * 2;
+
+ public MemoryTracking(DriverTier tier, TimeSpan? baselineWindow = null)
+ {
+ _tier = tier;
+ _baselineWindow = baselineWindow ?? TimeSpan.FromMinutes(5);
+ }
+
+ ///
+ /// Submit a memory-footprint sample. Returns the action the caller should surface.
+ /// During warmup, always returns and accumulates
+ /// samples; once the window elapses the first steady-phase sample triggers baseline capture
+ /// (median of warmup samples).
+ ///
+ public MemoryTrackingAction Sample(long footprintBytes, DateTime utcNow)
+ {
+ if (_phase == TrackingPhase.WarmingUp)
+ {
+ _warmupStartUtc ??= utcNow;
+ _warmupSamples.Add(footprintBytes);
+ if (utcNow - _warmupStartUtc.Value >= _baselineWindow && _warmupSamples.Count > 0)
+ {
+ _baselineBytes = ComputeMedian(_warmupSamples);
+ _phase = TrackingPhase.Steady;
+ }
+ else
+ {
+ return MemoryTrackingAction.Warming;
+ }
+ }
+
+ if (footprintBytes >= HardThresholdBytes) return MemoryTrackingAction.HardBreach;
+ if (footprintBytes >= SoftThresholdBytes) return MemoryTrackingAction.SoftBreach;
+ return MemoryTrackingAction.None;
+ }
+
+ private static long ComputeSoft(DriverTier tier, long baseline)
+ {
+ var (multiplier, floor) = GetTierConstants(tier);
+ return Math.Max(multiplier * baseline, baseline + floor);
+ }
+
+ private static long ComputeMedian(List samples)
+ {
+ var sorted = samples.Order().ToArray();
+ var mid = sorted.Length / 2;
+ return sorted.Length % 2 == 1
+ ? sorted[mid]
+ : (sorted[mid - 1] + sorted[mid]) / 2;
+ }
+}
+
+/// Phase of a lifecycle.
+public enum TrackingPhase
+{
+ /// Collecting post-init samples; baseline not yet computed.
+ WarmingUp,
+
+ /// Baseline captured; every sample classified against soft/hard thresholds.
+ Steady,
+}
+
+/// Classification the tracker returns per sample.
+public enum MemoryTrackingAction
+{
+ /// Baseline not yet captured; sample collected, no threshold check.
+ Warming,
+
+ /// Below soft threshold.
+ None,
+
+ /// Between soft and hard thresholds — log + surface, no action.
+ SoftBreach,
+
+ ///
+ /// ≥ hard threshold. Log + surface + (Tier C only, via MemoryRecycle) request
+ /// process recycle via the driver supervisor. Tier A/B breach never invokes any
+ /// kill path per decisions #145 and #74.
+ ///
+ HardBreach,
+}
diff --git a/src/ZB.MOM.WW.OtOpcUa.Core/Stability/ScheduledRecycleScheduler.cs b/src/ZB.MOM.WW.OtOpcUa.Core/Stability/ScheduledRecycleScheduler.cs
new file mode 100644
index 0000000..84174f7
--- /dev/null
+++ b/src/ZB.MOM.WW.OtOpcUa.Core/Stability/ScheduledRecycleScheduler.cs
@@ -0,0 +1,86 @@
+using Microsoft.Extensions.Logging;
+using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
+
+namespace ZB.MOM.WW.OtOpcUa.Core.Stability;
+
+///
+/// Tier C opt-in periodic-recycle driver per docs/v2/plan.md decision #67.
+/// A tick method advanced by the caller (fed by a background timer in prod; by test clock
+/// in unit tests) decides whether the configured interval has elapsed and, if so, drives the
+/// supplied to recycle the Host.
+///
+///
+/// Tier A/B drivers MUST NOT use this class — scheduled recycle for in-process drivers would
+/// kill every OPC UA session and every co-hosted driver. The ctor throws when constructed
+/// with any tier other than C to make the misuse structurally impossible.
+///
+/// Keeps no background thread of its own — callers invoke on
+/// their ambient scheduler tick (Phase 6.1 Stream C's health-endpoint host runs one). That
+/// decouples the unit under test from wall-clock time and thread-pool scheduling.
+///
+public sealed class ScheduledRecycleScheduler
+{
+ private readonly TimeSpan _recycleInterval;
+ private readonly IDriverSupervisor _supervisor;
+ private readonly ILogger _logger;
+ private DateTime _nextRecycleUtc;
+
+ ///
+ /// Construct the scheduler for a Tier C driver. Throws if isn't C.
+ ///
+ /// Driver tier; must be .
+ /// Interval between recycles (e.g. 7 days).
+ /// Anchor time; next recycle fires at + .
+ /// Supervisor that performs the actual recycle.
+ /// Diagnostic sink.
+ public ScheduledRecycleScheduler(
+ DriverTier tier,
+ TimeSpan recycleInterval,
+ DateTime startUtc,
+ IDriverSupervisor supervisor,
+ ILogger logger)
+ {
+ if (tier != DriverTier.C)
+ throw new ArgumentException(
+ $"ScheduledRecycleScheduler is Tier C only (got {tier}). " +
+ "In-process drivers must not use scheduled recycle; see decisions #74 and #145.",
+ nameof(tier));
+
+ if (recycleInterval <= TimeSpan.Zero)
+ throw new ArgumentException("RecycleInterval must be positive.", nameof(recycleInterval));
+
+ _recycleInterval = recycleInterval;
+ _supervisor = supervisor;
+ _logger = logger;
+ _nextRecycleUtc = startUtc + recycleInterval;
+ }
+
+ /// Next scheduled recycle UTC. Advances by on each fire.
+ public DateTime NextRecycleUtc => _nextRecycleUtc;
+
+ /// Recycle interval this scheduler was constructed with.
+ public TimeSpan RecycleInterval => _recycleInterval;
+
+ ///
+ /// Tick the scheduler forward. If is past
+ /// , requests a recycle from the supervisor and advances
+ /// by exactly one interval. Returns true when a recycle fired.
+ ///
+ public async Task TickAsync(DateTime utcNow, CancellationToken cancellationToken)
+ {
+ if (utcNow < _nextRecycleUtc)
+ return false;
+
+ _logger.LogInformation(
+ "Scheduled recycle due for Tier C driver {DriverId} at {Now:o}; advancing next to {Next:o}.",
+ _supervisor.DriverInstanceId, utcNow, _nextRecycleUtc + _recycleInterval);
+
+ await _supervisor.RecycleAsync("Scheduled periodic recycle", cancellationToken).ConfigureAwait(false);
+ _nextRecycleUtc += _recycleInterval;
+ return true;
+ }
+
+ /// Request an immediate recycle outside the schedule (e.g. MemoryRecycle hard-breach escalation).
+ public Task RequestRecycleNowAsync(string reason, CancellationToken cancellationToken) =>
+ _supervisor.RecycleAsync(reason, cancellationToken);
+}
diff --git a/src/ZB.MOM.WW.OtOpcUa.Core/Stability/WedgeDetector.cs b/src/ZB.MOM.WW.OtOpcUa.Core/Stability/WedgeDetector.cs
new file mode 100644
index 0000000..f20ab55
--- /dev/null
+++ b/src/ZB.MOM.WW.OtOpcUa.Core/Stability/WedgeDetector.cs
@@ -0,0 +1,81 @@
+using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
+
+namespace ZB.MOM.WW.OtOpcUa.Core.Stability;
+
+///
+/// Demand-aware driver-wedge detector per docs/v2/plan.md decision #147.
+/// Flips a driver to only when BOTH of the following hold:
+/// (a) there is pending work outstanding, AND (b) no progress has been observed for longer
+/// than . Idle drivers, write-only burst drivers, and subscription-only
+/// drivers whose signals don't arrive regularly all stay Healthy.
+///
+///
+/// Pending work signal is supplied by the caller via :
+/// non-zero Polly bulkhead depth, ≥1 active MonitoredItem, or ≥1 queued historian read
+/// each qualifies. The detector itself is state-light: all it remembers is the last
+/// LastProgressUtc it saw and the last wedge verdict. No history buffer.
+///
+/// Default threshold per plan: 5 × PublishingInterval, with a minimum of 60 s.
+/// Concrete values are driver-agnostic and configured per-instance by the caller.
+///
+public sealed class WedgeDetector
+{
+ /// Wedge-detection threshold; pass < 60 s and the detector clamps to 60 s.
+ public TimeSpan Threshold { get; }
+
+ /// Whether the driver reported itself at construction.
+ public WedgeDetector(TimeSpan threshold)
+ {
+ Threshold = threshold < TimeSpan.FromSeconds(60) ? TimeSpan.FromSeconds(60) : threshold;
+ }
+
+ ///
+ /// Classify the current state against the demand signal. Does not retain state across
+ /// calls — each call is self-contained; the caller owns the LastProgressUtc clock.
+ ///
+ public WedgeVerdict Classify(DriverState state, DemandSignal demand, DateTime utcNow)
+ {
+ if (state != DriverState.Healthy)
+ return WedgeVerdict.NotApplicable;
+
+ if (!demand.HasPendingWork)
+ return WedgeVerdict.Idle;
+
+ var sinceProgress = utcNow - demand.LastProgressUtc;
+ return sinceProgress > Threshold ? WedgeVerdict.Faulted : WedgeVerdict.Healthy;
+ }
+}
+
+///
+/// Caller-supplied demand snapshot. All three counters are OR'd — any non-zero means work
+/// is outstanding, which is the trigger for checking the clock.
+///
+/// Polly bulkhead depth (in-flight capability calls).
+/// Number of live OPC UA MonitoredItems bound to this driver.
+/// Pending historian-read requests the driver owes the server.
+/// Last time the driver reported a successful unit of work (read, subscribe-ack, publish).
+public readonly record struct DemandSignal(
+ int BulkheadDepth,
+ int ActiveMonitoredItems,
+ int QueuedHistoryReads,
+ DateTime LastProgressUtc)
+{
+ /// True when any of the three counters is > 0.
+ public bool HasPendingWork => BulkheadDepth > 0 || ActiveMonitoredItems > 0 || QueuedHistoryReads > 0;
+}
+
+/// Outcome of a single call.
+public enum WedgeVerdict
+{
+ /// Driver wasn't Healthy to begin with — wedge detection doesn't apply.
+ NotApplicable,
+
+ /// Driver claims Healthy + no pending work → stays Healthy.
+ Idle,
+
+ /// Driver claims Healthy + has pending work + has made progress within the threshold → stays Healthy.
+ Healthy,
+
+ /// Driver claims Healthy + has pending work + has NOT made progress within the threshold → wedged.
+ Faulted,
+}
diff --git a/tests/ZB.MOM.WW.OtOpcUa.Core.Abstractions.Tests/DriverTypeRegistryTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Core.Abstractions.Tests/DriverTypeRegistryTests.cs
index 2a50d9d..372815f 100644
--- a/tests/ZB.MOM.WW.OtOpcUa.Core.Abstractions.Tests/DriverTypeRegistryTests.cs
+++ b/tests/ZB.MOM.WW.OtOpcUa.Core.Abstractions.Tests/DriverTypeRegistryTests.cs
@@ -7,11 +7,13 @@ public sealed class DriverTypeRegistryTests
{
private static DriverTypeMetadata SampleMetadata(
string typeName = "Modbus",
- NamespaceKindCompatibility allowed = NamespaceKindCompatibility.Equipment) =>
+ NamespaceKindCompatibility allowed = NamespaceKindCompatibility.Equipment,
+ DriverTier tier = DriverTier.B) =>
new(typeName, allowed,
DriverConfigJsonSchema: "{\"type\": \"object\"}",
DeviceConfigJsonSchema: "{\"type\": \"object\"}",
- TagConfigJsonSchema: "{\"type\": \"object\"}");
+ TagConfigJsonSchema: "{\"type\": \"object\"}",
+ Tier: tier);
[Fact]
public void Register_ThenGet_RoundTrips()
@@ -24,6 +26,20 @@ public sealed class DriverTypeRegistryTests
registry.Get("Modbus").ShouldBe(metadata);
}
+ [Theory]
+ [InlineData(DriverTier.A)]
+ [InlineData(DriverTier.B)]
+ [InlineData(DriverTier.C)]
+ public void Register_Requires_NonNullTier(DriverTier tier)
+ {
+ var registry = new DriverTypeRegistry();
+ var metadata = SampleMetadata(typeName: $"Driver-{tier}", tier: tier);
+
+ registry.Register(metadata);
+
+ registry.Get(metadata.TypeName).Tier.ShouldBe(tier);
+ }
+
[Fact]
public void Get_IsCaseInsensitive()
{
diff --git a/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/MemoryRecycleTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/MemoryRecycleTests.cs
new file mode 100644
index 0000000..00dbf43
--- /dev/null
+++ b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/MemoryRecycleTests.cs
@@ -0,0 +1,91 @@
+using Microsoft.Extensions.Logging.Abstractions;
+using Shouldly;
+using Xunit;
+using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
+using ZB.MOM.WW.OtOpcUa.Core.Stability;
+
+namespace ZB.MOM.WW.OtOpcUa.Core.Tests.Stability;
+
+[Trait("Category", "Unit")]
+public sealed class MemoryRecycleTests
+{
+ [Fact]
+ public async Task TierC_HardBreach_RequestsSupervisorRecycle()
+ {
+ var supervisor = new FakeSupervisor();
+ var recycle = new MemoryRecycle(DriverTier.C, supervisor, NullLogger.Instance);
+
+ var requested = await recycle.HandleAsync(MemoryTrackingAction.HardBreach, 2_000_000_000, CancellationToken.None);
+
+ requested.ShouldBeTrue();
+ supervisor.RecycleCount.ShouldBe(1);
+ supervisor.LastReason.ShouldContain("hard-breach");
+ }
+
+ [Theory]
+ [InlineData(DriverTier.A)]
+ [InlineData(DriverTier.B)]
+ public async Task InProcessTier_HardBreach_NeverRequestsRecycle(DriverTier tier)
+ {
+ var supervisor = new FakeSupervisor();
+ var recycle = new MemoryRecycle(tier, supervisor, NullLogger.Instance);
+
+ var requested = await recycle.HandleAsync(MemoryTrackingAction.HardBreach, 2_000_000_000, CancellationToken.None);
+
+ requested.ShouldBeFalse("Tier A/B hard-breach logs a promotion recommendation only (decisions #74, #145)");
+ supervisor.RecycleCount.ShouldBe(0);
+ }
+
+ [Fact]
+ public async Task TierC_WithoutSupervisor_HardBreach_NoOp()
+ {
+ var recycle = new MemoryRecycle(DriverTier.C, supervisor: null, NullLogger.Instance);
+
+ var requested = await recycle.HandleAsync(MemoryTrackingAction.HardBreach, 2_000_000_000, CancellationToken.None);
+
+ requested.ShouldBeFalse("no supervisor → no recycle path; action logged only");
+ }
+
+ [Theory]
+ [InlineData(DriverTier.A)]
+ [InlineData(DriverTier.B)]
+ [InlineData(DriverTier.C)]
+ public async Task SoftBreach_NeverRequestsRecycle(DriverTier tier)
+ {
+ var supervisor = new FakeSupervisor();
+ var recycle = new MemoryRecycle(tier, supervisor, NullLogger.Instance);
+
+ var requested = await recycle.HandleAsync(MemoryTrackingAction.SoftBreach, 1_000_000_000, CancellationToken.None);
+
+ requested.ShouldBeFalse("soft-breach is surface-only at every tier");
+ supervisor.RecycleCount.ShouldBe(0);
+ }
+
+ [Theory]
+ [InlineData(MemoryTrackingAction.None)]
+ [InlineData(MemoryTrackingAction.Warming)]
+ public async Task NonBreachActions_NoOp(MemoryTrackingAction action)
+ {
+ var supervisor = new FakeSupervisor();
+ var recycle = new MemoryRecycle(DriverTier.C, supervisor, NullLogger.Instance);
+
+ var requested = await recycle.HandleAsync(action, 100_000_000, CancellationToken.None);
+
+ requested.ShouldBeFalse();
+ supervisor.RecycleCount.ShouldBe(0);
+ }
+
+ private sealed class FakeSupervisor : IDriverSupervisor
+ {
+ public string DriverInstanceId => "fake-tier-c";
+ public int RecycleCount { get; private set; }
+ public string? LastReason { get; private set; }
+
+ public Task RecycleAsync(string reason, CancellationToken cancellationToken)
+ {
+ RecycleCount++;
+ LastReason = reason;
+ return Task.CompletedTask;
+ }
+ }
+}
diff --git a/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/MemoryTrackingTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/MemoryTrackingTests.cs
new file mode 100644
index 0000000..afd27e2
--- /dev/null
+++ b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/MemoryTrackingTests.cs
@@ -0,0 +1,119 @@
+using Shouldly;
+using Xunit;
+using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
+using ZB.MOM.WW.OtOpcUa.Core.Stability;
+
+namespace ZB.MOM.WW.OtOpcUa.Core.Tests.Stability;
+
+[Trait("Category", "Unit")]
+public sealed class MemoryTrackingTests
+{
+ private static readonly DateTime T0 = new(2026, 4, 19, 12, 0, 0, DateTimeKind.Utc);
+
+ [Fact]
+ public void WarmingUp_Returns_Warming_UntilWindowElapses()
+ {
+ var tracker = new MemoryTracking(DriverTier.A, TimeSpan.FromMinutes(5));
+
+ tracker.Sample(100_000_000, T0).ShouldBe(MemoryTrackingAction.Warming);
+ tracker.Sample(105_000_000, T0.AddMinutes(1)).ShouldBe(MemoryTrackingAction.Warming);
+ tracker.Sample(102_000_000, T0.AddMinutes(4.9)).ShouldBe(MemoryTrackingAction.Warming);
+
+ tracker.Phase.ShouldBe(TrackingPhase.WarmingUp);
+ tracker.BaselineBytes.ShouldBe(0);
+ }
+
+ [Fact]
+ public void WindowElapsed_CapturesBaselineAsMedian_AndTransitionsToSteady()
+ {
+ var tracker = new MemoryTracking(DriverTier.A, TimeSpan.FromMinutes(5));
+
+ tracker.Sample(100_000_000, T0);
+ tracker.Sample(200_000_000, T0.AddMinutes(1));
+ tracker.Sample(150_000_000, T0.AddMinutes(2));
+ var first = tracker.Sample(150_000_000, T0.AddMinutes(5));
+
+ tracker.Phase.ShouldBe(TrackingPhase.Steady);
+ tracker.BaselineBytes.ShouldBe(150_000_000L, "median of 4 samples [100, 200, 150, 150] = (150+150)/2 = 150");
+ first.ShouldBe(MemoryTrackingAction.None, "150 MB is the baseline itself, well under soft threshold");
+ }
+
+ [Theory]
+ [InlineData(DriverTier.A, 3, 50)]
+ [InlineData(DriverTier.B, 3, 100)]
+ [InlineData(DriverTier.C, 2, 500)]
+ public void GetTierConstants_MatchesDecision146(DriverTier tier, int expectedMultiplier, long expectedFloorMB)
+ {
+ var (multiplier, floor) = MemoryTracking.GetTierConstants(tier);
+ multiplier.ShouldBe(expectedMultiplier);
+ floor.ShouldBe(expectedFloorMB * 1024 * 1024);
+ }
+
+ [Fact]
+ public void SoftThreshold_UsesMax_OfMultiplierAndFloor_SmallBaseline()
+ {
+ // Tier A: mult=3, floor=50 MB. Baseline 10 MB → 3×10=30 MB < 10+50=60 MB → floor wins.
+ var tracker = WarmupWithBaseline(DriverTier.A, 10L * 1024 * 1024);
+ tracker.SoftThresholdBytes.ShouldBe(60L * 1024 * 1024);
+ }
+
+ [Fact]
+ public void SoftThreshold_UsesMax_OfMultiplierAndFloor_LargeBaseline()
+ {
+ // Tier A: mult=3, floor=50 MB. Baseline 200 MB → 3×200=600 MB > 200+50=250 MB → multiplier wins.
+ var tracker = WarmupWithBaseline(DriverTier.A, 200L * 1024 * 1024);
+ tracker.SoftThresholdBytes.ShouldBe(600L * 1024 * 1024);
+ }
+
+ [Fact]
+ public void HardThreshold_IsTwiceSoft()
+ {
+ var tracker = WarmupWithBaseline(DriverTier.B, 200L * 1024 * 1024);
+ tracker.HardThresholdBytes.ShouldBe(tracker.SoftThresholdBytes * 2);
+ }
+
+ [Fact]
+ public void Sample_Below_Soft_Returns_None()
+ {
+ var tracker = WarmupWithBaseline(DriverTier.A, 100L * 1024 * 1024);
+
+ tracker.Sample(200L * 1024 * 1024, T0.AddMinutes(10)).ShouldBe(MemoryTrackingAction.None);
+ }
+
+ [Fact]
+ public void Sample_AtSoft_Returns_SoftBreach()
+ {
+ // Tier A, baseline 200 MB → soft = 600 MB. Sample exactly at soft.
+ var tracker = WarmupWithBaseline(DriverTier.A, 200L * 1024 * 1024);
+
+ tracker.Sample(tracker.SoftThresholdBytes, T0.AddMinutes(10))
+ .ShouldBe(MemoryTrackingAction.SoftBreach);
+ }
+
+ [Fact]
+ public void Sample_AtHard_Returns_HardBreach()
+ {
+ var tracker = WarmupWithBaseline(DriverTier.A, 200L * 1024 * 1024);
+
+ tracker.Sample(tracker.HardThresholdBytes, T0.AddMinutes(10))
+ .ShouldBe(MemoryTrackingAction.HardBreach);
+ }
+
+ [Fact]
+ public void Sample_AboveHard_Returns_HardBreach()
+ {
+ var tracker = WarmupWithBaseline(DriverTier.A, 200L * 1024 * 1024);
+
+ tracker.Sample(tracker.HardThresholdBytes + 100_000_000, T0.AddMinutes(10))
+ .ShouldBe(MemoryTrackingAction.HardBreach);
+ }
+
+ private static MemoryTracking WarmupWithBaseline(DriverTier tier, long baseline)
+ {
+ var tracker = new MemoryTracking(tier, TimeSpan.FromMinutes(5));
+ tracker.Sample(baseline, T0);
+ tracker.Sample(baseline, T0.AddMinutes(5));
+ tracker.BaselineBytes.ShouldBe(baseline);
+ return tracker;
+ }
+}
diff --git a/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/ScheduledRecycleSchedulerTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/ScheduledRecycleSchedulerTests.cs
new file mode 100644
index 0000000..7cb16f4
--- /dev/null
+++ b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/ScheduledRecycleSchedulerTests.cs
@@ -0,0 +1,101 @@
+using Microsoft.Extensions.Logging.Abstractions;
+using Shouldly;
+using Xunit;
+using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
+using ZB.MOM.WW.OtOpcUa.Core.Stability;
+
+namespace ZB.MOM.WW.OtOpcUa.Core.Tests.Stability;
+
+[Trait("Category", "Unit")]
+public sealed class ScheduledRecycleSchedulerTests
+{
+ private static readonly DateTime T0 = new(2026, 4, 19, 0, 0, 0, DateTimeKind.Utc);
+ private static readonly TimeSpan Weekly = TimeSpan.FromDays(7);
+
+ [Theory]
+ [InlineData(DriverTier.A)]
+ [InlineData(DriverTier.B)]
+ public void TierAOrB_Ctor_Throws(DriverTier tier)
+ {
+ var supervisor = new FakeSupervisor();
+ Should.Throw(() => new ScheduledRecycleScheduler(
+ tier, Weekly, T0, supervisor, NullLogger.Instance));
+ }
+
+ [Fact]
+ public void ZeroOrNegativeInterval_Throws()
+ {
+ var supervisor = new FakeSupervisor();
+ Should.Throw(() => new ScheduledRecycleScheduler(
+ DriverTier.C, TimeSpan.Zero, T0, supervisor, NullLogger.Instance));
+ Should.Throw(() => new ScheduledRecycleScheduler(
+ DriverTier.C, TimeSpan.FromSeconds(-1), T0, supervisor, NullLogger.Instance));
+ }
+
+ [Fact]
+ public async Task Tick_BeforeNextRecycle_NoOp()
+ {
+ var supervisor = new FakeSupervisor();
+ var sch = new ScheduledRecycleScheduler(DriverTier.C, Weekly, T0, supervisor, NullLogger.Instance);
+
+ var fired = await sch.TickAsync(T0 + TimeSpan.FromDays(6), CancellationToken.None);
+
+ fired.ShouldBeFalse();
+ supervisor.RecycleCount.ShouldBe(0);
+ }
+
+ [Fact]
+ public async Task Tick_AtOrAfterNextRecycle_FiresOnce_AndAdvances()
+ {
+ var supervisor = new FakeSupervisor();
+ var sch = new ScheduledRecycleScheduler(DriverTier.C, Weekly, T0, supervisor, NullLogger.Instance);
+
+ var fired = await sch.TickAsync(T0 + Weekly + TimeSpan.FromMinutes(1), CancellationToken.None);
+
+ fired.ShouldBeTrue();
+ supervisor.RecycleCount.ShouldBe(1);
+ sch.NextRecycleUtc.ShouldBe(T0 + Weekly + Weekly);
+ }
+
+ [Fact]
+ public async Task RequestRecycleNow_Fires_Immediately_WithoutAdvancingSchedule()
+ {
+ var supervisor = new FakeSupervisor();
+ var sch = new ScheduledRecycleScheduler(DriverTier.C, Weekly, T0, supervisor, NullLogger.Instance);
+ var nextBefore = sch.NextRecycleUtc;
+
+ await sch.RequestRecycleNowAsync("memory hard-breach", CancellationToken.None);
+
+ supervisor.RecycleCount.ShouldBe(1);
+ supervisor.LastReason.ShouldBe("memory hard-breach");
+ sch.NextRecycleUtc.ShouldBe(nextBefore, "ad-hoc recycle doesn't shift the cron schedule");
+ }
+
+ [Fact]
+ public async Task MultipleFires_AcrossTicks_AdvanceOneIntervalEach()
+ {
+ var supervisor = new FakeSupervisor();
+ var sch = new ScheduledRecycleScheduler(DriverTier.C, TimeSpan.FromDays(1), T0, supervisor, NullLogger.Instance);
+
+ await sch.TickAsync(T0 + TimeSpan.FromDays(1) + TimeSpan.FromHours(1), CancellationToken.None);
+ await sch.TickAsync(T0 + TimeSpan.FromDays(2) + TimeSpan.FromHours(1), CancellationToken.None);
+ await sch.TickAsync(T0 + TimeSpan.FromDays(3) + TimeSpan.FromHours(1), CancellationToken.None);
+
+ supervisor.RecycleCount.ShouldBe(3);
+ sch.NextRecycleUtc.ShouldBe(T0 + TimeSpan.FromDays(4));
+ }
+
+ private sealed class FakeSupervisor : IDriverSupervisor
+ {
+ public string DriverInstanceId => "tier-c-fake";
+ public int RecycleCount { get; private set; }
+ public string? LastReason { get; private set; }
+
+ public Task RecycleAsync(string reason, CancellationToken cancellationToken)
+ {
+ RecycleCount++;
+ LastReason = reason;
+ return Task.CompletedTask;
+ }
+ }
+}
diff --git a/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/WedgeDetectorTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/WedgeDetectorTests.cs
new file mode 100644
index 0000000..c0c2a79
--- /dev/null
+++ b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/WedgeDetectorTests.cs
@@ -0,0 +1,112 @@
+using Shouldly;
+using Xunit;
+using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
+using ZB.MOM.WW.OtOpcUa.Core.Stability;
+
+namespace ZB.MOM.WW.OtOpcUa.Core.Tests.Stability;
+
+[Trait("Category", "Unit")]
+public sealed class WedgeDetectorTests
+{
+ private static readonly DateTime Now = new(2026, 4, 19, 12, 0, 0, DateTimeKind.Utc);
+ private static readonly TimeSpan Threshold = TimeSpan.FromSeconds(120);
+
+ [Fact]
+ public void SubSixtySecondThreshold_ClampsToSixty()
+ {
+ var detector = new WedgeDetector(TimeSpan.FromSeconds(10));
+ detector.Threshold.ShouldBe(TimeSpan.FromSeconds(60));
+ }
+
+ [Fact]
+ public void Unhealthy_Driver_AlwaysNotApplicable()
+ {
+ var detector = new WedgeDetector(Threshold);
+ var demand = new DemandSignal(BulkheadDepth: 5, ActiveMonitoredItems: 10, QueuedHistoryReads: 0, LastProgressUtc: Now.AddMinutes(-10));
+
+ detector.Classify(DriverState.Faulted, demand, Now).ShouldBe(WedgeVerdict.NotApplicable);
+ detector.Classify(DriverState.Degraded, demand, Now).ShouldBe(WedgeVerdict.NotApplicable);
+ detector.Classify(DriverState.Initializing, demand, Now).ShouldBe(WedgeVerdict.NotApplicable);
+ }
+
+ [Fact]
+ public void Idle_Subscription_Only_StaysIdle()
+ {
+ // Idle driver: bulkhead 0, monitored items 0, no history reads queued.
+ // Even if LastProgressUtc is ancient, the verdict is Idle, not Faulted.
+ var detector = new WedgeDetector(Threshold);
+ var demand = new DemandSignal(0, 0, 0, Now.AddHours(-12));
+
+ detector.Classify(DriverState.Healthy, demand, Now).ShouldBe(WedgeVerdict.Idle);
+ }
+
+ [Fact]
+ public void PendingWork_WithRecentProgress_StaysHealthy()
+ {
+ var detector = new WedgeDetector(Threshold);
+ var demand = new DemandSignal(BulkheadDepth: 2, ActiveMonitoredItems: 0, QueuedHistoryReads: 0, LastProgressUtc: Now.AddSeconds(-30));
+
+ detector.Classify(DriverState.Healthy, demand, Now).ShouldBe(WedgeVerdict.Healthy);
+ }
+
+ [Fact]
+ public void PendingWork_WithStaleProgress_IsFaulted()
+ {
+ var detector = new WedgeDetector(Threshold);
+ var demand = new DemandSignal(BulkheadDepth: 2, ActiveMonitoredItems: 0, QueuedHistoryReads: 0, LastProgressUtc: Now.AddMinutes(-5));
+
+ detector.Classify(DriverState.Healthy, demand, Now).ShouldBe(WedgeVerdict.Faulted);
+ }
+
+ [Fact]
+ public void MonitoredItems_Active_ButNoRecentPublish_IsFaulted()
+ {
+ // Subscription-only driver with live MonitoredItems but no publish progress within threshold
+ // is a real wedge — this is the case the previous "no successful Read" formulation used
+ // to miss (no reads ever happen).
+ var detector = new WedgeDetector(Threshold);
+ var demand = new DemandSignal(BulkheadDepth: 0, ActiveMonitoredItems: 5, QueuedHistoryReads: 0, LastProgressUtc: Now.AddMinutes(-10));
+
+ detector.Classify(DriverState.Healthy, demand, Now).ShouldBe(WedgeVerdict.Faulted);
+ }
+
+ [Fact]
+ public void MonitoredItems_Active_WithFreshPublish_StaysHealthy()
+ {
+ var detector = new WedgeDetector(Threshold);
+ var demand = new DemandSignal(BulkheadDepth: 0, ActiveMonitoredItems: 5, QueuedHistoryReads: 0, LastProgressUtc: Now.AddSeconds(-10));
+
+ detector.Classify(DriverState.Healthy, demand, Now).ShouldBe(WedgeVerdict.Healthy);
+ }
+
+ [Fact]
+ public void HistoryBackfill_SlowButMakingProgress_StaysHealthy()
+ {
+ // Slow historian backfill — QueuedHistoryReads > 0 but progress advances within threshold.
+ var detector = new WedgeDetector(Threshold);
+ var demand = new DemandSignal(BulkheadDepth: 0, ActiveMonitoredItems: 0, QueuedHistoryReads: 50, LastProgressUtc: Now.AddSeconds(-60));
+
+ detector.Classify(DriverState.Healthy, demand, Now).ShouldBe(WedgeVerdict.Healthy);
+ }
+
+ [Fact]
+ public void WriteOnlyBurst_StaysIdle_WhenBulkheadEmpty()
+ {
+ // A write-only driver that just finished a burst: bulkhead drained, no subscriptions, no
+ // history reads. Idle — the previous formulation would have faulted here because no
+ // reads were succeeding even though the driver is perfectly healthy.
+ var detector = new WedgeDetector(Threshold);
+ var demand = new DemandSignal(0, 0, 0, Now.AddMinutes(-30));
+
+ detector.Classify(DriverState.Healthy, demand, Now).ShouldBe(WedgeVerdict.Idle);
+ }
+
+ [Fact]
+ public void DemandSignal_HasPendingWork_TrueForAnyNonZeroCounter()
+ {
+ new DemandSignal(1, 0, 0, Now).HasPendingWork.ShouldBeTrue();
+ new DemandSignal(0, 1, 0, Now).HasPendingWork.ShouldBeTrue();
+ new DemandSignal(0, 0, 1, Now).HasPendingWork.ShouldBeTrue();
+ new DemandSignal(0, 0, 0, Now).HasPendingWork.ShouldBeFalse();
+ }
+}