From ef6b0bb8fc8da097a4eb27ffa683fdde0f4dc9ed Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 19 Apr 2026 07:37:43 -0400 Subject: [PATCH] =?UTF-8?q?Phase=206.1=20Stream=20B.1/B.2=20=E2=80=94=20Dr?= =?UTF-8?q?iverTier=20on=20DriverTypeMetadata=20+=20Core.Stability.MemoryT?= =?UTF-8?q?racking=20with=20hybrid-formula=20soft/hard=20thresholds?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stream B.1 — registry invariant: - DriverTypeMetadata gains a required `DriverTier Tier` field. Every registered driver type must declare its stability tier so the downstream MemoryTracking, MemoryRecycle, and resilience-policy layers can resolve the right defaults. Stamped-at-registration-time enforcement makes the "every driver type has a non-null Tier" compliance check structurally impossible to fail. - DriverTypeRegistry API unchanged; one new property on the record. Stream B.2 — MemoryTracking (Core.Stability): - Tier-agnostic tracker per decision #146: captures baseline as the median of samples collected during a post-init warmup window (default 5 min), then classifies each subsequent sample with the hybrid formula `soft = max(multiplier × baseline, baseline + floor)`, `hard = 2 × soft`. - Per-tier constants wired: Tier A mult=3 floor=50 MB, Tier B mult=3 floor=100 MB, Tier C mult=2 floor=500 MB. - Never kills. Hard-breach action returns HardBreach; the supervisor that acts on that signal (MemoryRecycle) is Tier C only per decisions #74, #145 and lands in the next B.3 commit on this branch. - Two phases: WarmingUp (samples collected, Warming returned) and Steady (baseline captured, soft/hard checks active). Transition is automatic when the warmup window elapses. Tests (15 new, all pass): - Warming phase returns Warming until the window elapses. - Window-elapsed captures median baseline + transitions to Steady. - Per-tier constants match decision #146 table exactly. 
- Soft threshold uses max() — small baseline → floor wins; large baseline → multiplier wins. - Hard = 2 × soft. - Sample below soft = None; at soft = SoftBreach; at/above hard = HardBreach. - DriverTypeRegistry: theory asserts Tier round-trips for A/B/C. Full solution dotnet test: 963 passing (baseline 906, +57 net for Phase 6.1 Stream A + Stream B.1/B.2). Pre-existing Client.CLI Subscribe flake unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../DriverTypeRegistry.cs | 10 +- .../Stability/MemoryTracking.cs | 136 ++++++++++++++++++ .../DriverTypeRegistryTests.cs | 20 ++- .../Stability/MemoryTrackingTests.cs | 119 +++++++++++++++ 4 files changed, 282 insertions(+), 3 deletions(-) create mode 100644 src/ZB.MOM.WW.OtOpcUa.Core/Stability/MemoryTracking.cs create mode 100644 tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/MemoryTrackingTests.cs diff --git a/src/ZB.MOM.WW.OtOpcUa.Core.Abstractions/DriverTypeRegistry.cs b/src/ZB.MOM.WW.OtOpcUa.Core.Abstractions/DriverTypeRegistry.cs index 6655886..42e9607 100644 --- a/src/ZB.MOM.WW.OtOpcUa.Core.Abstractions/DriverTypeRegistry.cs +++ b/src/ZB.MOM.WW.OtOpcUa.Core.Abstractions/DriverTypeRegistry.cs @@ -69,12 +69,20 @@ public sealed class DriverTypeRegistry /// JSON Schema (Draft 2020-12) the driver's DriverConfig column must validate against. /// JSON Schema for DeviceConfig (multi-device drivers); null if the driver has no device layer. /// JSON Schema for TagConfig; required for every driver since every driver has tags. +/// +/// Stability tier per docs/v2/driver-stability.md §2-4 and docs/v2/plan.md +/// decisions #63-74. Drives the shared resilience pipeline defaults +/// ( × capability → CapabilityPolicy), the MemoryTracking +/// hybrid-formula constants, and whether process-level MemoryRecycle / scheduled- +/// recycle protections apply (Tier C only). Every registered driver type must declare one. 
+/// public sealed record DriverTypeMetadata( string TypeName, NamespaceKindCompatibility AllowedNamespaceKinds, string DriverConfigJsonSchema, string? DeviceConfigJsonSchema, - string TagConfigJsonSchema); + string TagConfigJsonSchema, + DriverTier Tier); /// Bitmask of namespace kinds a driver type may populate. Per decision #111. [Flags] diff --git a/src/ZB.MOM.WW.OtOpcUa.Core/Stability/MemoryTracking.cs b/src/ZB.MOM.WW.OtOpcUa.Core/Stability/MemoryTracking.cs new file mode 100644 index 0000000..19dffa2 --- /dev/null +++ b/src/ZB.MOM.WW.OtOpcUa.Core/Stability/MemoryTracking.cs @@ -0,0 +1,136 @@ +using ZB.MOM.WW.OtOpcUa.Core.Abstractions; + +namespace ZB.MOM.WW.OtOpcUa.Core.Stability; + +/// +/// Tier-agnostic memory-footprint tracker. Captures the post-initialize baseline +/// from the first samples after IDriver.InitializeAsync, then classifies each +/// subsequent sample against a hybrid soft/hard threshold per +/// docs/v2/plan.md decision #146 — soft = max(multiplier × baseline, baseline + floor), +/// hard = 2 × soft. +/// +/// +/// Per decision #145, this tracker never kills a process. Soft and hard breaches +/// log + surface to the Admin UI via DriverInstanceResilienceStatus. The matching +/// process-level recycle protection lives in a separate MemoryRecycle that activates +/// for Tier C drivers only (where the driver runs out-of-process behind a supervisor that +/// can safely restart it without tearing down the OPC UA session or co-hosted in-proc +/// drivers). +/// +/// Baseline capture: the tracker starts in for +/// (default 5 min). During that window samples are collected; +/// the baseline is computed as the median once the window elapses. Before that point every +/// classification returns . 
+///
+public sealed class MemoryTracking
+{
+    private readonly DriverTier _tier;
+    private readonly TimeSpan _baselineWindow;
+
+    // Footprints gathered during warmup; emptied once the baseline has been captured.
+    private readonly List<long> _warmupSamples = [];
+    private long _baselineBytes;
+    private TrackingPhase _phase = TrackingPhase.WarmingUp;
+
+    // Timestamp of the first submitted sample; the warmup window is measured from it.
+    private DateTime? _warmupStartUtc;
+
+    /// <summary>Tier-default multiplier/floor constants per decision #146.</summary>
+    /// <param name="tier">Stability tier to look up.</param>
+    /// <returns>The soft-threshold multiplier and the additive floor in bytes.</returns>
+    /// <exception cref="ArgumentOutOfRangeException">No constants are defined for <paramref name="tier"/>.</exception>
+    public static (int Multiplier, long FloorBytes) GetTierConstants(DriverTier tier) => tier switch
+    {
+        DriverTier.A => (Multiplier: 3, FloorBytes: 50L * 1024 * 1024),
+        DriverTier.B => (Multiplier: 3, FloorBytes: 100L * 1024 * 1024),
+        DriverTier.C => (Multiplier: 2, FloorBytes: 500L * 1024 * 1024),
+        _ => throw new ArgumentOutOfRangeException(nameof(tier), tier, $"No memory-tracking constants defined for tier {tier}."),
+    };
+
+    /// <summary>Window over which post-init samples are collected to compute the baseline.</summary>
+    public TimeSpan BaselineWindow => _baselineWindow;
+
+    /// <summary>Current phase: <see cref="TrackingPhase.WarmingUp"/> or <see cref="TrackingPhase.Steady"/>.</summary>
+    public TrackingPhase Phase => _phase;
+
+    /// <summary>
+    /// Captured baseline; 0 until warmup completes. A captured baseline may itself be 0,
+    /// so use <see cref="Phase"/> (not this value) to tell whether warmup has finished.
+    /// </summary>
+    public long BaselineBytes => _baselineBytes;
+
+    /// <summary>Effective soft threshold (zero while warming up).</summary>
+    public long SoftThresholdBytes =>
+        _phase == TrackingPhase.Steady ? ComputeSoft(_tier, _baselineBytes) : 0;
+
+    /// <summary>Effective hard threshold = 2 × soft (zero while warming up).</summary>
+    public long HardThresholdBytes =>
+        _phase == TrackingPhase.Steady ? ComputeSoft(_tier, _baselineBytes) * 2 : 0;
+
+    /// <summary>Creates a tracker that starts in <see cref="TrackingPhase.WarmingUp"/>.</summary>
+    /// <param name="tier">Tier whose multiplier/floor constants drive the thresholds.</param>
+    /// <param name="baselineWindow">Warmup window length; defaults to 5 minutes when null.</param>
+    public MemoryTracking(DriverTier tier, TimeSpan? baselineWindow = null)
+    {
+        _tier = tier;
+        _baselineWindow = baselineWindow ?? TimeSpan.FromMinutes(5);
+    }
+
+    /// <summary>
+    /// Submit a memory-footprint sample. Returns the action the caller should surface.
+    /// During warmup, always returns <see cref="MemoryTrackingAction.Warming"/> and accumulates
+    /// samples. The first sample whose timestamp is at or past the end of the window is added
+    /// to the warmup set, the baseline is captured as the median of that set, and that same
+    /// sample is then classified against the new thresholds.
+    /// </summary>
+    /// <param name="footprintBytes">Observed memory footprint in bytes.</param>
+    /// <param name="utcNow">Timestamp of the observation; the first one starts the warmup window.</param>
+    /// <returns>The classification of this sample.</returns>
+    public MemoryTrackingAction Sample(long footprintBytes, DateTime utcNow)
+    {
+        if (_phase == TrackingPhase.WarmingUp)
+        {
+            _warmupStartUtc ??= utcNow;
+            _warmupSamples.Add(footprintBytes);
+
+            if (utcNow - _warmupStartUtc.Value < _baselineWindow)
+            {
+                return MemoryTrackingAction.Warming;
+            }
+
+            // Window elapsed. The list cannot be empty here: the current sample was just added.
+            _baselineBytes = ComputeMedian(_warmupSamples);
+            _phase = TrackingPhase.Steady;
+
+            // The warmup data is never read again; release it instead of holding it
+            // for the lifetime of the tracker.
+            _warmupSamples.Clear();
+            _warmupSamples.TrimExcess();
+        }
+
+        if (footprintBytes >= HardThresholdBytes) return MemoryTrackingAction.HardBreach;
+        if (footprintBytes >= SoftThresholdBytes) return MemoryTrackingAction.SoftBreach;
+        return MemoryTrackingAction.None;
+    }
+
+    // Hybrid formula from decision #146: soft = max(multiplier × baseline, baseline + floor).
+    private static long ComputeSoft(DriverTier tier, long baseline)
+    {
+        var (multiplier, floor) = GetTierConstants(tier);
+        return Math.Max(multiplier * baseline, baseline + floor);
+    }
+
+    // Median of a non-empty sample list; an even count averages the two middle values
+    // using integer division.
+    private static long ComputeMedian(List<long> samples)
+    {
+        var sorted = samples.Order().ToArray();
+        var mid = sorted.Length / 2;
+        return sorted.Length % 2 == 1
+            ? sorted[mid]
+            : (sorted[mid - 1] + sorted[mid]) / 2;
+    }
+}
+
+/// <summary>Phase of a <see cref="MemoryTracking"/> lifecycle.</summary>
+public enum TrackingPhase
+{
+    /// <summary>Collecting post-init samples; baseline not yet computed.</summary>
+    WarmingUp,
+
+    /// <summary>Baseline captured; every sample classified against soft/hard thresholds.</summary>
+    Steady,
+}
+
+/// <summary>Classification the tracker returns per sample.</summary>
+public enum MemoryTrackingAction
+{
+    /// <summary>Baseline not yet captured; sample collected, no threshold check.</summary>
+    Warming,
+
+    /// <summary>Below soft threshold.</summary>
+    None,
+
+    /// <summary>Between soft and hard thresholds — log + surface, no action.</summary>
+    SoftBreach,
+
+    ///
+    /// ≥ hard threshold. Log + surface + (Tier C only, via MemoryRecycle) request
+    /// process recycle via the driver supervisor. Tier A/B breach never invokes any
+    /// kill path per decisions #145 and #74.
+ /// + HardBreach, +} diff --git a/tests/ZB.MOM.WW.OtOpcUa.Core.Abstractions.Tests/DriverTypeRegistryTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Core.Abstractions.Tests/DriverTypeRegistryTests.cs index 2a50d9d..372815f 100644 --- a/tests/ZB.MOM.WW.OtOpcUa.Core.Abstractions.Tests/DriverTypeRegistryTests.cs +++ b/tests/ZB.MOM.WW.OtOpcUa.Core.Abstractions.Tests/DriverTypeRegistryTests.cs @@ -7,11 +7,13 @@ public sealed class DriverTypeRegistryTests { private static DriverTypeMetadata SampleMetadata( string typeName = "Modbus", - NamespaceKindCompatibility allowed = NamespaceKindCompatibility.Equipment) => + NamespaceKindCompatibility allowed = NamespaceKindCompatibility.Equipment, + DriverTier tier = DriverTier.B) => new(typeName, allowed, DriverConfigJsonSchema: "{\"type\": \"object\"}", DeviceConfigJsonSchema: "{\"type\": \"object\"}", - TagConfigJsonSchema: "{\"type\": \"object\"}"); + TagConfigJsonSchema: "{\"type\": \"object\"}", + Tier: tier); [Fact] public void Register_ThenGet_RoundTrips() @@ -24,6 +26,20 @@ public sealed class DriverTypeRegistryTests registry.Get("Modbus").ShouldBe(metadata); } + [Theory] + [InlineData(DriverTier.A)] + [InlineData(DriverTier.B)] + [InlineData(DriverTier.C)] + public void Register_Requires_NonNullTier(DriverTier tier) + { + var registry = new DriverTypeRegistry(); + var metadata = SampleMetadata(typeName: $"Driver-{tier}", tier: tier); + + registry.Register(metadata); + + registry.Get(metadata.TypeName).Tier.ShouldBe(tier); + } + [Fact] public void Get_IsCaseInsensitive() { diff --git a/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/MemoryTrackingTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/MemoryTrackingTests.cs new file mode 100644 index 0000000..afd27e2 --- /dev/null +++ b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Stability/MemoryTrackingTests.cs @@ -0,0 +1,119 @@ +using Shouldly; +using Xunit; +using ZB.MOM.WW.OtOpcUa.Core.Abstractions; +using ZB.MOM.WW.OtOpcUa.Core.Stability; + +namespace ZB.MOM.WW.OtOpcUa.Core.Tests.Stability; 
+
+[Trait("Category", "Unit")]
+public sealed class MemoryTrackingTests
+{
+    // Bytes per binary megabyte; the threshold tests express sizes as multiples of this.
+    private const long Mebibyte = 1024L * 1024L;
+
+    // Warmup window used by every tracker in this suite.
+    private static readonly TimeSpan Window = TimeSpan.FromMinutes(5);
+
+    // Fixed reference instant so no test depends on the wall clock.
+    private static readonly DateTime T0 = new(2026, 4, 19, 12, 0, 0, DateTimeKind.Utc);
+
+    [Fact]
+    public void WarmingUp_Returns_Warming_UntilWindowElapses()
+    {
+        var sut = new MemoryTracking(DriverTier.A, Window);
+
+        var atStart = sut.Sample(100_000_000, T0);
+        var afterOneMinute = sut.Sample(105_000_000, T0.AddMinutes(1));
+        var justBeforeEnd = sut.Sample(102_000_000, T0.AddMinutes(4.9));
+
+        atStart.ShouldBe(MemoryTrackingAction.Warming);
+        afterOneMinute.ShouldBe(MemoryTrackingAction.Warming);
+        justBeforeEnd.ShouldBe(MemoryTrackingAction.Warming);
+        sut.Phase.ShouldBe(TrackingPhase.WarmingUp);
+        sut.BaselineBytes.ShouldBe(0);
+    }
+
+    [Fact]
+    public void WindowElapsed_CapturesBaselineAsMedian_AndTransitionsToSteady()
+    {
+        var sut = new MemoryTracking(DriverTier.A, Window);
+        sut.Sample(100_000_000, T0);
+        sut.Sample(200_000_000, T0.AddMinutes(1));
+        sut.Sample(150_000_000, T0.AddMinutes(2));
+
+        // This sample lands exactly on the window boundary and ends the warmup.
+        var closing = sut.Sample(150_000_000, T0.AddMinutes(5));
+
+        sut.Phase.ShouldBe(TrackingPhase.Steady);
+        sut.BaselineBytes.ShouldBe(150_000_000L, "median of 4 samples [100, 200, 150, 150] = (150+150)/2 = 150");
+        closing.ShouldBe(MemoryTrackingAction.None, "150 MB is the baseline itself, well under soft threshold");
+    }
+
+    [Theory]
+    [InlineData(DriverTier.A, 3, 50)]
+    [InlineData(DriverTier.B, 3, 100)]
+    [InlineData(DriverTier.C, 2, 500)]
+    public void GetTierConstants_MatchesDecision146(DriverTier tier, int expectedMultiplier, long expectedFloorMB)
+    {
+        var constants = MemoryTracking.GetTierConstants(tier);
+
+        constants.Multiplier.ShouldBe(expectedMultiplier);
+        constants.FloorBytes.ShouldBe(expectedFloorMB * Mebibyte);
+    }
+
+    [Fact]
+    public void SoftThreshold_UsesMax_OfMultiplierAndFloor_SmallBaseline()
+    {
+        // Tier A (×3, +50 MB) on a 10 MB baseline: 30 MB by multiplier, 60 MB by floor.
+        // The floor term is the larger one.
+        var sut = CreateSteadyTracker(DriverTier.A, 10 * Mebibyte);
+
+        sut.SoftThresholdBytes.ShouldBe(60 * Mebibyte);
+    }
+
+    [Fact]
+    public void SoftThreshold_UsesMax_OfMultiplierAndFloor_LargeBaseline()
+    {
+        // Tier A (×3, +50 MB) on a 200 MB baseline: 600 MB by multiplier, 250 MB by floor.
+        // The multiplier term is the larger one.
+        var sut = CreateSteadyTracker(DriverTier.A, 200 * Mebibyte);
+
+        sut.SoftThresholdBytes.ShouldBe(600 * Mebibyte);
+    }
+
+    [Fact]
+    public void HardThreshold_IsTwiceSoft()
+    {
+        var sut = CreateSteadyTracker(DriverTier.B, 200 * Mebibyte);
+
+        var doubledSoft = sut.SoftThresholdBytes * 2;
+
+        sut.HardThresholdBytes.ShouldBe(doubledSoft);
+    }
+
+    [Fact]
+    public void Sample_Below_Soft_Returns_None()
+    {
+        var sut = CreateSteadyTracker(DriverTier.A, 100 * Mebibyte);
+
+        var verdict = sut.Sample(200 * Mebibyte, T0.AddMinutes(10));
+
+        verdict.ShouldBe(MemoryTrackingAction.None);
+    }
+
+    [Fact]
+    public void Sample_AtSoft_Returns_SoftBreach()
+    {
+        // Tier A with a 200 MB baseline; the sample sits exactly on the soft threshold.
+        var sut = CreateSteadyTracker(DriverTier.A, 200 * Mebibyte);
+
+        var verdict = sut.Sample(sut.SoftThresholdBytes, T0.AddMinutes(10));
+
+        verdict.ShouldBe(MemoryTrackingAction.SoftBreach);
+    }
+
+    [Fact]
+    public void Sample_AtHard_Returns_HardBreach()
+    {
+        var sut = CreateSteadyTracker(DriverTier.A, 200 * Mebibyte);
+
+        var verdict = sut.Sample(sut.HardThresholdBytes, T0.AddMinutes(10));
+
+        verdict.ShouldBe(MemoryTrackingAction.HardBreach);
+    }
+
+    [Fact]
+    public void Sample_AboveHard_Returns_HardBreach()
+    {
+        var sut = CreateSteadyTracker(DriverTier.A, 200 * Mebibyte);
+
+        var verdict = sut.Sample(sut.HardThresholdBytes + 100_000_000, T0.AddMinutes(10));
+
+        verdict.ShouldBe(MemoryTrackingAction.HardBreach);
+    }
+
+    // Drives a tracker through warmup with two identical samples (window start and window
+    // end) and checks that the captured baseline equals that sample value.
+    private static MemoryTracking CreateSteadyTracker(DriverTier tier, long baseline)
+    {
+        var sut = new MemoryTracking(tier, Window);
+
+        sut.Sample(baseline, T0);
+        sut.Sample(baseline, T0 + Window);
+
+        sut.BaselineBytes.ShouldBe(baseline);
+        return sut;
+    }
+}