Merge pull request (#82) - Phase 6.1 Stream E data layer
This commit was merged in pull request #82.
This commit is contained in:
@@ -0,0 +1,44 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
|
||||
/// <summary>
|
||||
/// Runtime resilience counters the CapabilityInvoker + MemoryTracking + MemoryRecycle
|
||||
/// surfaces for each <c>(DriverInstanceId, HostName)</c> pair. Separate from
|
||||
/// <see cref="DriverHostStatus"/> (which owns per-host <i>connectivity</i> state) so a
|
||||
/// host that's Running but has tripped its breaker or is approaching its memory ceiling
|
||||
/// shows up distinctly on Admin <c>/hosts</c>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Per <c>docs/v2/implementation/phase-6-1-resilience-and-observability.md</c> §Stream E.1.
|
||||
/// The Admin UI left-joins this table on DriverHostStatus for display; rows are written
|
||||
/// by the runtime via a HostedService that samples the tracker at a configurable
|
||||
/// interval (default 5 s) — writes are non-critical, a missed sample is tolerated.
|
||||
/// </remarks>
|
||||
public sealed class DriverInstanceResilienceStatus
|
||||
{
|
||||
public required string DriverInstanceId { get; set; }
|
||||
public required string HostName { get; set; }
|
||||
|
||||
/// <summary>Most recent time the circuit breaker for this (instance, host) opened; null if never.</summary>
|
||||
public DateTime? LastCircuitBreakerOpenUtc { get; set; }
|
||||
|
||||
/// <summary>Rolling count of consecutive Polly pipeline failures for this (instance, host).</summary>
|
||||
public int ConsecutiveFailures { get; set; }
|
||||
|
||||
/// <summary>Current Polly bulkhead depth (in-flight calls) for this (instance, host).</summary>
|
||||
public int CurrentBulkheadDepth { get; set; }
|
||||
|
||||
/// <summary>Most recent process recycle time (Tier C only; null for in-process tiers).</summary>
|
||||
public DateTime? LastRecycleUtc { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Post-init memory baseline captured by <c>MemoryTracking</c> (median of first
|
||||
/// BaselineWindow samples). Zero while still warming up.
|
||||
/// </summary>
|
||||
public long BaselineFootprintBytes { get; set; }
|
||||
|
||||
/// <summary>Most recent footprint sample the tracker saw (steady-state read).</summary>
|
||||
public long CurrentFootprintBytes { get; set; }
|
||||
|
||||
/// <summary>Row last-write timestamp — advances on every sampling tick.</summary>
|
||||
public DateTime LastSampledUtc { get; set; }
|
||||
}
|
||||
1287
src/ZB.MOM.WW.OtOpcUa.Configuration/Migrations/20260419124034_AddDriverInstanceResilienceStatus.Designer.cs
generated
Normal file
1287
src/ZB.MOM.WW.OtOpcUa.Configuration/Migrations/20260419124034_AddDriverInstanceResilienceStatus.Designer.cs
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,46 @@
|
||||
using System;
|
||||
using Microsoft.EntityFrameworkCore.Migrations;
|
||||
|
||||
#nullable disable
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.Migrations
|
||||
{
|
||||
/// <inheritdoc />
|
||||
public partial class AddDriverInstanceResilienceStatus : Migration
|
||||
{
|
||||
/// <inheritdoc />
|
||||
protected override void Up(MigrationBuilder migrationBuilder)
|
||||
{
|
||||
migrationBuilder.CreateTable(
|
||||
name: "DriverInstanceResilienceStatus",
|
||||
columns: table => new
|
||||
{
|
||||
DriverInstanceId = table.Column<string>(type: "nvarchar(64)", maxLength: 64, nullable: false),
|
||||
HostName = table.Column<string>(type: "nvarchar(256)", maxLength: 256, nullable: false),
|
||||
LastCircuitBreakerOpenUtc = table.Column<DateTime>(type: "datetime2(3)", nullable: true),
|
||||
ConsecutiveFailures = table.Column<int>(type: "int", nullable: false),
|
||||
CurrentBulkheadDepth = table.Column<int>(type: "int", nullable: false),
|
||||
LastRecycleUtc = table.Column<DateTime>(type: "datetime2(3)", nullable: true),
|
||||
BaselineFootprintBytes = table.Column<long>(type: "bigint", nullable: false),
|
||||
CurrentFootprintBytes = table.Column<long>(type: "bigint", nullable: false),
|
||||
LastSampledUtc = table.Column<DateTime>(type: "datetime2(3)", nullable: false)
|
||||
},
|
||||
constraints: table =>
|
||||
{
|
||||
table.PrimaryKey("PK_DriverInstanceResilienceStatus", x => new { x.DriverInstanceId, x.HostName });
|
||||
});
|
||||
|
||||
migrationBuilder.CreateIndex(
|
||||
name: "IX_DriverResilience_LastSampled",
|
||||
table: "DriverInstanceResilienceStatus",
|
||||
column: "LastSampledUtc");
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
protected override void Down(MigrationBuilder migrationBuilder)
|
||||
{
|
||||
migrationBuilder.DropTable(
|
||||
name: "DriverInstanceResilienceStatus");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -434,6 +434,45 @@ namespace ZB.MOM.WW.OtOpcUa.Configuration.Migrations
|
||||
});
|
||||
});
|
||||
|
||||
modelBuilder.Entity("ZB.MOM.WW.OtOpcUa.Configuration.Entities.DriverInstanceResilienceStatus", b =>
|
||||
{
|
||||
b.Property<string>("DriverInstanceId")
|
||||
.HasMaxLength(64)
|
||||
.HasColumnType("nvarchar(64)");
|
||||
|
||||
b.Property<string>("HostName")
|
||||
.HasMaxLength(256)
|
||||
.HasColumnType("nvarchar(256)");
|
||||
|
||||
b.Property<long>("BaselineFootprintBytes")
|
||||
.HasColumnType("bigint");
|
||||
|
||||
b.Property<int>("ConsecutiveFailures")
|
||||
.HasColumnType("int");
|
||||
|
||||
b.Property<int>("CurrentBulkheadDepth")
|
||||
.HasColumnType("int");
|
||||
|
||||
b.Property<long>("CurrentFootprintBytes")
|
||||
.HasColumnType("bigint");
|
||||
|
||||
b.Property<DateTime?>("LastCircuitBreakerOpenUtc")
|
||||
.HasColumnType("datetime2(3)");
|
||||
|
||||
b.Property<DateTime?>("LastRecycleUtc")
|
||||
.HasColumnType("datetime2(3)");
|
||||
|
||||
b.Property<DateTime>("LastSampledUtc")
|
||||
.HasColumnType("datetime2(3)");
|
||||
|
||||
b.HasKey("DriverInstanceId", "HostName");
|
||||
|
||||
b.HasIndex("LastSampledUtc")
|
||||
.HasDatabaseName("IX_DriverResilience_LastSampled");
|
||||
|
||||
b.ToTable("DriverInstanceResilienceStatus", (string)null);
|
||||
});
|
||||
|
||||
modelBuilder.Entity("ZB.MOM.WW.OtOpcUa.Configuration.Entities.Equipment", b =>
|
||||
{
|
||||
b.Property<Guid>("EquipmentRowId")
|
||||
|
||||
@@ -28,6 +28,7 @@ public sealed class OtOpcUaConfigDbContext(DbContextOptions<OtOpcUaConfigDbConte
|
||||
public DbSet<ConfigAuditLog> ConfigAuditLogs => Set<ConfigAuditLog>();
|
||||
public DbSet<ExternalIdReservation> ExternalIdReservations => Set<ExternalIdReservation>();
|
||||
public DbSet<DriverHostStatus> DriverHostStatuses => Set<DriverHostStatus>();
|
||||
public DbSet<DriverInstanceResilienceStatus> DriverInstanceResilienceStatuses => Set<DriverInstanceResilienceStatus>();
|
||||
|
||||
protected override void OnModelCreating(ModelBuilder modelBuilder)
|
||||
{
|
||||
@@ -49,6 +50,7 @@ public sealed class OtOpcUaConfigDbContext(DbContextOptions<OtOpcUaConfigDbConte
|
||||
ConfigureConfigAuditLog(modelBuilder);
|
||||
ConfigureExternalIdReservation(modelBuilder);
|
||||
ConfigureDriverHostStatus(modelBuilder);
|
||||
ConfigureDriverInstanceResilienceStatus(modelBuilder);
|
||||
}
|
||||
|
||||
private static void ConfigureServerCluster(ModelBuilder modelBuilder)
|
||||
@@ -512,4 +514,21 @@ public sealed class OtOpcUaConfigDbContext(DbContextOptions<OtOpcUaConfigDbConte
|
||||
e.HasIndex(x => x.LastSeenUtc).HasDatabaseName("IX_DriverHostStatus_LastSeen");
|
||||
});
|
||||
}
|
||||
|
||||
private static void ConfigureDriverInstanceResilienceStatus(ModelBuilder modelBuilder)
|
||||
{
|
||||
modelBuilder.Entity<DriverInstanceResilienceStatus>(e =>
|
||||
{
|
||||
e.ToTable("DriverInstanceResilienceStatus");
|
||||
e.HasKey(x => new { x.DriverInstanceId, x.HostName });
|
||||
e.Property(x => x.DriverInstanceId).HasMaxLength(64);
|
||||
e.Property(x => x.HostName).HasMaxLength(256);
|
||||
e.Property(x => x.LastCircuitBreakerOpenUtc).HasColumnType("datetime2(3)");
|
||||
e.Property(x => x.LastRecycleUtc).HasColumnType("datetime2(3)");
|
||||
e.Property(x => x.LastSampledUtc).HasColumnType("datetime2(3)");
|
||||
// LastSampledUtc drives the Admin UI's stale-sample filter same way DriverHostStatus's
|
||||
// LastSeenUtc index does for connectivity rows.
|
||||
e.HasIndex(x => x.LastSampledUtc).HasDatabaseName("IX_DriverResilience_LastSampled");
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,104 @@
|
||||
using System.Collections.Concurrent;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Resilience;
|
||||
|
||||
/// <summary>
|
||||
/// Process-singleton tracker of live resilience counters per
|
||||
/// <c>(DriverInstanceId, HostName)</c>. Populated by the CapabilityInvoker and the
|
||||
/// MemoryTracking layer; consumed by a HostedService that periodically persists a
|
||||
/// snapshot to the <c>DriverInstanceResilienceStatus</c> table for Admin <c>/hosts</c>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Per Phase 6.1 Stream E. No DB dependency here — the tracker is pure in-memory so
|
||||
/// tests can exercise it without EF Core or SQL Server. The HostedService that writes
|
||||
/// snapshots lives in the Server project (Stream E.2); the actual SignalR push + Blazor
|
||||
/// page refresh (E.3) lands in a follow-up visual-review PR.
|
||||
/// </remarks>
|
||||
public sealed class DriverResilienceStatusTracker
|
||||
{
|
||||
private readonly ConcurrentDictionary<StatusKey, ResilienceStatusSnapshot> _status = new();
|
||||
|
||||
/// <summary>Record a Polly pipeline failure for <paramref name="hostName"/>.</summary>
|
||||
public void RecordFailure(string driverInstanceId, string hostName, DateTime utcNow)
|
||||
{
|
||||
var key = new StatusKey(driverInstanceId, hostName);
|
||||
_status.AddOrUpdate(key,
|
||||
_ => new ResilienceStatusSnapshot { ConsecutiveFailures = 1, LastSampledUtc = utcNow },
|
||||
(_, existing) => existing with
|
||||
{
|
||||
ConsecutiveFailures = existing.ConsecutiveFailures + 1,
|
||||
LastSampledUtc = utcNow,
|
||||
});
|
||||
}
|
||||
|
||||
/// <summary>Reset the consecutive-failure count on a successful pipeline execution.</summary>
|
||||
public void RecordSuccess(string driverInstanceId, string hostName, DateTime utcNow)
|
||||
{
|
||||
var key = new StatusKey(driverInstanceId, hostName);
|
||||
_status.AddOrUpdate(key,
|
||||
_ => new ResilienceStatusSnapshot { ConsecutiveFailures = 0, LastSampledUtc = utcNow },
|
||||
(_, existing) => existing with
|
||||
{
|
||||
ConsecutiveFailures = 0,
|
||||
LastSampledUtc = utcNow,
|
||||
});
|
||||
}
|
||||
|
||||
/// <summary>Record a circuit-breaker open event.</summary>
|
||||
public void RecordBreakerOpen(string driverInstanceId, string hostName, DateTime utcNow)
|
||||
{
|
||||
var key = new StatusKey(driverInstanceId, hostName);
|
||||
_status.AddOrUpdate(key,
|
||||
_ => new ResilienceStatusSnapshot { LastBreakerOpenUtc = utcNow, LastSampledUtc = utcNow },
|
||||
(_, existing) => existing with { LastBreakerOpenUtc = utcNow, LastSampledUtc = utcNow });
|
||||
}
|
||||
|
||||
/// <summary>Record a process recycle event (Tier C only).</summary>
|
||||
public void RecordRecycle(string driverInstanceId, string hostName, DateTime utcNow)
|
||||
{
|
||||
var key = new StatusKey(driverInstanceId, hostName);
|
||||
_status.AddOrUpdate(key,
|
||||
_ => new ResilienceStatusSnapshot { LastRecycleUtc = utcNow, LastSampledUtc = utcNow },
|
||||
(_, existing) => existing with { LastRecycleUtc = utcNow, LastSampledUtc = utcNow });
|
||||
}
|
||||
|
||||
/// <summary>Capture / update the MemoryTracking-supplied baseline + current footprint.</summary>
|
||||
public void RecordFootprint(string driverInstanceId, string hostName, long baselineBytes, long currentBytes, DateTime utcNow)
|
||||
{
|
||||
var key = new StatusKey(driverInstanceId, hostName);
|
||||
_status.AddOrUpdate(key,
|
||||
_ => new ResilienceStatusSnapshot
|
||||
{
|
||||
BaselineFootprintBytes = baselineBytes,
|
||||
CurrentFootprintBytes = currentBytes,
|
||||
LastSampledUtc = utcNow,
|
||||
},
|
||||
(_, existing) => existing with
|
||||
{
|
||||
BaselineFootprintBytes = baselineBytes,
|
||||
CurrentFootprintBytes = currentBytes,
|
||||
LastSampledUtc = utcNow,
|
||||
});
|
||||
}
|
||||
|
||||
/// <summary>Snapshot of a specific (instance, host) pair; null if no counters recorded yet.</summary>
|
||||
public ResilienceStatusSnapshot? TryGet(string driverInstanceId, string hostName) =>
|
||||
_status.TryGetValue(new StatusKey(driverInstanceId, hostName), out var snapshot) ? snapshot : null;
|
||||
|
||||
/// <summary>Copy of every currently-tracked (instance, host, snapshot) triple. Safe under concurrent writes.</summary>
|
||||
public IReadOnlyList<(string DriverInstanceId, string HostName, ResilienceStatusSnapshot Snapshot)> Snapshot() =>
|
||||
_status.Select(kvp => (kvp.Key.DriverInstanceId, kvp.Key.HostName, kvp.Value)).ToList();
|
||||
|
||||
private readonly record struct StatusKey(string DriverInstanceId, string HostName);
|
||||
}
|
||||
|
||||
/// <summary>Snapshot of the resilience counters for one <c>(DriverInstanceId, HostName)</c> pair.</summary>
|
||||
public sealed record ResilienceStatusSnapshot
|
||||
{
|
||||
public int ConsecutiveFailures { get; init; }
|
||||
public DateTime? LastBreakerOpenUtc { get; init; }
|
||||
public DateTime? LastRecycleUtc { get; init; }
|
||||
public long BaselineFootprintBytes { get; init; }
|
||||
public long CurrentFootprintBytes { get; init; }
|
||||
public DateTime LastSampledUtc { get; init; }
|
||||
}
|
||||
@@ -29,6 +29,7 @@ public sealed class SchemaComplianceTests
|
||||
"DriverInstance", "Device", "Equipment", "Tag", "PollGroup",
|
||||
"NodeAcl", "ExternalIdReservation",
|
||||
"DriverHostStatus",
|
||||
"DriverInstanceResilienceStatus",
|
||||
};
|
||||
|
||||
var actual = QueryStrings(@"
|
||||
|
||||
@@ -0,0 +1,110 @@
|
||||
using Shouldly;
|
||||
using Xunit;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Resilience;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Tests.Resilience;
|
||||
|
||||
[Trait("Category", "Unit")]
|
||||
public sealed class DriverResilienceStatusTrackerTests
|
||||
{
|
||||
private static readonly DateTime Now = new(2026, 4, 19, 12, 0, 0, DateTimeKind.Utc);
|
||||
|
||||
[Fact]
|
||||
public void TryGet_Returns_Null_Before_AnyWrite()
|
||||
{
|
||||
var tracker = new DriverResilienceStatusTracker();
|
||||
|
||||
tracker.TryGet("drv", "host").ShouldBeNull();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void RecordFailure_Accumulates_ConsecutiveFailures()
|
||||
{
|
||||
var tracker = new DriverResilienceStatusTracker();
|
||||
|
||||
tracker.RecordFailure("drv", "host", Now);
|
||||
tracker.RecordFailure("drv", "host", Now.AddSeconds(1));
|
||||
tracker.RecordFailure("drv", "host", Now.AddSeconds(2));
|
||||
|
||||
tracker.TryGet("drv", "host")!.ConsecutiveFailures.ShouldBe(3);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void RecordSuccess_Resets_ConsecutiveFailures()
|
||||
{
|
||||
var tracker = new DriverResilienceStatusTracker();
|
||||
tracker.RecordFailure("drv", "host", Now);
|
||||
tracker.RecordFailure("drv", "host", Now.AddSeconds(1));
|
||||
|
||||
tracker.RecordSuccess("drv", "host", Now.AddSeconds(2));
|
||||
|
||||
tracker.TryGet("drv", "host")!.ConsecutiveFailures.ShouldBe(0);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void RecordBreakerOpen_Populates_LastBreakerOpenUtc()
|
||||
{
|
||||
var tracker = new DriverResilienceStatusTracker();
|
||||
|
||||
tracker.RecordBreakerOpen("drv", "host", Now);
|
||||
|
||||
tracker.TryGet("drv", "host")!.LastBreakerOpenUtc.ShouldBe(Now);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void RecordRecycle_Populates_LastRecycleUtc()
|
||||
{
|
||||
var tracker = new DriverResilienceStatusTracker();
|
||||
|
||||
tracker.RecordRecycle("drv", "host", Now);
|
||||
|
||||
tracker.TryGet("drv", "host")!.LastRecycleUtc.ShouldBe(Now);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void RecordFootprint_CapturesBaselineAndCurrent()
|
||||
{
|
||||
var tracker = new DriverResilienceStatusTracker();
|
||||
|
||||
tracker.RecordFootprint("drv", "host", baselineBytes: 100_000_000, currentBytes: 150_000_000, Now);
|
||||
|
||||
var snap = tracker.TryGet("drv", "host")!;
|
||||
snap.BaselineFootprintBytes.ShouldBe(100_000_000);
|
||||
snap.CurrentFootprintBytes.ShouldBe(150_000_000);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void DifferentHosts_AreIndependent()
|
||||
{
|
||||
var tracker = new DriverResilienceStatusTracker();
|
||||
|
||||
tracker.RecordFailure("drv", "host-a", Now);
|
||||
tracker.RecordFailure("drv", "host-b", Now);
|
||||
tracker.RecordSuccess("drv", "host-a", Now.AddSeconds(1));
|
||||
|
||||
tracker.TryGet("drv", "host-a")!.ConsecutiveFailures.ShouldBe(0);
|
||||
tracker.TryGet("drv", "host-b")!.ConsecutiveFailures.ShouldBe(1);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Snapshot_ReturnsAll_TrackedPairs()
|
||||
{
|
||||
var tracker = new DriverResilienceStatusTracker();
|
||||
tracker.RecordFailure("drv-1", "host-a", Now);
|
||||
tracker.RecordFailure("drv-1", "host-b", Now);
|
||||
tracker.RecordFailure("drv-2", "host-a", Now);
|
||||
|
||||
var snapshot = tracker.Snapshot();
|
||||
|
||||
snapshot.Count.ShouldBe(3);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ConcurrentWrites_DoNotLose_Failures()
|
||||
{
|
||||
var tracker = new DriverResilienceStatusTracker();
|
||||
Parallel.For(0, 500, _ => tracker.RecordFailure("drv", "host", Now));
|
||||
|
||||
tracker.TryGet("drv", "host")!.ConsecutiveFailures.ShouldBe(500);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user