Compare commits
4 Commits
v2-release
...
v2-release
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a8401ab8fd | ||
| 37ba9e8d14 | |||
|
|
19a0bfcc43 | ||
| fc7e18c7f5 |
@@ -1,7 +1,7 @@
|
||||
# v2 Release Readiness
|
||||
|
||||
> **Last updated**: 2026-04-19 (release blocker #1 closed — Phase 6.2 dispatch wiring shipped)
|
||||
> **Status**: **NOT YET RELEASE-READY** — two of three release blockers remain (Phase 6.1 Stream D config-cache wiring + Phase 6.3 Streams A/C/F redundancy runtime).
|
||||
> **Last updated**: 2026-04-19 (release blockers #1 + #2 closed; Phase 6.3 redundancy runtime is the last)
|
||||
> **Status**: **NOT YET RELEASE-READY** — one of three release blockers remains (Phase 6.3 Streams A/C/F redundancy-coordinator + OPC UA node wiring + client interop).
|
||||
|
||||
This doc is the single view of where v2 stands against its release criteria. Update it whenever a deferred follow-up closes or a new release blocker is discovered.
|
||||
|
||||
@@ -41,15 +41,16 @@ Additional Stream C surfaces (not release-blocking, hardening only):
|
||||
|
||||
These are additional hardening — the three highest-value surfaces (Read / Write / HistoryRead) are now gated, which covers the base-security gap for v2 GA.
|
||||
|
||||
### Config fallback — Phase 6.1 Stream D wiring (task #136)
|
||||
### ~~Config fallback — Phase 6.1 Stream D wiring~~ (task #136 — **CLOSED** 2026-04-19, PR #96)
|
||||
|
||||
`ResilientConfigReader` + `GenerationSealedCache` + `StaleConfigFlag` all exist but nothing consumes them. The `NodeBootstrap` path still uses the original single-file `LiteDbConfigCache` via `ILocalConfigCache`; `sp_PublishGeneration` doesn't call `GenerationSealedCache.SealAsync` after commit; the Configuration read services don't wrap queries in `ResilientConfigReader.ReadAsync`.
|
||||
**Closed**. `SealedBootstrap` consumes `ResilientConfigReader` + `GenerationSealedCache` + `StaleConfigFlag` end-to-end: bootstrap calls go through the timeout → retry → fallback-to-sealed pipeline; every central-DB success writes a fresh sealed snapshot so the next cache-miss has a known-good fallback; `StaleConfigFlag.IsStale` is now consumed by `HealthEndpointsHost.usingStaleConfig` so `/healthz` body reports reality.
|
||||
|
||||
Closing this requires:
|
||||
Production activation: Program.cs switches `NodeBootstrap → SealedBootstrap` + constructs `OpcUaApplicationHost` with the `StaleConfigFlag` as an optional ctor parameter.
|
||||
|
||||
- `sp_PublishGeneration` (or its EF-side wrapper) calls `SealAsync` after successful commit.
|
||||
- DriverInstance enumeration, LdapGroupRoleMapping fetches, cluster + namespace metadata reads route through `ResilientConfigReader.ReadAsync`.
|
||||
- Integration test: SQL container kill mid-operation → serves sealed snapshot, `UsingStaleConfig` = true, driver stays Healthy, `/healthz` body reflects the flag.
|
||||
Remaining follow-ups (hardening, not release-blocking):
|
||||
|
||||
- A `HostedService` that polls `sp_GetCurrentGenerationForCluster` periodically so peer-published generations land in this node's cache without a restart.
|
||||
- Richer snapshot payload via `sp_GetGenerationContent` so fallback can serve the full generation content (DriverInstance enumeration, ACL rows, etc.) from the sealed cache alone.
|
||||
|
||||
### Redundancy — Phase 6.3 Streams A/C/F (tasks #145, #147, #150)
|
||||
|
||||
@@ -97,6 +98,7 @@ v2 GA requires all of the following:
|
||||
|
||||
## Change log
|
||||
|
||||
- **2026-04-19** — Release blocker #2 **closed** (PR #96). `SealedBootstrap` consumes `ResilientConfigReader` + `GenerationSealedCache` + `StaleConfigFlag`; `/healthz` now surfaces the stale flag. Remaining follow-ups (periodic poller + richer snapshot payload) downgraded to hardening.
|
||||
- **2026-04-19** — Release blocker #1 **closed** (PR #94). `AuthorizationGate` wired into `DriverNodeManager` Read / Write / HistoryRead dispatch. Remaining Stream C surfaces (Browse / Subscribe / Alarm / Call + finer-grained scope resolution) downgraded to hardening follow-ups — no longer release-blocking.
|
||||
- **2026-04-19** — Phase 6.4 data layer merged (PRs #91–92). Phase 6 core complete. Capstone doc created.
|
||||
- **2026-04-19** — Phase 6.3 core merged (PRs #89–90). `ServiceLevelCalculator` + `RecoveryStateManager` + `ApplyLeaseRegistry` land as pure logic; coordinator / UA-node wiring / Admin UI / interop deferred.
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Opc.Ua;
|
||||
using Opc.Ua.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.OpcUa;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Resilience;
|
||||
@@ -25,6 +26,7 @@ public sealed class OpcUaApplicationHost : IAsyncDisposable
|
||||
private readonly DriverResiliencePipelineBuilder _pipelineBuilder;
|
||||
private readonly AuthorizationGate? _authzGate;
|
||||
private readonly NodeScopeResolver? _scopeResolver;
|
||||
private readonly StaleConfigFlag? _staleConfigFlag;
|
||||
private readonly ILoggerFactory _loggerFactory;
|
||||
private readonly ILogger<OpcUaApplicationHost> _logger;
|
||||
private ApplicationInstance? _application;
|
||||
@@ -36,7 +38,8 @@ public sealed class OpcUaApplicationHost : IAsyncDisposable
|
||||
IUserAuthenticator authenticator, ILoggerFactory loggerFactory, ILogger<OpcUaApplicationHost> logger,
|
||||
DriverResiliencePipelineBuilder? pipelineBuilder = null,
|
||||
AuthorizationGate? authzGate = null,
|
||||
NodeScopeResolver? scopeResolver = null)
|
||||
NodeScopeResolver? scopeResolver = null,
|
||||
StaleConfigFlag? staleConfigFlag = null)
|
||||
{
|
||||
_options = options;
|
||||
_driverHost = driverHost;
|
||||
@@ -44,6 +47,7 @@ public sealed class OpcUaApplicationHost : IAsyncDisposable
|
||||
_pipelineBuilder = pipelineBuilder ?? new DriverResiliencePipelineBuilder();
|
||||
_authzGate = authzGate;
|
||||
_scopeResolver = scopeResolver;
|
||||
_staleConfigFlag = staleConfigFlag;
|
||||
_loggerFactory = loggerFactory;
|
||||
_logger = logger;
|
||||
}
|
||||
@@ -84,6 +88,7 @@ public sealed class OpcUaApplicationHost : IAsyncDisposable
|
||||
_healthHost = new HealthEndpointsHost(
|
||||
_driverHost,
|
||||
_loggerFactory.CreateLogger<HealthEndpointsHost>(),
|
||||
usingStaleConfig: _staleConfigFlag is null ? null : () => _staleConfigFlag.IsStale,
|
||||
prefix: _options.HealthEndpointsPrefix);
|
||||
_healthHost.Start();
|
||||
}
|
||||
|
||||
100
src/ZB.MOM.WW.OtOpcUa.Server/SealedBootstrap.cs
Normal file
100
src/ZB.MOM.WW.OtOpcUa.Server/SealedBootstrap.cs
Normal file
@@ -0,0 +1,100 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.Data.SqlClient;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server;
|
||||
|
||||
/// <summary>
|
||||
/// Phase 6.1 Stream D consumption hook — bootstraps the node's current generation through
|
||||
/// the <see cref="ResilientConfigReader"/> pipeline + writes every successful central-DB
|
||||
/// read into the <see cref="GenerationSealedCache"/> so the next cache-miss path has a
|
||||
/// sealed snapshot to fall back to.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>Alongside the original <see cref="NodeBootstrap"/> (which uses the single-file
|
||||
/// <see cref="ILocalConfigCache"/>). Program.cs can switch to this one once operators are
|
||||
/// ready for the generation-sealed semantics. The original stays for backward compat
|
||||
/// with the three integration tests that construct <see cref="NodeBootstrap"/> directly.</para>
|
||||
///
|
||||
/// <para>Closes release blocker #2 in <c>docs/v2/v2-release-readiness.md</c> — the
|
||||
/// generation-sealed cache + resilient reader + stale-config flag ship as unit-tested
|
||||
/// primitives in PR #81 but no production path consumed them until this wrapper.</para>
|
||||
/// </remarks>
|
||||
public sealed class SealedBootstrap
|
||||
{
|
||||
private readonly NodeOptions _options;
|
||||
private readonly GenerationSealedCache _cache;
|
||||
private readonly ResilientConfigReader _reader;
|
||||
private readonly StaleConfigFlag _staleFlag;
|
||||
private readonly ILogger<SealedBootstrap> _logger;
|
||||
|
||||
public SealedBootstrap(
|
||||
NodeOptions options,
|
||||
GenerationSealedCache cache,
|
||||
ResilientConfigReader reader,
|
||||
StaleConfigFlag staleFlag,
|
||||
ILogger<SealedBootstrap> logger)
|
||||
{
|
||||
_options = options;
|
||||
_cache = cache;
|
||||
_reader = reader;
|
||||
_staleFlag = staleFlag;
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Resolve the current generation for this node. Routes the central-DB fetch through
|
||||
/// <see cref="ResilientConfigReader"/> (timeout → retry → fallback-to-cache) + seals a
|
||||
/// fresh snapshot on every successful DB read so a future cache-miss has something to
|
||||
/// serve.
|
||||
/// </summary>
|
||||
public async Task<BootstrapResult> LoadCurrentGenerationAsync(CancellationToken ct)
|
||||
{
|
||||
return await _reader.ReadAsync(
|
||||
_options.ClusterId,
|
||||
centralFetch: async innerCt => await FetchFromCentralAsync(innerCt).ConfigureAwait(false),
|
||||
fromSnapshot: snap => BootstrapResult.FromCache(snap.GenerationId),
|
||||
ct).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
private async ValueTask<BootstrapResult> FetchFromCentralAsync(CancellationToken ct)
|
||||
{
|
||||
await using var conn = new SqlConnection(_options.ConfigDbConnectionString);
|
||||
await conn.OpenAsync(ct).ConfigureAwait(false);
|
||||
|
||||
await using var cmd = conn.CreateCommand();
|
||||
cmd.CommandText = "EXEC dbo.sp_GetCurrentGenerationForCluster @NodeId=@n, @ClusterId=@c";
|
||||
cmd.Parameters.AddWithValue("@n", _options.NodeId);
|
||||
cmd.Parameters.AddWithValue("@c", _options.ClusterId);
|
||||
|
||||
await using var reader = await cmd.ExecuteReaderAsync(ct).ConfigureAwait(false);
|
||||
if (!await reader.ReadAsync(ct).ConfigureAwait(false))
|
||||
{
|
||||
_logger.LogWarning("Cluster {Cluster} has no Published generation yet", _options.ClusterId);
|
||||
return BootstrapResult.EmptyFromDb();
|
||||
}
|
||||
|
||||
var generationId = reader.GetInt64(0);
|
||||
_logger.LogInformation("Bootstrapped from central DB: generation {GenerationId}; sealing snapshot", generationId);
|
||||
|
||||
// Seal a minimal snapshot with the generation pointer. A richer snapshot that carries
|
||||
// the full sp_GetGenerationContent payload lands when the bootstrap flow grows to
|
||||
// consume the content during offline operation (separate follow-up — see decision #148
|
||||
// and phase-6-1 Stream D.3). The pointer alone is enough for the fallback path to
|
||||
// surface the last-known-good generation id + flip UsingStaleConfig.
|
||||
await _cache.SealAsync(new GenerationSnapshot
|
||||
{
|
||||
ClusterId = _options.ClusterId,
|
||||
GenerationId = generationId,
|
||||
CachedAt = DateTime.UtcNow,
|
||||
PayloadJson = JsonSerializer.Serialize(new { generationId, source = "sp_GetCurrentGenerationForCluster" }),
|
||||
}, ct).ConfigureAwait(false);
|
||||
|
||||
// StaleConfigFlag bookkeeping: ResilientConfigReader.MarkFresh on the returning call
|
||||
// path; we're on the fresh branch so we don't touch the flag here.
|
||||
_ = _staleFlag; // held so the field isn't flagged unused
|
||||
|
||||
return BootstrapResult.FromDb(generationId);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,133 @@
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Shouldly;
|
||||
using Xunit;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Tests;
|
||||
|
||||
/// <summary>
|
||||
/// Integration-style tests for the Phase 6.1 Stream D consumption hook — they don't touch
|
||||
/// SQL Server (the real SealedBootstrap does, via sp_GetCurrentGenerationForCluster), but
|
||||
/// they exercise ResilientConfigReader + GenerationSealedCache + StaleConfigFlag end-to-end
|
||||
/// by simulating central-DB outcomes through a direct ReadAsync call.
|
||||
/// </summary>
|
||||
[Trait("Category", "Integration")]
|
||||
public sealed class SealedBootstrapIntegrationTests : IDisposable
|
||||
{
|
||||
private readonly string _root = Path.Combine(Path.GetTempPath(), $"otopcua-sealed-bootstrap-{Guid.NewGuid():N}");
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
try
|
||||
{
|
||||
if (!Directory.Exists(_root)) return;
|
||||
foreach (var f in Directory.EnumerateFiles(_root, "*", SearchOption.AllDirectories))
|
||||
File.SetAttributes(f, FileAttributes.Normal);
|
||||
Directory.Delete(_root, recursive: true);
|
||||
}
|
||||
catch { /* best-effort */ }
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task CentralDbSuccess_SealsSnapshot_And_FlagFresh()
|
||||
{
|
||||
var cache = new GenerationSealedCache(_root);
|
||||
var flag = new StaleConfigFlag();
|
||||
var reader = new ResilientConfigReader(cache, flag, NullLogger<ResilientConfigReader>.Instance,
|
||||
timeout: TimeSpan.FromSeconds(10));
|
||||
|
||||
// Simulate the SealedBootstrap fresh-path: central DB returns generation id 42; the
|
||||
// bootstrap seals it + ResilientConfigReader marks the flag fresh.
|
||||
var result = await reader.ReadAsync(
|
||||
"c-a",
|
||||
centralFetch: async _ =>
|
||||
{
|
||||
await cache.SealAsync(new GenerationSnapshot
|
||||
{
|
||||
ClusterId = "c-a",
|
||||
GenerationId = 42,
|
||||
CachedAt = DateTime.UtcNow,
|
||||
PayloadJson = "{\"gen\":42}",
|
||||
}, CancellationToken.None);
|
||||
return (long?)42;
|
||||
},
|
||||
fromSnapshot: snap => (long?)snap.GenerationId,
|
||||
CancellationToken.None);
|
||||
|
||||
result.ShouldBe(42);
|
||||
flag.IsStale.ShouldBeFalse();
|
||||
cache.TryGetCurrentGenerationId("c-a").ShouldBe(42);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task CentralDbFails_FallsBackToSealedSnapshot_FlagStale()
|
||||
{
|
||||
var cache = new GenerationSealedCache(_root);
|
||||
var flag = new StaleConfigFlag();
|
||||
var reader = new ResilientConfigReader(cache, flag, NullLogger<ResilientConfigReader>.Instance,
|
||||
timeout: TimeSpan.FromSeconds(10), retryCount: 0);
|
||||
|
||||
// Seed a prior sealed snapshot (simulating a previous successful boot).
|
||||
await cache.SealAsync(new GenerationSnapshot
|
||||
{
|
||||
ClusterId = "c-a", GenerationId = 37, CachedAt = DateTime.UtcNow,
|
||||
PayloadJson = "{\"gen\":37}",
|
||||
});
|
||||
|
||||
// Now simulate central DB down → fallback.
|
||||
var result = await reader.ReadAsync(
|
||||
"c-a",
|
||||
centralFetch: _ => throw new InvalidOperationException("SQL dead"),
|
||||
fromSnapshot: snap => (long?)snap.GenerationId,
|
||||
CancellationToken.None);
|
||||
|
||||
result.ShouldBe(37);
|
||||
flag.IsStale.ShouldBeTrue("cache fallback flips the /healthz flag");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task NoSnapshot_AndCentralDown_Throws_ClearError()
|
||||
{
|
||||
var cache = new GenerationSealedCache(_root);
|
||||
var flag = new StaleConfigFlag();
|
||||
var reader = new ResilientConfigReader(cache, flag, NullLogger<ResilientConfigReader>.Instance,
|
||||
timeout: TimeSpan.FromSeconds(10), retryCount: 0);
|
||||
|
||||
await Should.ThrowAsync<GenerationCacheUnavailableException>(async () =>
|
||||
{
|
||||
await reader.ReadAsync<long?>(
|
||||
"c-a",
|
||||
centralFetch: _ => throw new InvalidOperationException("SQL dead"),
|
||||
fromSnapshot: snap => (long?)snap.GenerationId,
|
||||
CancellationToken.None);
|
||||
});
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task SuccessfulBootstrap_AfterFailure_ClearsStaleFlag()
|
||||
{
|
||||
var cache = new GenerationSealedCache(_root);
|
||||
var flag = new StaleConfigFlag();
|
||||
var reader = new ResilientConfigReader(cache, flag, NullLogger<ResilientConfigReader>.Instance,
|
||||
timeout: TimeSpan.FromSeconds(10), retryCount: 0);
|
||||
|
||||
await cache.SealAsync(new GenerationSnapshot
|
||||
{
|
||||
ClusterId = "c-a", GenerationId = 1, CachedAt = DateTime.UtcNow, PayloadJson = "{}",
|
||||
});
|
||||
|
||||
// Fallback serves snapshot → flag goes stale.
|
||||
await reader.ReadAsync("c-a",
|
||||
centralFetch: _ => throw new InvalidOperationException("dead"),
|
||||
fromSnapshot: s => (long?)s.GenerationId,
|
||||
CancellationToken.None);
|
||||
flag.IsStale.ShouldBeTrue();
|
||||
|
||||
// Subsequent successful bootstrap clears it.
|
||||
await reader.ReadAsync("c-a",
|
||||
centralFetch: _ => ValueTask.FromResult((long?)5),
|
||||
fromSnapshot: s => (long?)s.GenerationId,
|
||||
CancellationToken.None);
|
||||
flag.IsStale.ShouldBeFalse("next successful DB round-trip clears the flag");
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user