diff --git a/src/Server/ZB.MOM.WW.OtOpcUa.Admin/Program.cs b/src/Server/ZB.MOM.WW.OtOpcUa.Admin/Program.cs index f159693..5857924 100644 --- a/src/Server/ZB.MOM.WW.OtOpcUa.Admin/Program.cs +++ b/src/Server/ZB.MOM.WW.OtOpcUa.Admin/Program.cs @@ -65,8 +65,17 @@ builder.Services.AddScoped(); builder.Services.AddScoped(); builder.Services.AddSingleton(); builder.Services.AddScoped(); -builder.Services.AddScoped(); +// EF-backed inner service registered under the keyed-service key so the resilient +// singleton decorator resolves it per-scope without a captive-dependency issue. +builder.Services.AddKeyedScoped( + ZB.MOM.WW.OtOpcUa.Admin.Security.ResilientLdapGroupRoleMappingService.InnerServiceKey); +// Resilient singleton decorator: timeout 2 s → retry 3× jittered → fallback to in-memory snapshot. +// Uses IServiceScopeFactory to open a short-lived scope for each DB call. +// The static LdapOptions.GroupToRole bootstrap dictionary in AdminRoleGrantResolver is the +// lock-out-proof floor; this decorator only guards the DB-backed augmentation rows. +builder.Services.AddSingleton(); // Phase 7 Stream F — scripting + virtual tag + scripted alarm draft services, test // harness, and historian diagnostics. 
The historian sink is the Null variant here — diff --git a/src/Server/ZB.MOM.WW.OtOpcUa.Admin/Security/ResilientLdapGroupRoleMappingService.cs b/src/Server/ZB.MOM.WW.OtOpcUa.Admin/Security/ResilientLdapGroupRoleMappingService.cs new file mode 100644 index 0000000..eb7b144 --- /dev/null +++ b/src/Server/ZB.MOM.WW.OtOpcUa.Admin/Security/ResilientLdapGroupRoleMappingService.cs @@ -0,0 +1,171 @@ +using System.Collections.Concurrent; +using Microsoft.Extensions.Logging; +using Polly; +using Polly.Retry; +using Polly.Timeout; +using ZB.MOM.WW.OtOpcUa.Configuration.Entities; +using ZB.MOM.WW.OtOpcUa.Configuration.Services; + +namespace ZB.MOM.WW.OtOpcUa.Admin.Security; + +/// +/// Resilience decorator for ILdapGroupRoleMappingService that wraps the +/// hot-path GetByGroupsAsync call in the Phase 6.1-style pipeline: +/// timeout 2 s → retry 3× jittered → fallback to in-memory sealed snapshot. +/// +/// +/// Registered as a singleton so the in-memory snapshot survives across sign-in +/// requests. The inner ILdapGroupRoleMappingService is resolved via the +/// keyed-service key InnerServiceKey, allowing the EF-backed scoped service to be +/// registered as the "inner" implementation while this singleton decorator is the primary +/// binding. +/// +/// Because the inner service is scoped (it owns an EF DbContext), this +/// singleton uses IServiceScopeFactory to open a short-lived scope for +/// each DB call. The scope is disposed immediately after the call completes. +/// +/// On each successful GetByGroupsAsync the result is stored in a +/// ConcurrentDictionary keyed by the canonicalised group set. On +/// any failure (DB unreachable, SQL exception, timeout) after all retries, the cached +/// result for that exact group set is returned. When no prior success exists for the group +/// set, an empty list is returned — the static LdapOptions.GroupToRole +/// bootstrap dictionary in AdminRoleGrantResolver is the lock-out-proof +/// floor that remains functional regardless of DB state. 
+/// +/// Write methods (CreateAsync, DeleteAsync) and +/// ListAllAsync are passed through unchanged — the resilience layer is +/// read-path only, consistent with the Phase 6.1 design decision that writes must fail +/// hard on DB outage rather than landing against a stale state. +/// +public sealed class ResilientLdapGroupRoleMappingService : ILdapGroupRoleMappingService +{ + /// + /// DI keyed-service key used to register the inner (EF-backed) implementation so the + /// decorator can resolve it without creating a circular dependency on itself. + /// + public const string InnerServiceKey = "LdapGroupRoleMappingService.Inner"; + + private readonly IServiceScopeFactory _scopeFactory; + private readonly ResiliencePipeline _pipeline; + private readonly ILogger _logger; + + // Keyed by the normalised group set (NUL-separated sorted group names, lower-case). + private readonly ConcurrentDictionary> _snapshot = + new(StringComparer.Ordinal); + + public ResilientLdapGroupRoleMappingService( + IServiceScopeFactory scopeFactory, + ILogger logger, + TimeSpan? timeout = null, + int retryCount = 3) + { + _scopeFactory = scopeFactory; + _logger = logger; + + var builder = new ResiliencePipelineBuilder() + .AddTimeout(new TimeoutStrategyOptions + { + Timeout = timeout ?? TimeSpan.FromSeconds(2), + }); + + if (retryCount > 0) + { + builder.AddRetry(new RetryStrategyOptions + { + MaxRetryAttempts = retryCount, + BackoffType = DelayBackoffType.Exponential, + UseJitter = true, + Delay = TimeSpan.FromMilliseconds(100), + MaxDelay = TimeSpan.FromSeconds(1), + ShouldHandle = new PredicateBuilder().Handle( + ex => ex is not OperationCanceledException), + }); + } + + _pipeline = builder.Build(); + } + + /// + /// + /// Executed through the timeout → retry pipeline. On full failure the last snapshot + /// for this group set (if any) is returned; otherwise an empty list. The static + /// appsettings.json bootstrap dictionary in AdminRoleGrantResolver + /// remains the ultimate fallback — a DB outage never causes a total login denial. 
+ /// + public async Task> GetByGroupsAsync( + IEnumerable ldapGroups, CancellationToken cancellationToken) + { + ArgumentNullException.ThrowIfNull(ldapGroups); + + var groupList = ldapGroups.ToList(); + if (groupList.Count == 0) return []; + + var cacheKey = CacheKey(groupList); + + try + { + var result = await _pipeline.ExecuteAsync(async ct => + { + await using var scope = _scopeFactory.CreateAsyncScope(); + var inner = (ILdapGroupRoleMappingService)scope.ServiceProvider + .GetRequiredKeyedService(InnerServiceKey); + return await inner.GetByGroupsAsync(groupList, ct).ConfigureAwait(false); + }, cancellationToken).ConfigureAwait(false); + + // Seal the snapshot so a subsequent DB outage can fall back to it. + _snapshot[cacheKey] = result; + return result; + } + catch (Exception ex) when (ex is not OperationCanceledException) + { + _logger.LogWarning(ex, + "LDAP role-mapping DB read failed after retries; falling back to snapshot for group set [{Groups}]", + string.Join(", ", groupList)); + + return _snapshot.TryGetValue(cacheKey, out var cached) + ? cached + : []; + } + } + + /// + /// Pass-through — not covered by the resilience pipeline (Admin UI listing only). + public async Task> ListAllAsync(CancellationToken cancellationToken) + { + await using var scope = _scopeFactory.CreateAsyncScope(); + var inner = (ILdapGroupRoleMappingService)scope.ServiceProvider + .GetRequiredKeyedService(InnerServiceKey); + return await inner.ListAllAsync(cancellationToken).ConfigureAwait(false); + } + + /// + /// Pass-through — writes must fail hard on DB outage per Phase 6.1 design decision. 
+ public async Task CreateAsync(LdapGroupRoleMapping row, CancellationToken cancellationToken) + { + await using var scope = _scopeFactory.CreateAsyncScope(); + var inner = (ILdapGroupRoleMappingService)scope.ServiceProvider + .GetRequiredKeyedService(InnerServiceKey); + return await inner.CreateAsync(row, cancellationToken).ConfigureAwait(false); + } + + /// + /// Pass-through — writes must fail hard on DB outage per Phase 6.1 design decision. + public async Task DeleteAsync(Guid id, CancellationToken cancellationToken) + { + await using var scope = _scopeFactory.CreateAsyncScope(); + var inner = (ILdapGroupRoleMappingService)scope.ServiceProvider + .GetRequiredKeyedService(InnerServiceKey); + await inner.DeleteAsync(id, cancellationToken).ConfigureAwait(false); + } + + // ── helpers ──────────────────────────────────────────────────────────────────────────────── + + /// + /// Canonicalise a group set into a stable cache key: sort, lower-case, join with NUL. + /// Two calls with the same groups in different orders produce the same key. 
+ /// + internal static string CacheKey(IEnumerable groups) + => string.Join('\0', groups + .Select(g => g.ToLowerInvariant()) + .Order(StringComparer.Ordinal)); +} diff --git a/tests/Server/ZB.MOM.WW.OtOpcUa.Admin.Tests/ResilientLdapGroupRoleMappingServiceTests.cs b/tests/Server/ZB.MOM.WW.OtOpcUa.Admin.Tests/ResilientLdapGroupRoleMappingServiceTests.cs new file mode 100644 index 0000000..70c14e3 --- /dev/null +++ b/tests/Server/ZB.MOM.WW.OtOpcUa.Admin.Tests/ResilientLdapGroupRoleMappingServiceTests.cs @@ -0,0 +1,278 @@ +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging.Abstractions; +using Shouldly; +using Xunit; +using ZB.MOM.WW.OtOpcUa.Admin.Security; +using ZB.MOM.WW.OtOpcUa.Configuration.Entities; +using ZB.MOM.WW.OtOpcUa.Configuration.Enums; +using ZB.MOM.WW.OtOpcUa.Configuration.Services; + +namespace ZB.MOM.WW.OtOpcUa.Admin.Tests; + +/// +/// Unit tests for — the Phase 6.2 +/// Stream A.2 resilience decorator (timeout → retry → in-memory-snapshot fallback) +/// that guards against a transient Config DB outage. +/// +[Trait("Category", "Unit")] +public sealed class ResilientLdapGroupRoleMappingServiceTests +{ + // ── fake inner service ──────────────────────────────────────────────────────────────────── + + /// + /// Configurable in-memory . Throws on demand + /// so we can exercise the resilience path without a real DB. + /// + private sealed class FakeInner : ILdapGroupRoleMappingService + { + private readonly IReadOnlyList _rows; + public bool ThrowOnRead { get; set; } + public int ReadAttempts { get; private set; } + + public FakeInner(IReadOnlyList? rows = null) + => _rows = rows ?? 
[]; + + public Task> GetByGroupsAsync( + IEnumerable ldapGroups, CancellationToken cancellationToken) + { + ReadAttempts++; + if (ThrowOnRead) throw new InvalidOperationException("DB unavailable (test)"); + var set = ldapGroups.ToHashSet(StringComparer.OrdinalIgnoreCase); + return Task.FromResult>( + _rows.Where(r => set.Contains(r.LdapGroup)).ToList()); + } + + public Task> ListAllAsync(CancellationToken cancellationToken) + => Task.FromResult(_rows); + + public Task CreateAsync(LdapGroupRoleMapping row, CancellationToken cancellationToken) + => Task.FromResult(row); + + public Task DeleteAsync(Guid id, CancellationToken cancellationToken) + => Task.CompletedTask; + } + + // ── factory helper ──────────────────────────────────────────────────────────────────────── + + /// + /// Build a backed by a real + /// that registers under the + /// keyed-service key . + /// + private static ResilientLdapGroupRoleMappingService Build( + FakeInner inner, + TimeSpan? timeout = null, + int retryCount = 0) + { + var services = new ServiceCollection(); + services.AddKeyedSingleton( + ResilientLdapGroupRoleMappingService.InnerServiceKey, inner); + + var provider = services.BuildServiceProvider(); + return new ResilientLdapGroupRoleMappingService( + provider.GetRequiredService(), + NullLogger.Instance, + timeout ?? 
TimeSpan.FromSeconds(10), + retryCount); + } + + // ── tests — resilience pipeline ─────────────────────────────────────────────────────────── + + [Fact] + public async Task DbSuccess_returns_result_and_seals_snapshot() + { + var row = Row("cn=ops", AdminRole.FleetAdmin); + var fake = new FakeInner([row]); + var svc = Build(fake); + + var result = await svc.GetByGroupsAsync(["cn=ops"], CancellationToken.None); + + result.Count.ShouldBe(1); + result[0].LdapGroup.ShouldBe("cn=ops"); + fake.ReadAttempts.ShouldBe(1); + } + + [Fact] + public async Task DbFailure_with_snapshot_returns_cached_result() + { + var row = Row("cn=ops", AdminRole.FleetAdmin); + var fake = new FakeInner([row]); + var svc = Build(fake, retryCount: 0); + + // First call succeeds — populates the snapshot. + await svc.GetByGroupsAsync(["cn=ops"], CancellationToken.None); + + // Now break the DB. + fake.ThrowOnRead = true; + + var fallback = await svc.GetByGroupsAsync(["cn=ops"], CancellationToken.None); + + fallback.Count.ShouldBe(1); + fallback[0].LdapGroup.ShouldBe("cn=ops"); + } + + [Fact] + public async Task DbFailure_without_snapshot_returns_empty_list() + { + var fake = new FakeInner([Row("cn=ops", AdminRole.FleetAdmin)]) { ThrowOnRead = true }; + var svc = Build(fake, retryCount: 0); + + var result = await svc.GetByGroupsAsync(["cn=ops"], CancellationToken.None); + + // Empty list — the static LdapOptions.GroupToRole bootstrap in AdminRoleGrantResolver + // is the lock-out-proof floor; no DB rows means only static dict grants fire. + result.ShouldBeEmpty(); + } + + [Fact] + public async Task DbFailure_retries_before_fallback() + { + var fake = new FakeInner([Row("cn=ops", AdminRole.FleetAdmin)]) { ThrowOnRead = true }; + // retryCount=2: 1 initial + 2 retries = 3 attempts total before falling back. 
+ var svc = Build(fake, timeout: TimeSpan.FromSeconds(30), retryCount: 2); + + var result = await svc.GetByGroupsAsync(["cn=ops"], CancellationToken.None); + + fake.ReadAttempts.ShouldBe(3, "1 initial + 2 retries before snapshot fallback"); + result.ShouldBeEmpty("no prior snapshot — empty fallback, not a throw"); + } + + [Fact] + public async Task Empty_groups_bypasses_pipeline_and_returns_empty() + { + var fake = new FakeInner([Row("cn=ops", AdminRole.FleetAdmin)]); + var svc = Build(fake); + + var result = await svc.GetByGroupsAsync([], CancellationToken.None); + + result.ShouldBeEmpty(); + fake.ReadAttempts.ShouldBe(0, "pipeline must not fire for empty group list"); + } + + [Fact] + public async Task Cancellation_propagates_without_fallback() + { + var fake = new FakeInner([Row("cn=ops", AdminRole.FleetAdmin)]); + var svc = Build(fake, retryCount: 0); + using var cts = new CancellationTokenSource(); + cts.Cancel(); + + await Should.ThrowAsync( + () => svc.GetByGroupsAsync(["cn=ops"], cts.Token)); + } + + // ── tests — snapshot key semantics ──────────────────────────────────────────────────────── + + [Fact] + public async Task Snapshot_is_keyed_by_group_set_regardless_of_order() + { + var row1 = Row("cn=a", AdminRole.FleetAdmin); + var row2 = Row("cn=b", AdminRole.ConfigEditor); + var fake = new FakeInner([row1, row2]); + var svc = Build(fake, retryCount: 0); + + // Seed the snapshot with [b, a] order. + await svc.GetByGroupsAsync(["cn=b", "cn=a"], CancellationToken.None); + fake.ThrowOnRead = true; + + // Request with [a, b] order — same canonical key → fallback snapshot available. 
+ var fallback = await svc.GetByGroupsAsync(["cn=a", "cn=b"], CancellationToken.None); + fallback.Count.ShouldBe(2); + } + + [Fact] + public async Task Different_group_sets_have_independent_snapshots() + { + var row1 = Row("cn=ops", AdminRole.FleetAdmin); + var row2 = Row("cn=viewer", AdminRole.ConfigViewer); + var fake = new FakeInner([row1, row2]); + var svc = Build(fake, retryCount: 0); + + // Seed snapshot for cn=ops only. + await svc.GetByGroupsAsync(["cn=ops"], CancellationToken.None); + fake.ThrowOnRead = true; + + // cn=viewer never had a successful call → no snapshot → empty fallback. + var fallback = await svc.GetByGroupsAsync(["cn=viewer"], CancellationToken.None); + fallback.ShouldBeEmpty(); + } + + // ── tests — CacheKey helper ─────────────────────────────────────────────────────────────── + + [Fact] + public void CacheKey_is_order_independent() + { + var key1 = ResilientLdapGroupRoleMappingService.CacheKey(["cn=a", "cn=b", "cn=c"]); + var key2 = ResilientLdapGroupRoleMappingService.CacheKey(["cn=c", "cn=a", "cn=b"]); + key1.ShouldBe(key2); + } + + [Fact] + public void CacheKey_is_case_insensitive() + { + var key1 = ResilientLdapGroupRoleMappingService.CacheKey(["CN=Ops"]); + var key2 = ResilientLdapGroupRoleMappingService.CacheKey(["cn=ops"]); + key1.ShouldBe(key2); + } + + [Fact] + public void CacheKey_distinguishes_different_sets() + { + var key1 = ResilientLdapGroupRoleMappingService.CacheKey(["cn=a"]); + var key2 = ResilientLdapGroupRoleMappingService.CacheKey(["cn=b"]); + key1.ShouldNotBe(key2); + } + + [Fact] + public void CacheKey_single_group_roundtrips() + { + var key = ResilientLdapGroupRoleMappingService.CacheKey(["cn=fleet-admin"]); + key.ShouldBe("cn=fleet-admin"); + } + + // ── pass-through methods ────────────────────────────────────────────────────────────────── + + [Fact] + public async Task ListAllAsync_passes_through_to_inner() + { + var row = Row("cn=ops", AdminRole.FleetAdmin); + var fake = new FakeInner([row]); + var svc = 
Build(fake); + + var result = await svc.ListAllAsync(CancellationToken.None); + + result.Count.ShouldBe(1); + } + + [Fact] + public async Task CreateAsync_passes_through_to_inner() + { + var row = Row("cn=ops", AdminRole.FleetAdmin); + var fake = new FakeInner(); + var svc = Build(fake); + + var created = await svc.CreateAsync(row, CancellationToken.None); + created.ShouldBe(row); + } + + [Fact] + public async Task DeleteAsync_passes_through_to_inner() + { + var fake = new FakeInner(); + var svc = Build(fake); + + // Should not throw. + await svc.DeleteAsync(Guid.NewGuid(), CancellationToken.None); + } + + // ── helpers ─────────────────────────────────────────────────────────────────────────────── + + private static LdapGroupRoleMapping Row(string group, AdminRole role) => new() + { + Id = Guid.NewGuid(), + LdapGroup = group, + Role = role, + IsSystemWide = true, + ClusterId = null, + }; +}