diff --git a/code-reviews/Core/findings.md b/code-reviews/Core/findings.md index d55414f..a6a126f 100644 --- a/code-reviews/Core/findings.md +++ b/code-reviews/Core/findings.md @@ -7,7 +7,7 @@ | Review date | 2026-05-22 | | Commit reviewed | `76d35d1` | | Status | Reviewed | -| Open findings | 12 | +| Open findings | 10 | ## Checklist coverage @@ -33,13 +33,13 @@ | Severity | High | | Category | Correctness & logic bugs | | Location | `src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/UserAuthorizationState.cs:50-68` | -| Status | Open | +| Status | Resolved | **Description:** `NeedsRefresh` can never return `true` with the default field values. `AuthCacheMaxStaleness` defaults to 5 minutes and `MembershipFreshnessInterval` defaults to 15 minutes. `NeedsRefresh(utcNow)` is defined as `!IsStale(utcNow) && elapsed > MembershipFreshnessInterval`, i.e. it needs `elapsed > 15 min` AND `elapsed <= 5 min` simultaneously — an empty set. The session crosses the staleness ceiling (5 min) and fails closed long before it ever reaches the 15-minute freshness boundary that is supposed to signal "kick off an async re-resolution while still serving cached memberships." Decision #151 / #152 in `docs/v2/implementation/phase-6-2-authorization-runtime.md` intends the freshness window (15 min, re-resolve) to be the inner trigger and the staleness ceiling to be the outer hard limit; with these defaults the ordering is inverted, so the "refresh while warm" path is dead code and every long-lived session hard-fails authorization after 5 minutes. **Recommendation:** Either swap the defaults so `MembershipFreshnessInterval` (e.g. 5 min) is strictly less than `AuthCacheMaxStaleness` (e.g. 15 min) — matching the doc's stated intent — or, if the 5/15 values are correct, redefine which window is the refresh trigger and which is the fail-closed ceiling. Add a unit test asserting `NeedsRefresh` returns `true` for at least one point in time with the production defaults. -**Resolution:** _(open)_ +**Resolution:** Resolved 2026-05-22 — swapped the defaults so `MembershipFreshnessInterval` is 5 min and `AuthCacheMaxStaleness` is 15 min (freshness = inner re-resolve trigger, staleness = outer fail-closed ceiling); added a `NeedsRefresh_FiresWithin_ProductionDefault_Windows` regression test. ### Core-002 @@ -48,13 +48,13 @@ | Severity | High | | Category | Security | | Location | `src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/TriePermissionEvaluator.cs:24-50` | -| Status | Open | +| Status | Resolved | **Description:** `TriePermissionEvaluator.Authorize` never compares the session's `AuthGenerationId` against the generation of the trie it evaluates against. It calls `_cache.GetTrie(scope.ClusterId)` — the current-generation shortcut — and authorizes against whatever generation the cache happens to hold. `UserAuthorizationState` carries `AuthGenerationId` precisely so a stale session can be detected, and the Phase 6.2 design (`phase-6-2-authorization-runtime.md` adversarial-review item #3 "Redundancy-safe invalidation", plus the §Scope `PermissionTrieCache + freshness` row) requires the hot-path call to look up `CurrentGenerationId` and force a re-evaluation on mismatch. As written, a session bound at generation N silently evaluates against generation N+1 the instant another node publishes — grants added or removed in N+1 take effect for that session without the intended generation-stamp re-check, and the provenance returned in `AuthorizationDecision` misreports which generation produced the verdict. **Recommendation:** In `Authorize`, after resolving the trie, compare `trie.GenerationId` to `session.AuthGenerationId`. On mismatch either fetch the session's bound generation via `_cache.GetTrie(clusterId, session.AuthGenerationId)` and evaluate against it, or signal the caller to re-resolve the session's auth state before retrying. Add a test for the publish-during-session scenario. -**Resolution:** _(open)_ +**Resolution:** Resolved 2026-05-22 — `Authorize` now compares `trie.GenerationId` to `session.AuthGenerationId` and, on mismatch, re-fetches the session's bound generation via `_cache.GetTrie(clusterId, session.AuthGenerationId)`, failing closed when that generation has been pruned; added publish-during-session and pruned-generation regression tests. ### Core-003 diff --git a/src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/TriePermissionEvaluator.cs b/src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/TriePermissionEvaluator.cs index f175505..e2b7dae 100644 --- a/src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/TriePermissionEvaluator.cs +++ b/src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/TriePermissionEvaluator.cs @@ -37,6 +37,21 @@ public sealed class TriePermissionEvaluator : IPermissionEvaluator var trie = _cache.GetTrie(scope.ClusterId); if (trie is null) return AuthorizationDecision.NotGranted(); + // Decision #153 / Phase 6.2 adversarial-review item #3 (redundancy-safe invalidation): + // the GetTrie shortcut returns whatever generation the cache currently holds, which may + // have advanced past the generation this session was bound to (another node published). + // Evaluate against the session's *bound* generation so a grant added or removed in a + // newer generation cannot silently take effect mid-session, and so the provenance in the + // AuthorizationDecision reports the generation that actually produced the verdict. + if (trie.GenerationId != session.AuthGenerationId) + { + trie = _cache.GetTrie(scope.ClusterId, session.AuthGenerationId); + + // The session's bound generation has been pruned out of the cache — fail closed and + // force the caller to re-resolve the session's auth state before retrying. + if (trie is null) return AuthorizationDecision.NotGranted(); + } + var matches = trie.CollectMatches(scope, session.LdapGroups); if (matches.Count == 0) return AuthorizationDecision.NotGranted(); diff --git a/src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/UserAuthorizationState.cs b/src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/UserAuthorizationState.cs index 91f8212..df2a726 100644 --- a/src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/UserAuthorizationState.cs +++ b/src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/UserAuthorizationState.cs @@ -7,13 +7,19 @@ namespace ZB.MOM.WW.OtOpcUa.Core.Authorization; /// /// /// Per decision #151 the membership is bounded by -/// (default 15 min). After that, the next hot-path authz call re-resolves LDAP group +/// (default 5 min). After that, the next hot-path authz call re-resolves LDAP group /// memberships; failure to re-resolve (LDAP unreachable) flips the session to fail-closed /// until a refresh succeeds. /// -/// Per decision #152 (default 5 min) is separate from +/// Per decision #152 (default 15 min) is separate from /// Phase 6.1's availability-oriented 24h cache — beyond this window the evaluator returns /// regardless of config-cache warmth. +/// +/// The freshness window is the inner trigger and the staleness ceiling the outer hard +/// limit: MUST be strictly less than +/// so that ("re-resolve +/// while still serving cached memberships") has a non-empty window before +/// fails the session closed. /// public sealed record UserAuthorizationState { @@ -47,10 +53,10 @@ public sealed record UserAuthorizationState public required long MembershipVersion { get; init; } /// Bounded membership freshness window; past this the next authz call refreshes. - public TimeSpan MembershipFreshnessInterval { get; init; } = TimeSpan.FromMinutes(15); + public TimeSpan MembershipFreshnessInterval { get; init; } = TimeSpan.FromMinutes(5); /// Hard staleness ceiling — beyond this, the evaluator fails closed. - public TimeSpan AuthCacheMaxStaleness { get; init; } = TimeSpan.FromMinutes(5); + public TimeSpan AuthCacheMaxStaleness { get; init; } = TimeSpan.FromMinutes(15); /// /// True when - exceeds diff --git a/tests/Core/ZB.MOM.WW.OtOpcUa.Core.Tests/Authorization/TriePermissionEvaluatorTests.cs b/tests/Core/ZB.MOM.WW.OtOpcUa.Core.Tests/Authorization/TriePermissionEvaluatorTests.cs index fbe9bb6..ac97e44 100644 --- a/tests/Core/ZB.MOM.WW.OtOpcUa.Core.Tests/Authorization/TriePermissionEvaluatorTests.cs +++ b/tests/Core/ZB.MOM.WW.OtOpcUa.Core.Tests/Authorization/TriePermissionEvaluatorTests.cs @@ -32,14 +32,15 @@ public sealed class TriePermissionEvaluatorTests PermissionFlags = flags, }; - private static UserAuthorizationState Session(string[] groups, DateTime? resolvedUtc = null, string clusterId = "c1") => + private static UserAuthorizationState Session( + string[] groups, DateTime? resolvedUtc = null, string clusterId = "c1", long authGenerationId = 1) => new() { SessionId = "sess", ClusterId = clusterId, LdapGroups = groups, MembershipResolvedUtc = resolvedUtc ?? Now, - AuthGenerationId = 1, + AuthGenerationId = authGenerationId, MembershipVersion = 1, }; @@ -123,7 +124,7 @@ public sealed class TriePermissionEvaluatorTests { var evaluator = MakeEvaluator([Row("cn=ops", NodeAclScopeKind.Cluster, null, NodePermissions.Read)]); var session = Session(["cn=ops"], resolvedUtc: Now); - _time.Utc = Now.AddMinutes(10); // well past the 5-min AuthCacheMaxStaleness default + _time.Utc = Now.AddMinutes(20); // well past the 15-min AuthCacheMaxStaleness default var decision = evaluator.Authorize(session, OpcUaOperation.Read, Scope()); @@ -141,6 +142,46 @@ public sealed class TriePermissionEvaluatorTests decision.Verdict.ShouldBe(AuthorizationVerdict.NotGranted); } + [Fact] + public void StaleGeneration_EvaluatesAgainst_SessionBoundGeneration() + { + // Core-002 regression: session is bound to generation 1 (Read granted). Another node + // publishes generation 2 with the grant removed and it becomes the cache "current". + // The evaluator must still honour the session's bound generation 1, not generation 2. + var gen1Row = Row("cn=ops", NodeAclScopeKind.Cluster, null, NodePermissions.Read); + gen1Row.GenerationId = 1; + var cache = new PermissionTrieCache(); + cache.Install(PermissionTrieBuilder.Build("c1", 1, [gen1Row])); + cache.Install(PermissionTrieBuilder.Build("c1", 2, [])); // gen 2 — grant revoked, now current + var evaluator = new TriePermissionEvaluator(cache, _time); + + var decision = evaluator.Authorize( + Session(["cn=ops"], authGenerationId: 1), OpcUaOperation.Read, Scope()); + + decision.Verdict.ShouldBe(AuthorizationVerdict.Allow, + "the session bound to generation 1 must evaluate against generation 1, not the newer current generation"); + decision.Provenance.Count.ShouldBe(1); + } + + [Fact] + public void StaleGeneration_FailsClosed_WhenBoundGenerationPruned() + { + // Core-002 regression: session is bound to a generation no longer in the cache. + // The evaluator must fail closed rather than silently using the current generation. + var gen5Row = Row("cn=ops", NodeAclScopeKind.Cluster, null, NodePermissions.Read); + gen5Row.GenerationId = 5; + var cache = new PermissionTrieCache(); + cache.Install(PermissionTrieBuilder.Build("c1", 5, [gen5Row])); + var evaluator = new TriePermissionEvaluator(cache, _time); + + // Session bound to generation 1, which was never installed / has been pruned. + var decision = evaluator.Authorize( + Session(["cn=ops"], authGenerationId: 1), OpcUaOperation.Read, Scope()); + + decision.Verdict.ShouldBe(AuthorizationVerdict.NotGranted, + "a session bound to a generation absent from the cache must fail closed"); + } + [Fact] public void OperationToPermission_Mapping_IsTotal() { diff --git a/tests/Core/ZB.MOM.WW.OtOpcUa.Core.Tests/Authorization/UserAuthorizationStateTests.cs b/tests/Core/ZB.MOM.WW.OtOpcUa.Core.Tests/Authorization/UserAuthorizationStateTests.cs index 28527a4..f502b9b 100644 --- a/tests/Core/ZB.MOM.WW.OtOpcUa.Core.Tests/Authorization/UserAuthorizationStateTests.cs +++ b/tests/Core/ZB.MOM.WW.OtOpcUa.Core.Tests/Authorization/UserAuthorizationStateTests.cs @@ -33,7 +33,21 @@ public sealed class UserAuthorizationStateTests { var session = Fresh(Now); - session.NeedsRefresh(Now.AddMinutes(16)).ShouldBeFalse("past freshness but also past the 5-min staleness ceiling — should be Stale, not NeedsRefresh"); + session.NeedsRefresh(Now.AddMinutes(16)).ShouldBeFalse("past freshness but also past the 15-min staleness ceiling — should be Stale, not NeedsRefresh"); + } + + [Fact] + public void NeedsRefresh_FiresWithin_ProductionDefault_Windows() + { + // Core-001 regression: with the production defaults the refresh window must be + // non-empty. Freshness defaults to 5 min and staleness to 15 min, so between + // those two boundaries NeedsRefresh must return true while IsStale stays false. + var session = Fresh(Now); + + session.MembershipFreshnessInterval.ShouldBeLessThan(session.AuthCacheMaxStaleness); + + session.NeedsRefresh(Now.AddMinutes(10)).ShouldBeTrue("10 min is past the 5-min freshness window but within the 15-min staleness ceiling"); + session.IsStale(Now.AddMinutes(10)).ShouldBeFalse("10 min is still within the 15-min staleness ceiling"); } [Fact] @@ -55,6 +69,6 @@ public sealed class UserAuthorizationStateTests { var session = Fresh(Now); - session.IsStale(Now.AddMinutes(6)).ShouldBeTrue("default AuthCacheMaxStaleness is 5 min"); + session.IsStale(Now.AddMinutes(16)).ShouldBeTrue("default AuthCacheMaxStaleness is 15 min"); } }