fix(core): resolve Medium code-review finding (Core-005)

Change ClusterEntry from sealed record to sealed class so TryUpdate
uses reference equality for the CAS comparison.  Prune now uses a
read-compute-TryUpdate retry loop that restarts when a concurrent
Install updates the entry between the read and the write, preventing
a race that could silently drop the just-installed newest generation.
Two regression tests added to PermissionTrieCacheTests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-22 08:23:52 -04:00
parent 09cd579220
commit debe163f4d
2 changed files with 69 additions and 9 deletions

View File

@@ -54,26 +54,51 @@ public sealed class PermissionTrieCache
/// <summary>
/// Retain only the most-recent <paramref name="keepLatest"/> generations for a cluster.
/// No-op when there's nothing to drop.
/// No-op when there's nothing to drop. Thread-safe: uses a CAS loop with
/// <see cref="ConcurrentDictionary{TKey,TValue}.TryUpdate"/> (reference equality on the
/// class-typed entry) so a concurrent <see cref="Install"/> on the same cluster is never
/// silently overwritten.
/// </summary>
public void Prune(string clusterId, int keepLatest = 3)
{
if (keepLatest < 1) throw new ArgumentOutOfRangeException(nameof(keepLatest), keepLatest, "keepLatest must be >= 1");
if (!_byCluster.TryGetValue(clusterId, out var entry)) return;
if (entry.Tries.Count <= keepLatest) return;
var keep = entry.Tries
// CAS retry loop: read a snapshot, compute the pruned entry, atomically swap.
// Retry if another writer (Install or a concurrent Prune) updated the entry first.
while (true)
{
if (!_byCluster.TryGetValue(clusterId, out var observed)) return;
if (observed.Tries.Count <= keepLatest) return;
var keep = observed.Tries
.OrderByDescending(kvp => kvp.Key)
.Take(keepLatest)
.ToDictionary(kvp => kvp.Key, kvp => kvp.Value);
_byCluster[clusterId] = new ClusterEntry(entry.Current, keep);
// Preserve the current pointer; if it was pruned (shouldn't happen since Current
// is always the newest generation), fall back to the newest retained entry.
var current = keep.TryGetValue(observed.Current.GenerationId, out var kept)
? kept
: keep.OrderByDescending(kvp => kvp.Key).First().Value;
var pruned = new ClusterEntry(current, keep);
// TryUpdate uses reference equality for ClusterEntry (class, not record) so it
// succeeds only when the stored reference is still the one we observed.
if (_byCluster.TryUpdate(clusterId, pruned, observed))
return;
// Another thread updated the entry between our read and our write — re-read and retry.
}
}
/// <summary>Diagnostics counter: number of cached (cluster, generation) tries.</summary>
public int CachedTrieCount => _byCluster.Values.Sum(e => e.Tries.Count);
private sealed record ClusterEntry(PermissionTrie Current, IReadOnlyDictionary<long, PermissionTrie> Tries)
// Class (not record) so TryUpdate in Prune uses reference equality for the CAS comparison.
private sealed class ClusterEntry(PermissionTrie current, IReadOnlyDictionary<long, PermissionTrie> tries)
{
public PermissionTrie Current { get; } = current;
public IReadOnlyDictionary<long, PermissionTrie> Tries { get; } = tries;
public static ClusterEntry FromSingle(PermissionTrie trie) =>
new(trie, new Dictionary<long, PermissionTrie> { [trie.GenerationId] = trie });

View File

@@ -101,4 +101,39 @@ public sealed class PermissionTrieCacheTests
cache.CurrentGenerationId("c1").ShouldBe(1);
cache.CurrentGenerationId("c2").ShouldBe(9);
}
/// <summary>
/// Core-005 regression: a concurrent Install that adds a new generation between a Prune
/// read and write must not be silently overwritten. We can't deterministically reproduce
/// a true data race in a unit test, but we can verify that Prune performed after Install
/// honours the newly-installed generation rather than clobbering it.
/// </summary>
[Fact]
public void Prune_AfterConcurrentInstall_PreservesLatestGeneration()
{
var cache = new PermissionTrieCache();
for (var g = 1L; g <= 5; g++) cache.Install(Trie("c1", g));
// Simulate: Install of generation 6 races with a pending Prune(keepLatest:2).
// After Prune runs, generation 6 (the "just installed" one) must still be current.
cache.Install(Trie("c1", 6));
cache.Prune("c1", keepLatest: 2);
cache.CurrentGenerationId("c1").ShouldBe(6, "current must remain the newest installed generation");
cache.GetTrie("c1", 6).ShouldNotBeNull("generation 6 must be retained as current");
cache.GetTrie("c1", 5).ShouldNotBeNull("generation 5 retained (keepLatest=2)");
cache.GetTrie("c1", 4).ShouldBeNull("generation 4 was pruned");
}
[Fact]
public void Prune_Current_Pointer_Survives_Pruning()
{
// Current is always the highest generation; verify it is not lost after prune.
var cache = new PermissionTrieCache();
for (var g = 1L; g <= 6; g++) cache.Install(Trie("c1", g));
cache.Prune("c1", keepLatest: 3);
cache.GetTrie("c1")!.GenerationId.ShouldBe(6, "current must still be the highest generation after pruning");
}
}