fix(core-alarm-historian): resolve Low code-review findings (Core.AlarmHistorian-008,011)

- Core.AlarmHistorian-008: cache queue depth in an Interlocked counter so EnqueueAsync no longer runs COUNT(*) on every alarm; consolidate DrainOnceAsync onto a single SqliteConnection per tick (purge, batch read, dead-letter, and outcome transaction all share it). - Core.AlarmHistorian-011: confirm the stale Galaxy.Host XML doc references were already fixed under earlier commits; flip to Resolved. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 05:38:26 -04:00
parent b92fea15d4
commit 0da4f3b63a
3 changed files with 269 additions and 74 deletions
@@ -609,6 +609,130 @@ public sealed class SqliteStoreAndForwardSinkTests : IDisposable
        }
    }

+    /// <summary>
+    ///     Regression for Core.AlarmHistorian-008: <c>EnqueueAsync</c> must NOT run a
+    ///     <c>SELECT COUNT(*)</c> on every enqueue when we are far below capacity. The
+    ///     optimisation tracks the queue depth in memory and only consults the database
+    ///     when the cached value indicates the capacity wall is in reach. This regression
+    ///     pins the perf characteristic: after many enqueues below capacity, the
+    ///     capacity-probe count must stay bounded — not grow proportionally to the
+    ///     enqueue count as the un-optimised path did.
+    /// </summary>
+    [Fact]
+    public async Task EnqueueAsync_does_not_count_all_rows_on_every_call_below_capacity()
+    {
+        var writer = new FakeWriter();
+        using var sink = new SqliteStoreAndForwardSink(
+            _dbPath, writer, _log, batchSize: 100, capacity: 10_000);
+
+        for (var i = 0; i < 200; i++)
+            await sink.EnqueueAsync(Event($"E{i}"), CancellationToken.None);
+
+        // Pre-fix: probe count == enqueue count (200). Post-fix: ≤ a handful (initial
+        // load + occasional periodic re-syncs). 25 is generous headroom.
+        sink.DebugCapacityProbeCount.ShouldBeLessThan(25,
+            "EnqueueAsync must not run a per-call SELECT COUNT(*) below capacity");
+    }
+
+    /// <summary>
+    ///     Regression for Core.AlarmHistorian-008: across every queue mutation (enqueue,
+    ///     Ack drain, PermanentFail drain, capacity eviction, RetryDeadLettered) the
+    ///     queue depth reported by <see cref="SqliteStoreAndForwardSink.GetStatus"/> must
+    ///     stay aligned with a fresh <c>COUNT(*)</c> against the database. Catches drift
+    ///     bugs in the in-memory counter introduced by the perf optimisation.
+    /// </summary>
+    [Fact]
+    public async Task Enqueue_and_drain_keep_queue_depth_consistent_with_storage()
+    {
+        var writer = new FakeWriter();
+        using var sink = new SqliteStoreAndForwardSink(
+            _dbPath, writer, _log, batchSize: 5, capacity: 8);
+
+        // Burst-enqueue below capacity — the in-memory counter must stay aligned with the
+        // SELECT COUNT(*) that GetStatus runs.
+        for (var i = 0; i < 6; i++)
+            await sink.EnqueueAsync(Event($"burst-{i}"), CancellationToken.None);
+        AssertQueueDepthMatchesStorage(sink);
+        sink.GetStatus().QueueDepth.ShouldBe(6);
+
+        // Push past capacity — capacity must still be enforced even when EnqueueAsync no
+        // longer runs COUNT(*) on every call.
+        for (var i = 0; i < 5; i++)
+            await sink.EnqueueAsync(Event($"overflow-{i}"), CancellationToken.None);
+        sink.GetStatus().QueueDepth.ShouldBe(8, "capacity must still be honoured by the perf-optimised path");
+        sink.GetStatus().EvictedCount.ShouldBe(3, "eviction counter must reflect every evicted row");
+        AssertQueueDepthMatchesStorage(sink);
+
+        // Drain a partial batch (Ack) — the in-memory counter must follow the deletes
+        // applied inside the single consolidated drain transaction.
+        await sink.DrainOnceAsync(CancellationToken.None);
+        AssertQueueDepthMatchesStorage(sink);
+
+        // Add a dead-lettered row and verify the counter does NOT include it (QueueDepth
+        // is non-dead-lettered only).
+        writer.NextOutcomePerEvent.Enqueue(HistorianWriteOutcome.PermanentFail);
+        await sink.EnqueueAsync(Event("to-dead-letter"), CancellationToken.None);
+        await sink.DrainOnceAsync(CancellationToken.None);
+        AssertQueueDepthMatchesStorage(sink);
+        sink.GetStatus().DeadLetterDepth.ShouldBeGreaterThanOrEqualTo(1);
+
+        // RetryDeadLettered moves DLQ rows back into the live queue — the counter must
+        // pick that up.
+        sink.RetryDeadLettered();
+        AssertQueueDepthMatchesStorage(sink);
+    }
+
+    /// <summary>
+    ///     Stress regression for Core.AlarmHistorian-008: interleave many enqueues and
+    ///     drains across threads and confirm the in-memory counter stays consistent
+    ///     with storage. Catches drift bugs in the optimised path that would only show
+    ///     up under contention.
+    /// </summary>
+    [Fact]
+    public async Task Counter_remains_consistent_under_concurrent_enqueue_and_drain()
+    {
+        var writer = new FakeWriter();
+        using var sink = new SqliteStoreAndForwardSink(_dbPath, writer, _log);
+
+        var enqueuers = Enumerable.Range(0, 3).Select(t => Task.Run(async () =>
+        {
+            for (var i = 0; i < 60; i++)
+                await sink.EnqueueAsync(Event($"T{t}-{i}"), CancellationToken.None);
+        }));
+        var drainers = Enumerable.Range(0, 2).Select(_ => Task.Run(async () =>
+        {
+            for (var i = 0; i < 30; i++)
+            {
+                await sink.DrainOnceAsync(CancellationToken.None);
+                await Task.Delay(2);
+            }
+        }));
+
+        await Task.WhenAll(enqueuers.Concat(drainers));
+
+        // Drain anything left over.
+        for (var i = 0; i < 10; i++)
+            await sink.DrainOnceAsync(CancellationToken.None);
+
+        AssertQueueDepthMatchesStorage(sink);
+        sink.GetStatus().QueueDepth.ShouldBe(0, "every event drained at the end of the run");
+    }
+
+    /// <summary>
+    ///     Helper that confirms the queue depth surfaced by GetStatus matches a fresh
+    ///     COUNT(*) read directly from storage — proves the in-memory counter has not
+    ///     drifted from the persisted truth.
+    /// </summary>
+    private void AssertQueueDepthMatchesStorage(SqliteStoreAndForwardSink sink)
+    {
+        using var conn = new SqliteConnection($"Data Source={_dbPath}");
+        conn.Open();
+        using var cmd = conn.CreateCommand();
+        cmd.CommandText = "SELECT COUNT(*) FROM Queue WHERE DeadLettered = 0";
+        var live = (long)(cmd.ExecuteScalar() ?? 0L);
+        sink.GetStatus().QueueDepth.ShouldBe(live, "GetStatus must agree with a fresh COUNT(*)");
+    }
+
    /// <summary>Insert a queue row whose PayloadJson cannot deserialize into an AlarmHistorianEvent.</summary>
    private void InsertCorruptRow(string alarmId)
    {