fix(core-alarm-historian): resolve Low code-review findings (Core.AlarmHistorian-008,011)
- Core.AlarmHistorian-008: cache queue depth in an Interlocked counter so EnqueueAsync no longer runs COUNT(*) on every alarm; consolidate DrainOnceAsync onto a single SqliteConnection per tick (purge, batch read, dead-letter, and outcome transaction all share it). - Core.AlarmHistorian-011: confirm the stale Galaxy.Host XML doc references were already fixed under earlier commits; flip to Resolved. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -609,6 +609,130 @@ public sealed class SqliteStoreAndForwardSinkTests : IDisposable
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Regression for Core.AlarmHistorian-008: <c>EnqueueAsync</c> must NOT run a
|
||||
/// <c>SELECT COUNT(*)</c> on every enqueue when we are far below capacity. The
|
||||
/// optimisation tracks the queue depth in memory and only consults the database
|
||||
/// when the cached value indicates the capacity wall is in reach. This regression
|
||||
/// pins the perf characteristic: after many enqueues below capacity, the
|
||||
/// capacity-probe count must stay bounded — not grow proportionally to the
|
||||
/// enqueue count as the un-optimised path did.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task EnqueueAsync_does_not_count_all_rows_on_every_call_below_capacity()
|
||||
{
|
||||
var writer = new FakeWriter();
|
||||
using var sink = new SqliteStoreAndForwardSink(
|
||||
_dbPath, writer, _log, batchSize: 100, capacity: 10_000);
|
||||
|
||||
for (var i = 0; i < 200; i++)
|
||||
await sink.EnqueueAsync(Event($"E{i}"), CancellationToken.None);
|
||||
|
||||
// Pre-fix: probe count == enqueue count (200). Post-fix: ≤ a handful (initial
|
||||
// load + occasional periodic re-syncs). 25 is generous headroom.
|
||||
sink.DebugCapacityProbeCount.ShouldBeLessThan(25,
|
||||
"EnqueueAsync must not run a per-call SELECT COUNT(*) below capacity");
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Regression for Core.AlarmHistorian-008: across every queue mutation (enqueue,
|
||||
/// Ack drain, PermanentFail drain, capacity eviction, RetryDeadLettered) the
|
||||
/// queue depth reported by <see cref="SqliteStoreAndForwardSink.GetStatus"/> must
|
||||
/// stay aligned with a fresh <c>COUNT(*)</c> against the database. Catches drift
|
||||
/// bugs in the in-memory counter introduced by the perf optimisation.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task Enqueue_and_drain_keep_queue_depth_consistent_with_storage()
|
||||
{
|
||||
var writer = new FakeWriter();
|
||||
using var sink = new SqliteStoreAndForwardSink(
|
||||
_dbPath, writer, _log, batchSize: 5, capacity: 8);
|
||||
|
||||
// Burst-enqueue below capacity — the in-memory counter must stay aligned with the
|
||||
// SELECT COUNT(*) that GetStatus runs.
|
||||
for (var i = 0; i < 6; i++)
|
||||
await sink.EnqueueAsync(Event($"burst-{i}"), CancellationToken.None);
|
||||
AssertQueueDepthMatchesStorage(sink);
|
||||
sink.GetStatus().QueueDepth.ShouldBe(6);
|
||||
|
||||
// Push past capacity — capacity must still be enforced even when EnqueueAsync no
|
||||
// longer runs COUNT(*) on every call.
|
||||
for (var i = 0; i < 5; i++)
|
||||
await sink.EnqueueAsync(Event($"overflow-{i}"), CancellationToken.None);
|
||||
sink.GetStatus().QueueDepth.ShouldBe(8, "capacity must still be honoured by the perf-optimised path");
|
||||
sink.GetStatus().EvictedCount.ShouldBe(3, "eviction counter must reflect every evicted row");
|
||||
AssertQueueDepthMatchesStorage(sink);
|
||||
|
||||
// Drain a partial batch (Ack) — the in-memory counter must follow the deletes
|
||||
// applied inside the single consolidated drain transaction.
|
||||
await sink.DrainOnceAsync(CancellationToken.None);
|
||||
AssertQueueDepthMatchesStorage(sink);
|
||||
|
||||
// Add a dead-lettered row and verify the counter does NOT include it (QueueDepth
|
||||
// is non-dead-lettered only).
|
||||
writer.NextOutcomePerEvent.Enqueue(HistorianWriteOutcome.PermanentFail);
|
||||
await sink.EnqueueAsync(Event("to-dead-letter"), CancellationToken.None);
|
||||
await sink.DrainOnceAsync(CancellationToken.None);
|
||||
AssertQueueDepthMatchesStorage(sink);
|
||||
sink.GetStatus().DeadLetterDepth.ShouldBeGreaterThanOrEqualTo(1);
|
||||
|
||||
// RetryDeadLettered moves DLQ rows back into the live queue — the counter must
|
||||
// pick that up.
|
||||
sink.RetryDeadLettered();
|
||||
AssertQueueDepthMatchesStorage(sink);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Stress regression for Core.AlarmHistorian-008: interleave many enqueues and
|
||||
/// drains across threads and confirm the in-memory counter stays consistent
|
||||
/// with storage. Catches drift bugs in the optimised path that would only show
|
||||
/// up under contention.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task Counter_remains_consistent_under_concurrent_enqueue_and_drain()
|
||||
{
|
||||
var writer = new FakeWriter();
|
||||
using var sink = new SqliteStoreAndForwardSink(_dbPath, writer, _log);
|
||||
|
||||
var enqueuers = Enumerable.Range(0, 3).Select(t => Task.Run(async () =>
|
||||
{
|
||||
for (var i = 0; i < 60; i++)
|
||||
await sink.EnqueueAsync(Event($"T{t}-{i}"), CancellationToken.None);
|
||||
}));
|
||||
var drainers = Enumerable.Range(0, 2).Select(_ => Task.Run(async () =>
|
||||
{
|
||||
for (var i = 0; i < 30; i++)
|
||||
{
|
||||
await sink.DrainOnceAsync(CancellationToken.None);
|
||||
await Task.Delay(2);
|
||||
}
|
||||
}));
|
||||
|
||||
await Task.WhenAll(enqueuers.Concat(drainers));
|
||||
|
||||
// Drain anything left over.
|
||||
for (var i = 0; i < 10; i++)
|
||||
await sink.DrainOnceAsync(CancellationToken.None);
|
||||
|
||||
AssertQueueDepthMatchesStorage(sink);
|
||||
sink.GetStatus().QueueDepth.ShouldBe(0, "every event drained at the end of the run");
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Helper that confirms the queue depth surfaced by GetStatus matches a fresh
|
||||
/// COUNT(*) read directly from storage — proves the in-memory counter has not
|
||||
/// drifted from the persisted truth.
|
||||
/// </summary>
|
||||
private void AssertQueueDepthMatchesStorage(SqliteStoreAndForwardSink sink)
|
||||
{
|
||||
using var conn = new SqliteConnection($"Data Source={_dbPath}");
|
||||
conn.Open();
|
||||
using var cmd = conn.CreateCommand();
|
||||
cmd.CommandText = "SELECT COUNT(*) FROM Queue WHERE DeadLettered = 0";
|
||||
var live = (long)(cmd.ExecuteScalar() ?? 0L);
|
||||
sink.GetStatus().QueueDepth.ShouldBe(live, "GetStatus must agree with a fresh COUNT(*)");
|
||||
}
|
||||
|
||||
/// <summary>Insert a queue row whose PayloadJson cannot deserialize into an AlarmHistorianEvent.</summary>
|
||||
private void InsertCorruptRow(string alarmId)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user