fix(alarm-historian): keep queue rows aligned to events on drain (Core.AlarmHistorian-001)
ReadBatch built parallel rowIds / events lists: rowIds.Add ran for every row but events.Add was guarded by `if (evt is not null)`. A corrupt / null-deserializing payload desynced the lists, so DrainOnceAsync applied each outcome to the wrong RowId — an Ack could delete an un-sent event (silent alarm-event data loss) and the corrupt row stalled the queue head forever. ReadBatch now returns a single list of QueueRow(long RowId, AlarmHistorianEvent? Event) records so a rowId can never drift from its event; deserialization is wrapped to yield null on JsonException. DrainOnceAsync immediately dead-letters rows whose payload is null/un-deserializable and forwards only well-formed events to the writer, mapping outcomes by RowId. Regression tests cover a corrupt row mid-batch and at the queue head. Core.AlarmHistorian suite: 16/16 pass. Resolves code-review finding Core.AlarmHistorian-001 (Critical). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
using Microsoft.Data.Sqlite;
|
||||
using Serilog;
|
||||
using Serilog.Core;
|
||||
using Serilog.Events;
|
||||
@@ -283,4 +284,78 @@ public sealed class SqliteStoreAndForwardSinkTests : IDisposable
|
||||
await Should.ThrowAsync<ObjectDisposedException>(
|
||||
() => sink.EnqueueAsync(Event("A1"), CancellationToken.None));
|
||||
}
|
||||
|
||||
/// <summary>
/// Regression guard for Core.AlarmHistorian-001. A queue row whose PayloadJson
/// produces no event is planted between two well-formed rows. A single drain must
/// hand the writer only the two good events, in enqueue order, leave the queue
/// empty, and account for the bad row in the dead-letter count rather than losing
/// it. A follow-up drain must not produce another batch.
/// </summary>
[Fact]
public async Task Drain_with_corrupt_payload_row_deadletters_it_and_keeps_good_rows_aligned()
{
    var ct = CancellationToken.None;
    var fakeWriter = new FakeWriter();
    using var sink = new SqliteStoreAndForwardSink(_dbPath, fakeWriter, _log);

    // Queue layout: good row, corrupt row (written straight into the table), good row.
    await sink.EnqueueAsync(Event("good-1"), ct);
    InsertCorruptRow("corrupt");
    await sink.EnqueueAsync(Event("good-2"), ct);

    await sink.DrainOnceAsync(ct);

    // Exactly one batch reached the writer, holding only the well-formed events in order.
    fakeWriter.Batches.Count.ShouldBe(1);
    var forwardedIds = fakeWriter.Batches[0].Select(e => e.AlarmId);
    forwardedIds.ShouldBe(["good-1", "good-2"]);

    // Good rows are gone from the queue; the corrupt row shows up as a dead letter.
    var snapshot = sink.GetStatus();
    snapshot.QueueDepth.ShouldBe(0, "both good rows acked and removed");
    snapshot.DeadLetterDepth.ShouldBe(1, "corrupt row dead-lettered, not silently dropped");

    // Draining again must not send anything: the corrupt row is no longer picked up.
    await sink.DrainOnceAsync(ct);
    fakeWriter.Batches.Count.ShouldBe(1, "no further batches — corrupt row no longer re-read");
}
|
||||
|
||||
/// <summary>
/// Plants a corrupt row as the first row in the queue, followed by one good event.
/// One drain must still deliver the good event to the writer, empty the queue, and
/// report exactly one dead-lettered row.
/// </summary>
[Fact]
public async Task Drain_with_corrupt_head_row_does_not_stall_queue()
{
    var ct = CancellationToken.None;
    var fakeWriter = new FakeWriter();
    using var sink = new SqliteStoreAndForwardSink(_dbPath, fakeWriter, _log);

    // The corrupt row goes in first so it sits ahead of the good event.
    InsertCorruptRow("corrupt-head");
    await sink.EnqueueAsync(Event("good-1"), ct);

    await sink.DrainOnceAsync(ct);

    // The good event behind the corrupt head row was forwarded in a single batch.
    fakeWriter.Batches.Count.ShouldBe(1);
    var forwardedIds = fakeWriter.Batches[0].Select(e => e.AlarmId);
    forwardedIds.ShouldBe(["good-1"]);

    var snapshot = sink.GetStatus();
    snapshot.QueueDepth.ShouldBe(0);
    snapshot.DeadLetterDepth.ShouldBe(1);
}
|
||||
|
||||
/// <summary>
/// Inserts a row straight into the Queue table on a separate connection to the test
/// database, so a test can plant a PayloadJson value that does not deserialize into
/// an AlarmHistorianEvent.
/// </summary>
/// <param name="alarmId">Value stored in the row's AlarmId column.</param>
/// <param name="payloadJson">
/// Raw text stored in the PayloadJson column. Defaults to the JSON literal "null",
/// which round-trips through Deserialize&lt;T&gt; as a null reference. Pass malformed
/// JSON (for example "{not json") to plant a payload that fails to parse instead.
/// </param>
private void InsertCorruptRow(string alarmId, string payloadJson = "null")
{
    using var conn = new SqliteConnection($"Data Source={_dbPath}");
    conn.Open();

    using var cmd = conn.CreateCommand();
    cmd.CommandText = """
        INSERT INTO Queue (AlarmId, EnqueuedUtc, PayloadJson, AttemptCount)
        VALUES ($alarmId, $enqueued, $payload, 0);
        """;
    cmd.Parameters.AddWithValue("$alarmId", alarmId);
    // Round-trip ("O") format is the timestamp text this helper has always written.
    cmd.Parameters.AddWithValue("$enqueued", DateTime.UtcNow.ToString("O"));
    cmd.Parameters.AddWithValue("$payload", payloadJson);
    cmd.ExecuteNonQuery();
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user