fix(alarm-historian): keep queue rows aligned to events on drain (Core.AlarmHistorian-001)
ReadBatch built two parallel lists, rowIds and events: rowIds.Add ran for every row, but events.Add was guarded by `if (evt is not null)`. A corrupt or null-deserializing payload therefore desynchronized the lists, so DrainOnceAsync applied every outcome from the corrupt row onward to the wrong RowId — an Ack could delete an unsent event (silent alarm-event data loss), and the corrupt row stalled the queue head forever. ReadBatch now returns a single list of QueueRow(long RowId, AlarmHistorianEvent? Event) records, so a RowId can never drift from its event; deserialization is wrapped in a try/catch that yields a null event on JsonException. DrainOnceAsync immediately dead-letters rows whose payload is null or un-deserializable and forwards only well-formed events to the writer, mapping each outcome to its own row's RowId. Regression tests cover a corrupt row mid-batch and at the queue head. Core.AlarmHistorian suite: 16/16 pass. Resolves code-review finding Core.AlarmHistorian-001 (Critical). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -142,8 +142,34 @@ public sealed class SqliteStoreAndForwardSink : IAlarmHistorianSink, IDisposable
|
||||
_lastDrainUtc = _clock();
|
||||
|
||||
PurgeAgedDeadLetters();
|
||||
var (rowIds, events) = ReadBatch();
|
||||
if (rowIds.Count == 0)
|
||||
var batch = ReadBatch();
|
||||
if (batch.Count == 0)
|
||||
{
|
||||
_drainState = HistorianDrainState.Idle;
|
||||
return;
|
||||
}
|
||||
|
||||
// A null/un-deserializable payload can never succeed — dead-letter it
|
||||
// immediately for its own RowId so it cannot stall the queue head, and
|
||||
// exclude it from the batch handed to the writer.
|
||||
var corruptRowIds = batch.Where(r => r.Event is null).Select(r => r.RowId).ToList();
|
||||
var liveRows = batch.Where(r => r.Event is not null).ToList();
|
||||
var events = liveRows.Select(r => r.Event!).ToList();
|
||||
|
||||
if (corruptRowIds.Count > 0)
|
||||
{
|
||||
using var corruptConn = new SqliteConnection(_connectionString);
|
||||
corruptConn.Open();
|
||||
using var corruptTx = corruptConn.BeginTransaction();
|
||||
foreach (var rowId in corruptRowIds)
|
||||
DeadLetterRow(corruptConn, corruptTx, rowId, $"corrupt payload at {_clock():O}");
|
||||
corruptTx.Commit();
|
||||
_logger.Warning(
|
||||
"Dead-lettered {Count} historian queue row(s) with un-deserializable payload",
|
||||
corruptRowIds.Count);
|
||||
}
|
||||
|
||||
if (events.Count == 0)
|
||||
{
|
||||
_drainState = HistorianDrainState.Idle;
|
||||
return;
|
||||
@@ -179,7 +205,7 @@ public sealed class SqliteStoreAndForwardSink : IAlarmHistorianSink, IDisposable
|
||||
for (var i = 0; i < outcomes.Count; i++)
|
||||
{
|
||||
var outcome = outcomes[i];
|
||||
var rowId = rowIds[i];
|
||||
var rowId = liveRows[i].RowId;
|
||||
switch (outcome)
|
||||
{
|
||||
case HistorianWriteOutcome.Ack:
|
||||
@@ -252,10 +278,17 @@ public sealed class SqliteStoreAndForwardSink : IAlarmHistorianSink, IDisposable
|
||||
return cmd.ExecuteNonQuery();
|
||||
}
|
||||
|
||||
private (List<long> rowIds, List<AlarmHistorianEvent> events) ReadBatch()
|
||||
/// <summary>
|
||||
/// One queued row paired with its deserialized event. <see cref="Event"/> is
|
||||
/// <c>null</c> when the row's <c>PayloadJson</c> is corrupt or un-deserializable —
|
||||
/// the <see cref="RowId"/> always stays bound to its own row so outcomes can
|
||||
/// never be mapped to the wrong row.
|
||||
/// </summary>
/// <param name="RowId">Row identifier read from column 0 of the batch query.</param>
/// <param name="Event">
/// Event deserialized from the payload string in column 1, or <c>null</c> when
/// deserialization throws <c>JsonException</c> or yields <c>null</c>. The drain
/// dead-letters rows whose event is <c>null</c> instead of handing them to the writer.
/// </param>
|
||||
private readonly record struct QueueRow(long RowId, AlarmHistorianEvent? Event);
|
||||
|
||||
private List<QueueRow> ReadBatch()
|
||||
{
|
||||
var rowIds = new List<long>();
|
||||
var events = new List<AlarmHistorianEvent>();
|
||||
var rows = new List<QueueRow>();
|
||||
using var conn = new SqliteConnection(_connectionString);
|
||||
conn.Open();
|
||||
using var cmd = conn.CreateCommand();
|
||||
@@ -269,12 +302,21 @@ public sealed class SqliteStoreAndForwardSink : IAlarmHistorianSink, IDisposable
|
||||
using var reader = cmd.ExecuteReader();
|
||||
while (reader.Read())
|
||||
{
|
||||
rowIds.Add(reader.GetInt64(0));
|
||||
var rowId = reader.GetInt64(0);
|
||||
var payload = reader.GetString(1);
|
||||
var evt = JsonSerializer.Deserialize<AlarmHistorianEvent>(payload);
|
||||
if (evt is not null) events.Add(evt);
|
||||
AlarmHistorianEvent? evt;
|
||||
try
|
||||
{
|
||||
evt = JsonSerializer.Deserialize<AlarmHistorianEvent>(payload);
|
||||
}
|
||||
catch (JsonException)
|
||||
{
|
||||
// Malformed JSON — carry a null event so the caller dead-letters this row.
|
||||
evt = null;
|
||||
}
|
||||
rows.Add(new QueueRow(rowId, evt));
|
||||
}
|
||||
return (rowIds, events);
|
||||
return rows;
|
||||
}
|
||||
|
||||
private static void DeleteRow(SqliteConnection conn, SqliteTransaction tx, long rowId)
|
||||
|
||||
Reference in New Issue
Block a user