fix(historian-gateway): cancellation-safe alarm writer + dispose-safe outbox + provisioner polish + outbox tests

I-1: GatewayAlarmHistorianWriter no longer dead-letters events cancelled
mid-drain at shutdown. WriteBatchAsync short-circuits remaining events to
RetryPlease once cancellation is requested, and SendOneAsync catches
OperationCanceledException (when the token is cancelled) -> RetryPlease,
so in-flight events stay queued instead of being permanently dropped.

I-2: FasterLogHistorizationOutbox.Dispose now guards the awaited periodic
loop with a broad catch (Exception) after the OperationCanceledException
catch, so a non-Faster teardown fault (e.g. ObjectDisposedException) can
never escape Dispose.

M-1: GatewayTagProvisioner skips the empty EnsureTags round-trip when every
request is non-historizable (early return).

M-2: GatewayTagProvisioner handles plain shutdown cancellation quietly
(Debug, not Warning), counting the unsent batch as Failed, never throwing.

M-3/M-4: Added remove-last-entry (TailAddress truncation branch) and
FIFO implicit-ack (RemoveAsync acks up to and including the target)
durability tests, both reopen-and-survive.

M-5: Clarifying comment in RecoverState on the transient over-capacity
rebuild after a crash between append-commit and drop-truncation-commit.

Claude-Session: https://claude.ai/code/session_012SDSQ3AcaXqPcBtDESBRii
This commit is contained in:
Joseph Doherty
2026-06-26 17:47:20 -04:00
parent 0be79219fc
commit 22711444cc
6 changed files with 127 additions and 1 deletions
@@ -59,6 +59,54 @@ public sealed class FasterLogHistorizationOutboxTests : IDisposable
Assert.Equal(keep, batch[0].Id);
}
[Fact]
public async Task Remove_last_entry_empties_and_survives_restart()
{
    // Scenario: the outbox holds exactly one entry and that entry is acked. Per the original
    // test notes this drives the node.Next == null ⇒ TailAddress truncation branch, where the
    // head advances to the tail. The resulting empty state has to be durable: if the entry
    // reappeared after a restart, an already-acked entry would silently be drained again.
    var ct = TestContext.Current.CancellationToken;
    var dir = NewTempDir();
    var only = E("A", 1);

    // First lifetime: append, ack, and confirm the outbox reports empty before it is disposed.
    using (var first = new FasterLogHistorizationOutbox(dir, HistorizationCommitMode.PerEntry))
    {
        await first.AppendAsync(only, ct);
        await first.RemoveAsync(only.Id, ct); // ack the only entry

        var countAfterAck = await first.CountAsync(ct);
        Assert.Equal(0, countAfterAck);
    }

    // Second lifetime over the same directory: the outbox must still be empty.
    using var reopened = new FasterLogHistorizationOutbox(dir, HistorizationCommitMode.PerEntry);

    var countAfterRestart = await reopened.CountAsync(ct);
    Assert.Equal(0, countAfterRestart);

    var peeked = await reopened.PeekBatchAsync(10, ct);
    Assert.Empty(peeked);
}
[Fact]
public async Task Remove_acks_fifo_up_to_and_including_target_and_survives_restart()
{
    // Scenario: three entries A, B, C are appended in order and B is acked. Under the FIFO
    // implicit-ack (head-advance) semantics documented on RemoveAsync, acking B truncates
    // everything up to AND including B — so A is acked implicitly — and only the newer C
    // remains. That outcome has to hold both immediately and after a restart.
    var ct = TestContext.Current.CancellationToken;
    var dir = NewTempDir();
    var a = E("A", 1);
    var b = E("B", 2);
    var c = E("C", 3);
    var expectedIds = new[] { c.Id };

    // First lifetime: append all three, ack the middle one, verify only C is left.
    using (var first = new FasterLogHistorizationOutbox(dir, HistorizationCommitMode.PerEntry))
    {
        await first.AppendAsync(a, ct);
        await first.AppendAsync(b, ct);
        await first.AppendAsync(c, ct);
        await first.RemoveAsync(b.Id, ct); // acks A and B, leaves C

        var countAfterAck = await first.CountAsync(ct);
        Assert.Equal(1, countAfterAck);

        var remaining = await first.PeekBatchAsync(10, ct);
        Assert.Equal(expectedIds, remaining.Select(e => e.Id));
    }

    // Second lifetime over the same directory: C, and only C, must have survived.
    using var reopened = new FasterLogHistorizationOutbox(dir, HistorizationCommitMode.PerEntry);

    var countAfterRestart = await reopened.CountAsync(ct);
    Assert.Equal(1, countAfterRestart);

    var survived = await reopened.PeekBatchAsync(10, ct);
    Assert.Equal(expectedIds, survived.Select(e => e.Id));
}
[Fact]
public async Task Capacity_full_drops_oldest_and_counts()
{