feat(historian): page within oversized tie clusters (#400) instead of loud-failing

This commit is contained in:
Joseph Doherty
2026-06-17 20:11:09 -04:00
parent 3699fc16a8
commit 2e6c6d3ab6
6 changed files with 368 additions and 31 deletions
@@ -146,6 +146,112 @@ public sealed class HistoryPagingTests
trimmed[0].Value.ShouldBe(11.0);
}
// --- SliceTieCluster ------------------------------------------------------------------------
[Fact]
public void SliceTieCluster_mid_cluster_emits_cp_at_same_timestamp()
{
    // Scenario: ten samples tie at the boundary timestamp, two of them went out on earlier pages and
    // the page cap is three. This page therefore covers [2,5) and the cluster is not yet drained.
    const int ties = 10;
    const int alreadyEmitted = 2;
    const int pageCap = 3;
    var boundary = new DateTime(2026, 1, 1, 0, 0, 5, DateTimeKind.Utc);
    var windowEnd = boundary.AddHours(1);

    HistoryPaging.SliceTieCluster(
        clusterCount: ties, skip: alreadyEmitted, cap: pageCap, boundaryT: boundary, endUtc: windowEnd,
        out var firstIndex, out var emitted, out var resumeAt, out var resumeSkip);

    firstIndex.ShouldBe(alreadyEmitted);
    emitted.ShouldBe(pageCap);
    // Ties remain at the boundary, so the next page must start AT the same timestamp...
    resumeAt.ShouldBe(boundary);
    // ...skipping everything delivered so far (previous pages plus this one).
    resumeSkip.ShouldBe(alreadyEmitted + pageCap);
}
[Fact]
public void SliceTieCluster_exact_drain_advances_one_tick_when_window_remains()
{
    // Scenario: six ties at the boundary, three already delivered, cap three. The page covers [3,6),
    // which empties the cluster exactly while the read window still has an hour left.
    const int ties = 6;
    const int alreadyEmitted = 3;
    const int pageCap = 3;
    var boundary = new DateTime(2026, 1, 1, 0, 0, 5, DateTimeKind.Utc);
    var windowEnd = boundary.AddHours(1);
    var tickAfterBoundary = boundary.AddTicks(1);

    HistoryPaging.SliceTieCluster(
        clusterCount: ties, skip: alreadyEmitted, cap: pageCap, boundaryT: boundary, endUtc: windowEnd,
        out var firstIndex, out var emitted, out var resumeAt, out var resumeSkip);

    firstIndex.ShouldBe(alreadyEmitted);
    emitted.ShouldBe(pageCap);
    // Cluster finished and window not exhausted: resume one tick later with a fresh skip.
    resumeAt.ShouldBe(tickAfterBoundary);
    resumeSkip.ShouldBe(0);
}
[Fact]
public void SliceTieCluster_short_final_slice_still_emits_cp_when_window_remains()
{
    // Scenario: five ties at the boundary, three already delivered, cap ten. Only two ties are left,
    // so the page [3,5) is shorter than the cap. It still drains the cluster, and because the window
    // continues past the boundary a continuation is required to read what follows.
    const int ties = 5;
    const int alreadyEmitted = 3;
    const int pageCap = 10;
    var boundary = new DateTime(2026, 1, 1, 0, 0, 5, DateTimeKind.Utc);
    var windowEnd = boundary.AddHours(1);
    var tickAfterBoundary = boundary.AddTicks(1);

    HistoryPaging.SliceTieCluster(
        clusterCount: ties, skip: alreadyEmitted, cap: pageCap, boundaryT: boundary, endUtc: windowEnd,
        out var firstIndex, out var emitted, out var resumeAt, out var resumeSkip);

    firstIndex.ShouldBe(alreadyEmitted);
    // Short page: fewer values than the cap allows.
    emitted.ShouldBe(ties - alreadyEmitted);
    // A resume point is still produced so the remainder of the window gets read.
    resumeAt.ShouldBe(tickAfterBoundary);
    resumeSkip.ShouldBe(0);
}
[Fact]
public void SliceTieCluster_drained_at_window_end_terminates()
{
    // Scenario: the tie cluster sits exactly on the window end (endUtc equals the boundary). Once it
    // is drained there is nowhere left to advance to, so the read is terminal and yields no resume point.
    const int ties = 4;
    const int pageCap = 10;
    var boundary = new DateTime(2026, 1, 1, 0, 0, 5, DateTimeKind.Utc);

    HistoryPaging.SliceTieCluster(
        clusterCount: ties, skip: 0, cap: pageCap, boundaryT: boundary, endUtc: boundary,
        out var firstIndex, out var emitted, out var resumeAt, out var resumeSkip);

    firstIndex.ShouldBe(0);
    emitted.ShouldBe(ties);
    // One tick past the boundary would lie beyond endUtc: window exhausted, terminal page.
    resumeAt.ShouldBeNull();
    resumeSkip.ShouldBe(0);
}
[Fact]
public void SliceTieCluster_self_heals_when_skip_exceeds_cluster()
{
    // Defensive scenario: the incoming skip is stale and already covers the whole (re-read, possibly
    // shrunk) cluster. The slice must come back empty and the cursor must move on or terminate
    // instead of revisiting the same timestamp forever.
    const int ties = 4;
    const int staleSkip = 4;
    const int pageCap = 10;
    var boundary = new DateTime(2026, 1, 1, 0, 0, 5, DateTimeKind.Utc);
    var windowEnd = boundary.AddHours(1);
    var tickAfterBoundary = boundary.AddTicks(1);

    HistoryPaging.SliceTieCluster(
        clusterCount: ties, skip: staleSkip, cap: pageCap, boundaryT: boundary, endUtc: windowEnd,
        out var firstIndex, out var emitted, out var resumeAt, out var resumeSkip);

    firstIndex.ShouldBe(staleSkip);
    // Nothing in the cluster remains to be emitted.
    emitted.ShouldBe(0);
    // Emitted total (4) equals the cluster size (4): advance past the boundary rather than loop.
    resumeAt.ShouldBe(tickAfterBoundary);
    resumeSkip.ShouldBe(0);
}
[Fact]
public void SliceTieCluster_self_heals_to_terminal_at_window_end()
{
    // Stale-skip scenario again, but with the window ending on the boundary itself: moving past the
    // cluster must end the read instead of looping.
    const int ties = 4;
    const int staleSkip = 9;
    const int pageCap = 10;
    var boundary = new DateTime(2026, 1, 1, 0, 0, 5, DateTimeKind.Utc);

    HistoryPaging.SliceTieCluster(
        clusterCount: ties, skip: staleSkip, cap: pageCap, boundaryT: boundary, endUtc: boundary,
        out var firstIndex, out var emitted, out var resumeAt, out var resumeSkip);

    // The oversized skip is clamped down to the cluster size.
    firstIndex.ShouldBe(ties);
    emitted.ShouldBe(0);
    resumeAt.ShouldBeNull();
    resumeSkip.ShouldBe(0);
}
// --- InMemoryHistoryContinuationStore (mirrors the production session store contract) --------
[Fact]
@@ -146,38 +146,83 @@ public sealed class NodeManagerHistoryReadPagingTests : IDisposable
await host.DisposeAsync();
}
/// <summary>Degenerate tie cluster larger than the page cap: a single timestamp carrying MORE ties
/// than <c>NumValuesPerNode</c> cannot be paged past by a (timestamp, skip) cursor — the fixed-(start,
/// end,cap) backend keeps returning the same first <c>cap</c> ties. Rather than silently truncate to
/// GoodNoData (permanently dropping the un-emitted ties), the resume read FAILS LOUDLY for that node
/// with <c>BadHistoryOperationUnsupported</c>. (Regression for the data-loss path the carry-offset
/// cursor cannot resolve; the operator's remedy is a larger NumValuesPerNode.)</summary>
/// <summary>Oversized tie cluster (more ties at one timestamp than the page cap): the (timestamp, skip)
/// resume cursor alone cannot advance past it — the fixed-(start,end,cap) backend keeps returning the
/// same first <c>cap</c> ties. The paging now over-fetches the WHOLE cluster (a <c>start == end</c> read,
/// bounded by <c>MaxTieClusterOverfetch</c>) and pages WITHIN the timestamp, so the read drains the
/// cluster (and any data after it) across continuation points with no dup/skip. Here a tie cluster of 5
/// (indices 3..7 share one timestamp) sits between distinct samples; with cap 2 the union must be all 10
/// values in order.</summary>
[Fact]
public async Task Raw_tie_cluster_larger_than_page_fails_loudly_not_silently()
public async Task Raw_oversized_tie_cluster_pages_within_the_timestamp()
{
var (host, server) = await BootAsync();
var nm = server.NodeManager!;
nm.HistoryContinuationStore = new InMemoryHistoryContinuationStore();
// 6 samples ALL sharing one timestamp (Epoch+2s) — a tie cluster of 6 with a page cap of 4.
var t = Epoch.AddSeconds(2);
var series = Enumerable.Range(0, 6)
.Select(i => new DataValueSnapshot((double)i, StatusCodes.Good, t, t)).ToArray();
// 3 distinct, then a tie cluster of 5 (all at Epoch+3s), then 2 more distinct — 10 total. The tie
// cluster (5) is larger than the page cap (2), the case that used to stall the cursor.
var series = MakeSeriesWithTieCluster(distinctBefore: 3, tieCount: 5, distinctAfter: 2);
series.Length.ShouldBe(10);
nm.HistorianDataSource = new SeriesHistorianDataSource(series);
nm.EnsureVariable("eq-1/burst", parentFolderNodeId: null, displayName: "Burst", dataType: "Double",
writable: false, historianTagname: "WW.Burst");
var nodeId = nm.TryGetVariable("eq-1/burst")!.NodeId;
// Page 1: a full page of the first 4 ties, with a continuation point.
var (r1, e1, cp1) = ReadRaw(nm, nodeId, Epoch, Epoch.AddHours(1), max: 4, inboundCp: null);
// Accumulates the values returned by every page, in the order the pages arrive.
var collected = new List<double>();
// Continuation point handed back by the previous page; null for the very first read.
byte[]? cp = null;
var pageCount = 0;
// Re-issue the same read (same window, cap 2), feeding back the continuation point from the previous
// page, until a page comes back without one.
do
{
var (values, error, nextCp) = ReadRaw(nm, nodeId, Epoch, Epoch.AddHours(1),
max: 2, inboundCp: cp);
error.StatusCode.Code.ShouldBe(StatusCodes.Good);
collected.AddRange(values);
cp = nextCp;
pageCount++;
// Guard: a cursor that never terminates fails the test here instead of spinning forever.
pageCount.ShouldBeLessThan(20, "paging must terminate, not loop, even through a tie cluster");
}
while (cp is not null);
// Every value, once, in order — the cluster was paged through, not dropped or duplicated.
collected.Count.ShouldBe(10);
collected.ShouldBe(Enumerable.Range(0, 10).Select(i => (double)i));
await host.DisposeAsync();
}
/// <summary>The absurd-burst backstop is preserved: a tie cluster STRICTLY larger than the configured
/// <see cref="OtOpcUaNodeManager.MaxTieClusterOverfetch"/> still surfaces
/// <c>BadHistoryOperationUnsupported</c> for that node rather than buffering an unbounded burst. With
/// the bound set to 3 and a 5-way tie cluster, the resume read that hits the cluster fails loudly.</summary>
[Fact]
public async Task Raw_tie_cluster_beyond_overfetch_bound_fails_loudly()
{
var (host, server) = await BootAsync();
var nm = server.NodeManager!;
nm.HistoryContinuationStore = new InMemoryHistoryContinuationStore();
nm.MaxTieClusterOverfetch = 3; // cluster of 5 will exceed this ⇒ backstop.
// 5 samples ALL sharing one timestamp (Epoch+2s) — a tie cluster of 5 with a page cap of 2.
var t = Epoch.AddSeconds(2);
var series = Enumerable.Range(0, 5)
.Select(i => new DataValueSnapshot((double)i, StatusCodes.Good, t, t)).ToArray();
nm.HistorianDataSource = new SeriesHistorianDataSource(series);
nm.EnsureVariable("eq-1/absurd", parentFolderNodeId: null, displayName: "Absurd", dataType: "Double",
writable: false, historianTagname: "WW.Absurd");
var nodeId = nm.TryGetVariable("eq-1/absurd")!.NodeId;
// Page 1: a full page of the first 2 ties, with a continuation point.
var (r1, e1, cp1) = ReadRaw(nm, nodeId, Epoch, Epoch.AddHours(1), max: 2, inboundCp: null);
e1.StatusCode.Code.ShouldBe(StatusCodes.Good);
r1.ShouldBe(new[] { 0.0, 1.0, 2.0, 3.0 });
r1.ShouldBe(new[] { 0.0, 1.0 });
cp1.ShouldNotBeNull();
// Page 2: the cursor cannot advance past the oversized cluster ⇒ a clear error, NOT a silent
// GoodNoData that would drop samples 4 and 5.
var (r2, e2, cp2) = ReadRaw(nm, nodeId, Epoch, Epoch.AddHours(1), max: 4, inboundCp: cp1);
// Page 2: the resume read hits the cluster; over-fetch finds 5 > bound 3 ⇒ a clear error, NOT a
// silent GoodNoData that would drop the un-emitted ties.
var (r2, e2, cp2) = ReadRaw(nm, nodeId, Epoch, Epoch.AddHours(1), max: 2, inboundCp: cp1);
e2.StatusCode.Code.ShouldBe(StatusCodes.BadHistoryOperationUnsupported);
r2.ShouldBeEmpty();
cp2.ShouldBeNull();
@@ -330,6 +375,35 @@ public sealed class NodeManagerHistoryReadPagingTests : IDisposable
})
.ToArray();
/// <summary>Creates a time-ordered series in three segments: first <paramref name="distinctBefore"/>
/// samples one second apart starting at <c>Epoch</c>, then <paramref name="tieCount"/> samples that all
/// carry the single next-second timestamp (the tie cluster), and finally
/// <paramref name="distinctAfter"/> samples again one second apart. Sample values count up from 0 in
/// series order, so a caller can assert the exact union of a paged read.</summary>
private static DataValueSnapshot[] MakeSeriesWithTieCluster(int distinctBefore, int tieCount, int distinctAfter)
{
    var series = new List<DataValueSnapshot>(distinctBefore + tieCount + distinctAfter);
    var nextValue = 0;
    var nextSecond = 0;

    // Leading run: one sample per second.
    for (var remaining = distinctBefore; remaining > 0; remaining--)
    {
        var stamp = Epoch.AddSeconds(nextSecond);
        nextSecond++;
        series.Add(new DataValueSnapshot((double)nextValue, StatusCodes.Good, stamp, stamp));
        nextValue++;
    }

    // Tie cluster: every sample shares this one timestamp; the second is consumed exactly once.
    var clusterStamp = Epoch.AddSeconds(nextSecond);
    nextSecond++;
    for (var remaining = tieCount; remaining > 0; remaining--)
    {
        series.Add(new DataValueSnapshot((double)nextValue, StatusCodes.Good, clusterStamp, clusterStamp));
        nextValue++;
    }

    // Trailing run: back to one sample per second, starting the second after the cluster.
    for (var remaining = distinctAfter; remaining > 0; remaining--)
    {
        var stamp = Epoch.AddSeconds(nextSecond);
        nextSecond++;
        series.Add(new DataValueSnapshot((double)nextValue, StatusCodes.Good, stamp, stamp));
        nextValue++;
    }

    return series.ToArray();
}
/// <summary>A series-backed fake historian: holds a full sorted series and serves a Raw read by
/// returning the samples in [start, end] capped at maxValuesPerNode (start inclusive — exactly the
/// resume-cursor contract). Processed / AtTime / Events are not exercised here.</summary>