fix(historian): address code review on Raw HistoryRead paging
C1 (critical): a boundary tie cluster larger than NumValuesPerNode could silently truncate a resumed read to GoodNoData, permanently dropping the un-emitted ties — the (timestamp, skip) cursor cannot advance past a single timestamp the fixed-(start,end,cap) backend keeps re-returning. Now detected and failed LOUDLY per node with BadHistoryOperationUnsupported + a log naming the tag/timestamp/cap; documented in Historian.md with the larger-cap remedy. Regression test Raw_tie_cluster_larger_than_page_fails_loudly_not_silently. I3: build HistoryData before Save() so a projection failure can never orphan a stored continuation cursor. N1 (YAGNI): drop the never-produced HistoryReadKind enum + Processed-only Aggregate/IntervalTicks fields from HistoryContinuationState — only Raw pages. N3: ComputeResumeCursor guards its documented non-empty precondition. I1: document InMemoryHistoryContinuationStore's eventual-consistency (test double). Build clean, 182/182 OpcUaServer tests pass.
This commit is contained in:
@@ -105,7 +105,11 @@ internal sealed class InMemoryHistoryContinuationStore(int capacity = 100) : IHi
|
||||
{
|
||||
private readonly object _gate = new();
|
||||
private readonly Dictionary<Guid, HistoryContinuationState> _states = new();
|
||||
// Insertion order, so we can evict the OLDEST when over capacity (matches the SDK store).
|
||||
// Insertion order, so we can evict the OLDEST when over capacity (matches the SDK store). Eventually
|
||||
// consistent: a Guid taken/released stays in _order until the eviction loop reaches it and finds it
|
||||
// already gone from _states (a harmless no-op dequeue). _states is the source of truth for liveness;
|
||||
// _order only ever over-approximates it, so eviction never drops a LIVE entry early. Fine for a
|
||||
// bounded test double — production uses the SDK's own per-session store, not this class.
|
||||
private readonly Queue<Guid> _order = new();
|
||||
private readonly int _capacity = capacity < 1 ? 1 : capacity;
|
||||
|
||||
|
||||
@@ -3,25 +3,12 @@ using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
namespace ZB.MOM.WW.OtOpcUa.OpcUaServer;
|
||||
|
||||
/// <summary>
|
||||
/// The kind of variable-history read a continuation point resumes. Only the two count-capped,
|
||||
/// time-range arms page server-side (see <see cref="HistoryPaging"/>); AtTime is single-shot
|
||||
/// (no client count cap, so there is never a "full page" signal to page on) and never produces a
|
||||
/// continuation point, so it has no entry here.
|
||||
/// </summary>
|
||||
internal enum HistoryReadKind
|
||||
{
|
||||
/// <summary>HistoryRead-Raw — resumes via <see cref="IHistorianDataSource.ReadRawAsync"/>.</summary>
|
||||
Raw,
|
||||
|
||||
/// <summary>HistoryRead-Processed — resumes via <see cref="IHistorianDataSource.ReadProcessedAsync"/>.</summary>
|
||||
Processed,
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// The server-side resume state stored behind an opaque continuation point for a single
|
||||
/// paged variable-history read. Captures exactly enough to continue the SAME logical read from
|
||||
/// where the previous page stopped: the read kind + tagname, the original (inclusive) end of the
|
||||
/// window, the next start of the window, and — for Processed — the aggregate + interval.
|
||||
/// The server-side resume state stored behind an opaque continuation point for a single paged
|
||||
/// HistoryRead-Raw read. Captures exactly enough to continue the SAME logical read from where the
|
||||
/// previous page stopped: the tagname, the original (inclusive) end of the window, the next start of
|
||||
/// the window, and the tie-safe boundary skip. (Only Raw pages server-side — Processed and AtTime
|
||||
/// carry no client count cap, so they are single-shot and never produce a continuation point; see
|
||||
/// <see cref="HistoryPaging"/>.)
|
||||
/// <para>
|
||||
/// The boundary fields (<see cref="NextStartUtc"/> + <see cref="BoundarySkipCount"/>) encode a
|
||||
/// tie-safe resume cursor: the next page reads from <see cref="NextStartUtc"/> INCLUSIVE and
|
||||
@@ -34,7 +21,6 @@ internal enum HistoryReadKind
|
||||
/// cheap value that unit tests can drive directly.
|
||||
/// </para>
|
||||
/// </summary>
|
||||
/// <param name="Kind">Which variable-history arm this state resumes.</param>
|
||||
/// <param name="Tagname">The resolved historian tagname (NOT the NodeId) to read from.</param>
|
||||
/// <param name="NextStartUtc">
|
||||
/// Inclusive lower bound for the next page — the boundary timestamp the previous page stopped on.
|
||||
@@ -45,17 +31,12 @@ internal enum HistoryReadKind
|
||||
/// prior pages and must be dropped from the head of the next page (tie de-dup).
|
||||
/// </param>
|
||||
/// <param name="NumValuesPerNode">The client's per-page cap; re-applied to every resumed page.</param>
|
||||
/// <param name="Aggregate">The aggregate for a Processed read; ignored for Raw.</param>
|
||||
/// <param name="IntervalTicks">The Processed bucketing interval in ticks; ignored for Raw.</param>
|
||||
internal sealed record HistoryContinuationState(
|
||||
HistoryReadKind Kind,
|
||||
string Tagname,
|
||||
DateTime NextStartUtc,
|
||||
DateTime EndUtc,
|
||||
int BoundarySkipCount,
|
||||
uint NumValuesPerNode,
|
||||
HistoryAggregateType Aggregate,
|
||||
long IntervalTicks);
|
||||
uint NumValuesPerNode);
|
||||
|
||||
/// <summary>
|
||||
/// Pure server-side continuation-point paging decisions for the count-capped variable-history arms
|
||||
@@ -111,6 +92,11 @@ internal static class HistoryPaging
|
||||
out DateTime nextStartUtc,
|
||||
out int boundarySkipCount)
|
||||
{
|
||||
// Enforce the documented precondition at the API boundary rather than relying on the caller's guard
|
||||
// (the only caller only pages a full, non-empty page, but this is a public static helper).
|
||||
if (page.Count == 0)
|
||||
throw new ArgumentOutOfRangeException(nameof(page), "ComputeResumeCursor requires a non-empty page.");
|
||||
|
||||
// The boundary is the last returned sample's SourceTimestamp. A sample whose SourceTimestamp is
|
||||
// null (Bad/unset) cannot anchor a time cursor; fall back to MinValue so the next read covers the
|
||||
// whole remaining window rather than silently dropping data — duplicates are then de-duped by the
|
||||
|
||||
@@ -1751,28 +1751,53 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2
|
||||
.ReadRawAsync(tagname, startUtc, endUtc, numValuesPerNode, CancellationToken.None)
|
||||
.GetAwaiter().GetResult();
|
||||
|
||||
var backendFull = HistoryPaging.IsFullPage(sourceResult.Samples.Count, numValuesPerNode);
|
||||
|
||||
// On a resume read, drop the boundary ties already returned on the prior page.
|
||||
var samples = inboundCp is { Length: > 0 }
|
||||
? HistoryPaging.TrimBoundaryDuplicates(sourceResult.Samples, startUtc, boundarySkip)
|
||||
: sourceResult.Samples;
|
||||
|
||||
// Degenerate tie cluster: a resume read returned a FULL backend page that the boundary-tie trim
|
||||
// emptied entirely. That can only happen when more than NumValuesPerNode samples share the resume
|
||||
// boundary timestamp — a tie cluster larger than the page cap. The fixed-(start,end,cap) backend
|
||||
// can only ever return the first `cap` of those ties, so a (timestamp, skip) cursor can never
|
||||
// advance past the cluster. Fail LOUDLY for this node rather than silently truncate to GoodNoData
|
||||
// (which would permanently drop the un-emitted ties). The operator's remedy is a larger
|
||||
// NumValuesPerNode; see docs/Historian.md "Paging limitation".
|
||||
if (inboundCp is { Length: > 0 } && backendFull && samples.Count == 0)
|
||||
{
|
||||
#pragma warning disable CS0618 // Type or member is obsolete
|
||||
Utils.LogError(
|
||||
"OtOpcUaNodeManager: HistoryReadRaw paging stalled — tie cluster at {0:O} for tag '{1}' " +
|
||||
"exceeds NumValuesPerNode={2}; cannot page past it. Increase NumValuesPerNode.",
|
||||
startUtc, tagname, numValuesPerNode);
|
||||
#pragma warning restore CS0618
|
||||
errors[handle.Index] = StatusCodes.BadHistoryOperationUnsupported;
|
||||
results[handle.Index] = new SdkHistoryReadResult { StatusCode = StatusCodes.BadHistoryOperationUnsupported };
|
||||
return;
|
||||
}
|
||||
|
||||
// The "full page" test is against the RAW backend count (before trimming): the backend honoured
|
||||
// the cap, so a full backend page ⇒ there may be more even if we trimmed some boundary ties.
|
||||
var historyData = ToHistoryDataFromSamples(samples);
|
||||
|
||||
byte[]? outboundCp = null;
|
||||
if (HistoryPaging.IsFullPage(sourceResult.Samples.Count, numValuesPerNode) && samples.Count > 0)
|
||||
if (backendFull && samples.Count > 0)
|
||||
{
|
||||
HistoryPaging.ComputeResumeCursor(samples, out var nextStart, out var skip);
|
||||
var nextState = new HistoryContinuationState(
|
||||
HistoryReadKind.Raw, tagname, nextStart, endUtc, skip, numValuesPerNode,
|
||||
Aggregate: default, IntervalTicks: 0);
|
||||
tagname, nextStart, endUtc, skip, numValuesPerNode);
|
||||
// Save may return null (no session on this request) ⇒ degrade to single-shot for this node.
|
||||
// Built AFTER historyData so a failure projecting samples can never orphan a stored cursor.
|
||||
outboundCp = _historyContinuationStore.Save(session, nextState);
|
||||
}
|
||||
|
||||
var historyData = ToHistoryDataFromSamples(samples);
|
||||
results[handle.Index] = new SdkHistoryReadResult
|
||||
{
|
||||
// No samples ⇒ GoodNoData (the node is historized, the window just held no data).
|
||||
// No samples ⇒ GoodNoData (the node is historized, the window just held no data). With the
|
||||
// degenerate-cluster guard above, a resumed empty page now only means the window/cluster is
|
||||
// genuinely drained — never silent data loss.
|
||||
StatusCode = samples.Count == 0 ? StatusCodes.GoodNoData : StatusCodes.Good,
|
||||
HistoryData = new ExtensionObject(historyData),
|
||||
ContinuationPoint = outboundCp,
|
||||
|
||||
Reference in New Issue
Block a user