feat(historian): page within oversized tie clusters (#400) instead of loud-failing
This commit is contained in:
@@ -179,6 +179,19 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2
|
||||
set => _historianDataSource = value ?? NullHistorianDataSource.Instance;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// The upper bound on the bounded over-fetch <see cref="ServeRawPaged"/> uses to page WITHIN an
|
||||
/// oversized "tie cluster" — more raw samples sharing one SourceTimestamp than the client's per-page
|
||||
/// cap. When a resume read stalls on such a cluster (the boundary-tie trim empties the page, or the page's last sample is still at the resume timestamp), the
|
||||
/// paging over-fetches up to <c>MaxTieClusterOverfetch + 1</c> ties at that single timestamp (a
|
||||
/// <c>start == end</c> read) and slices through them via <see cref="HistoryPaging.SliceTieCluster"/>.
|
||||
/// A cluster strictly larger than this still surfaces <c>BadHistoryOperationUnsupported</c> for that
|
||||
/// node (the absurd-burst backstop). Mirrors the configured
|
||||
/// <c>ServerHistorianOptions.MaxTieClusterOverfetch</c>; the Host sets it at <c>StartAsync</c>. The
|
||||
/// default (65536) survives when the historian section is absent.
|
||||
/// </summary>
|
||||
/// <exception cref="ArgumentOutOfRangeException">The assigned value is negative.</exception>
public int MaxTieClusterOverfetch
{
    get => _maxTieClusterOverfetch;
    set
    {
        // The raw-paging path computes its over-fetch cap as (uint)(MaxTieClusterOverfetch + 1).
        // A value of -1 would make that cap 0, which the paging code itself notes must never
        // happen (a zero cap falls back to the backend's own read limit). Anything below -1
        // would wrap to a multi-billion cap while the "cluster.Count > MaxTieClusterOverfetch"
        // backstop fires on every cluster. Reject negatives at the source instead.
        if (value < 0)
        {
            throw new ArgumentOutOfRangeException(
                nameof(value), value, "MaxTieClusterOverfetch must be zero or greater.");
        }

        _maxTieClusterOverfetch = value;
    }
}

// Backing store for MaxTieClusterOverfetch; 65536 is the default used when nothing assigns it.
private int _maxTieClusterOverfetch = 65536;
|
||||
|
||||
private volatile IHistoryContinuationStore _historyContinuationStore = new SessionHistoryContinuationStore();
|
||||
|
||||
/// <summary>
|
||||
@@ -1871,23 +1884,70 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2
|
||||
? HistoryPaging.TrimBoundaryDuplicates(sourceResult.Samples, startUtc, boundarySkip)
|
||||
: sourceResult.Samples;
|
||||
|
||||
// Degenerate tie cluster: a resume read returned a FULL backend page that the boundary-tie trim
|
||||
// emptied entirely. That can only happen when more than NumValuesPerNode samples share the resume
|
||||
// boundary timestamp — a tie cluster larger than the page cap. The fixed-(start,end,cap) backend
|
||||
// can only ever return the first `cap` of those ties, so a (timestamp, skip) cursor can never
|
||||
// advance past the cluster. Fail LOUDLY for this node rather than silently truncate to GoodNoData
|
||||
// (which would permanently drop the un-emitted ties). The operator's remedy is a larger
|
||||
// NumValuesPerNode; see docs/Historian.md "Paging limitation".
|
||||
if (inboundCp is { Length: > 0 } && backendFull && samples.Count == 0)
|
||||
// Oversized tie cluster: a resume read whose FULL backend page does not advance past the resume
|
||||
// boundary timestamp `startUtc`. That happens when more samples share `startUtc` than the page
|
||||
// cap — a tie cluster larger than NumValuesPerNode. The fixed-(start,end,cap) backend can only
|
||||
// ever return the first `cap` of those ties, so the (timestamp, skip) cursor alone can never page
|
||||
// past (or even reliably within) the cluster: either the boundary-tie trim empties the page, or
|
||||
// it re-emits the same head ties forever. Detect BOTH stall shapes — an empty trimmed page, or a
|
||||
// non-empty page whose LAST sample is still at `startUtc` (no forward progress) — and, rather
|
||||
// than fail the read, over-fetch the WHOLE cluster with an explicit, BOUNDED cap (a start==end
|
||||
// read at the boundary timestamp) and page WITHIN the timestamp via SliceTieCluster.
|
||||
var stalledOnTieCluster = inboundCp is { Length: > 0 }
|
||||
&& backendFull
|
||||
&& (samples.Count == 0
|
||||
|| (samples[^1].SourceTimestampUtc ?? DateTime.MinValue) == startUtc);
|
||||
if (stalledOnTieCluster)
|
||||
{
|
||||
// The over-fetch cap MUST be explicit and non-zero: a cap of 0 falls back to the backend's
|
||||
// MaxValuesPerRead, which would re-introduce the very stall we're escaping. +1 over the bound
|
||||
// lets us DETECT a cluster strictly larger than the bound (the absurd-burst backstop below).
|
||||
var overfetchCap = (uint)(MaxTieClusterOverfetch + 1);
|
||||
var cluster = HistorianDataSource
|
||||
.ReadRawAsync(tagname, startUtc, startUtc, overfetchCap, CancellationToken.None)
|
||||
.GetAwaiter().GetResult().Samples;
|
||||
|
||||
// Absurd burst: more ties than we're willing to buffer in memory. Preserve today's loud-fail
|
||||
// for that node rather than over-fetch an unbounded cluster; the operator's remedy is a
|
||||
// larger ServerHistorian:MaxTieClusterOverfetch (or NumValuesPerNode).
|
||||
if (cluster.Count > MaxTieClusterOverfetch)
|
||||
{
|
||||
#pragma warning disable CS0618 // Type or member is obsolete
|
||||
Utils.LogError(
|
||||
"OtOpcUaNodeManager: HistoryReadRaw paging stalled — tie cluster at {0:O} for tag '{1}' " +
|
||||
"exceeds NumValuesPerNode={2}; cannot page past it. Increase NumValuesPerNode.",
|
||||
startUtc, tagname, numValuesPerNode);
|
||||
Utils.LogError(
|
||||
"OtOpcUaNodeManager: HistoryReadRaw tie cluster at {0:O} for tag '{1}' has {2} samples, " +
|
||||
"exceeding MaxTieClusterOverfetch={3}; cannot page within it. Increase MaxTieClusterOverfetch.",
|
||||
startUtc, tagname, cluster.Count, MaxTieClusterOverfetch);
|
||||
#pragma warning restore CS0618
|
||||
errors[handle.Index] = StatusCodes.BadHistoryOperationUnsupported;
|
||||
results[handle.Index] = new SdkHistoryReadResult { StatusCode = StatusCodes.BadHistoryOperationUnsupported };
|
||||
errors[handle.Index] = StatusCodes.BadHistoryOperationUnsupported;
|
||||
results[handle.Index] = new SdkHistoryReadResult { StatusCode = StatusCodes.BadHistoryOperationUnsupported };
|
||||
return;
|
||||
}
|
||||
|
||||
HistoryPaging.SliceTieCluster(
|
||||
cluster.Count, boundarySkip, numValuesPerNode, startUtc, endUtc,
|
||||
out var sliceStart, out var sliceCount, out var nextStartUtc, out var nextSkip);
|
||||
|
||||
var slice = new List<DataValueSnapshot>(sliceCount);
|
||||
for (var i = sliceStart; i < sliceStart + sliceCount; i++) slice.Add(cluster[i]);
|
||||
|
||||
// Emit a continuation point only when SliceTieCluster says the read continues (within the
|
||||
// cluster, or past it while the window remains). nextSkip is the boundary skip for the next
|
||||
// page — within the cluster it counts the ties already emitted at startUtc; past it it's 0.
|
||||
byte[]? clusterCp = null;
|
||||
if (nextStartUtc is { } resumeAt)
|
||||
{
|
||||
var clusterState = new HistoryContinuationState(
|
||||
tagname, resumeAt, endUtc, nextSkip, numValuesPerNode);
|
||||
clusterCp = _historyContinuationStore.Save(session, clusterState);
|
||||
}
|
||||
|
||||
results[handle.Index] = new SdkHistoryReadResult
|
||||
{
|
||||
StatusCode = slice.Count == 0 ? StatusCodes.GoodNoData : StatusCodes.Good,
|
||||
HistoryData = new ExtensionObject(ToHistoryDataFromSamples(slice)),
|
||||
ContinuationPoint = clusterCp,
|
||||
};
|
||||
errors[handle.Index] = ServiceResult.Good;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user