Files
histsdk/src/AVEVA.Historian.Client/Grpc/HistorianGrpcEventOrchestrator.cs
T
Joseph Doherty f1fd3691ba feat(grpc): route ReadEvents over gRPC + extract shared CM_EVENT registration
Adds HistorianGrpcEventOrchestrator: opens a read-only gRPC session, replays the
CM_EVENT registration (UpdateClientStatus -> 6 GetSystemParameter -> RegisterTags
-> cross-service version probes -> EnsureTags), then StartEventQuery -> loop
GetNextEventQueryResultBuffer -> EndEventQuery, reusing the WCF query builder and
row parser verbatim. Routed in Historian2020ProtocolDialect on UseGrpc.

The captured registration buffers (CmEventTagId, UpdC3 blob, RTag2 buffer, GETHI
builder, pre-register param list, native-error decode) are extracted into a shared
HistorianEventRegistrationProtocol so the WCF and gRPC paths can't drift; the WCF
orchestrator is refactored onto it with no behavior change.

Live finding (2026-06-22): the chain runs and StartEventQuery succeeds, but the
gRPC server long-polls GetNextEventQueryResultBuffer on no data (it blocks to the
deadline instead of returning the synchronous 5-byte code-85 terminal the WCF op
returns). Per-call gRPC-Web deadlines proved unreliable over a tunnel, so the read
is hard-bounded by an overall linked-CTS budget (<=30s; gRPC honors token
cancellation). On the no-row path it throws ProtocolEvidenceMissingException rather
than assert a false-empty list. Row-level retrieval awaits an event-bearing 2023 R2
server (the dev box holds no events).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01B6mcaT2PjRFKcogzp9UkfC
2026-06-22 04:58:25 -04:00

388 lines
21 KiB
C#

using System.Runtime.CompilerServices;
using Google.Protobuf;
using Grpc.Core;
using AVEVA.Historian.Client.Models;
using AVEVA.Historian.Client.Wcf;
using GrpcHistory = ArchestrA.Grpc.Contract.History;
using GrpcRetrieval = ArchestrA.Grpc.Contract.Retrieval;
using GrpcStatus = ArchestrA.Grpc.Contract.Status;
using GrpcTransaction = ArchestrA.Grpc.Contract.Transaction;
namespace AVEVA.Historian.Client.Grpc;
/// <summary>
/// 2023 R2 gRPC event-read orchestrator. Mirrors <see cref="HistorianWcfEventOrchestrator"/> over the
/// gRPC transport: the same CM_EVENT registration sequence and the same event request/row buffers
/// travel inside protobuf <c>bytes</c> fields, reusing the proven WCF serializers/parsers verbatim.
///
/// Operation mapping (2020 WCF → 2023 R2 gRPC):
/// Hist.UpdateClientStatus3 → HistoryService.UpdateClientStatus
/// Hist.RegisterTags2 → HistoryService.RegisterTags
/// Hist.EnsureTags2 → HistoryService.EnsureTags
/// Stat.GetHistorianInfo → StatusService.GetHistorianInfo
/// Stat.GetSystemParameter → StatusService.GetSystemParameter
/// Retr.StartEventQuery → RetrievalService.StartEventQuery
/// Retr.GetNextEventQueryResultBuffer (loop) → RetrievalService.GetNextEventQueryResultBuffer
/// Retr.EndEventQuery → RetrievalService.EndEventQuery
///
/// <para>
/// The CM_EVENT registration replay (<see cref="RegisterCmEventTag"/>) is the hard part: without it
/// the server returns native error type=4 code=85 from GetNextEventQueryResultBuffer. The captured
/// registration buffers are shared with the WCF path via
/// <see cref="HistorianEventRegistrationProtocol"/> so the two transports cannot drift. The gRPC
/// RetrievalService event ops do NOT need the WCF <c>Retr.GetV</c>/<c>IsOriginalAllowed</c> prime
/// (the read path proved the front-door session is sufficient over gRPC).
/// </para>
/// <para>
/// <b>Live status (2026-06-22):</b> the chain runs end-to-end and <c>StartEventQuery</c> succeeds, but
/// <c>GetNextEventQueryResultBuffer</c> <b>long-polls</b> when the query has no rows — it blocks until the
/// call deadline instead of returning the synchronous 5-byte code-85 terminal that the 2020 WCF op returns.
/// A poll-deadline expiry is therefore treated as the no-data terminal (see the read loop). Because a
/// zero-row result cannot be told apart from a registration replay that did not fully land, a read that
/// ends with no rows throws <see cref="ProtocolEvidenceMissingException"/> instead of returning an empty
/// list. The idle dev box holds no events, so <b>row-level retrieval is not yet live-verified</b>;
/// verifying parsed rows over gRPC awaits an event-bearing 2023 R2 server. The flow is implemented and
/// terminates deterministically, but it is NOT proven to return rows.
/// </para>
/// </summary>
internal sealed class HistorianGrpcEventOrchestrator
{
/// <summary>Client options used for credentials, timeouts and channel creation.</summary>
private readonly HistorianClientOptions _options;

/// <summary>Creates an orchestrator bound to the supplied client options.</summary>
/// <param name="options">Client options; must not be <see langword="null"/>.</param>
/// <exception cref="ArgumentNullException"><paramref name="options"/> is <see langword="null"/>.</exception>
public HistorianGrpcEventOrchestrator(HistorianClientOptions options)
{
    if (options is null)
    {
        throw new ArgumentNullException(nameof(options));
    }

    _options = options;
}
/// <summary>
/// Diagnostic: length in bytes of the most recent event-row result buffer the server sent.
/// Updated after every GetNextEventQueryResultBuffer response; stays 0 until the first response arrives.
/// </summary>
public int LastResultBufferLength { get; private set; }
/// <summary>
/// Diagnostic: type+code description of the most recent error/terminal buffer. Starts as the empty
/// string. Updated after every GetNextEventQueryResultBuffer response, and set to a fixed
/// "long-poll deadline" marker when the poll ends by deadline expiry instead of a response.
/// </summary>
public string LastErrorBufferDescription { get; private set; } = string.Empty;
/// <summary>
/// Reads historian events for a time window over the gRPC transport and yields them one at a time.
/// </summary>
/// <remarks>
/// The whole event chain (session open, CM_EVENT registration replay, event query) runs synchronously
/// on a thread-pool thread and is fully buffered before the first event is yielded. As this is an async
/// iterator, nothing in the body — including the credential check — runs until enumeration starts.
/// </remarks>
/// <param name="startUtc">Start of the query window; forwarded to the event query.</param>
/// <param name="endUtc">End of the query window; forwarded to the event query.</param>
/// <param name="filter">Optional event filter; <see langword="null"/> for none.</param>
/// <param name="cancellationToken">Caller cancellation; also checked between yielded events.</param>
/// <returns>The events returned by the server, in the order they were parsed.</returns>
/// <exception cref="ProtocolEvidenceMissingException">
/// Neither integrated security nor a user name is configured, the overall budget expired before the
/// chain finished, or the chain finished without returning any rows.
/// </exception>
/// <exception cref="OperationCanceledException">The caller cancelled <paramref name="cancellationToken"/>.</exception>
public async IAsyncEnumerable<HistorianEvent> ReadEventsAsync(
    DateTime startUtc,
    DateTime endUtc,
    HistorianEventFilter? filter,
    [EnumeratorCancellation] CancellationToken cancellationToken)
{
    if (!_options.IntegratedSecurity && string.IsNullOrEmpty(_options.UserName))
    {
        throw new ProtocolEvidenceMissingException(
            "Managed gRPC event flow currently requires IntegratedSecurity or an explicit UserName + Password.");
    }

    cancellationToken.ThrowIfCancellationRequested();

    // Hard overall cap. The per-call gRPC-Web deadlines are NOT honored reliably over a tunnelled
    // link (observed live 2026-06-22: a chain with 4s per-call deadlines still ran >90s because the
    // server stalls several registration RPCs and long-polls GetNext). gRPC DOES honor token
    // cancellation, so a linked CTS firing at OverallBudget bounds the whole read deterministically.
    // A budget timeout on the unverified no-row path is surfaced as ProtocolEvidenceMissing, not as
    // a raw cancellation, so callers get the same honest "not row-verified over gRPC" signal.
    using CancellationTokenSource linked = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
    linked.CancelAfter(OverallBudget);

    IReadOnlyList<HistorianEvent> events;
    try
    {
        events = await Task.Run(
            () => RunEventChain(startUtc, endUtc, filter, linked.Token),
            linked.Token).ConfigureAwait(false);
    }
    catch (Exception ex) when (IsBudgetCancellation(ex, linked, cancellationToken))
    {
        throw new ProtocolEvidenceMissingException(
            $"ReadEvents over gRPC did not return rows within {OverallBudget.TotalSeconds:0}s: StartEventQuery " +
            "succeeds but the CM_EVENT registration replay stalls and GetNextEventQueryResultBuffer long-polls " +
            "(no synchronous code-85 terminal over gRPC). Row-level retrieval is not yet verified over gRPC " +
            "(the dev box holds no events) — use the WCF transport for event reads.");
    }
    catch (RpcException ex) when (ex.StatusCode == StatusCode.Cancelled && cancellationToken.IsCancellationRequested)
    {
        // An in-flight RPC that is aborted by token cancellation surfaces as RpcException(Cancelled),
        // not as OperationCanceledException. When the CALLER cancelled, translate it so cancellation
        // always reaches the caller as the standard exception type, whichever point in the chain
        // happened to observe the token.
        throw new OperationCanceledException("ReadEvents over gRPC was cancelled by the caller.", ex, cancellationToken);
    }

    foreach (HistorianEvent evt in events)
    {
        cancellationToken.ThrowIfCancellationRequested();
        yield return evt;
    }
}
/// <summary>
/// Wall-clock ceiling for one complete gRPC event read: the configured
/// <see cref="HistorianClientOptions.RequestTimeout"/>, but never more than 30 seconds. The ceiling
/// exists because per-call gRPC-Web deadlines are unreliable over a tunnel, and a long request
/// timeout must not let the (currently row-unverified) read stall for minutes.
/// </summary>
private TimeSpan OverallBudget
{
    get
    {
        TimeSpan budget = TimeSpan.FromSeconds(30);
        if (_options.RequestTimeout < budget)
        {
            budget = _options.RequestTimeout;
        }

        return budget;
    }
}
/// <summary>
/// Decides whether <paramref name="ex"/> is the result of the overall-budget timer firing rather
/// than the caller cancelling.
/// </summary>
/// <param name="ex">The exception caught around the event chain.</param>
/// <param name="linked">The linked source that carries the budget timer.</param>
/// <param name="caller">The caller's own token.</param>
/// <returns>
/// <see langword="true"/> only when the linked source is cancelled, the caller's token is not, and
/// the exception is an <see cref="OperationCanceledException"/> or an <see cref="RpcException"/>
/// whose status is <see cref="StatusCode.Cancelled"/> or <see cref="StatusCode.DeadlineExceeded"/>.
/// </returns>
private static bool IsBudgetCancellation(Exception ex, CancellationTokenSource linked, CancellationToken caller)
{
    bool budgetFired = linked.IsCancellationRequested && !caller.IsCancellationRequested;
    if (!budgetFired)
    {
        return false;
    }

    return ex switch
    {
        OperationCanceledException => true,
        RpcException { StatusCode: StatusCode.Cancelled or StatusCode.DeadlineExceeded } => true,
        _ => false,
    };
}
/// <summary>
/// Runs the full synchronous event chain on one connection: open a session, replay the CM_EVENT
/// registration, then execute the event query.
/// </summary>
/// <returns>The parsed events; never an empty list.</returns>
/// <exception cref="ProtocolEvidenceMissingException">The query finished without returning any rows.</exception>
private List<HistorianEvent> RunEventChain(DateTime startUtc, DateTime endUtc, HistorianEventFilter? filter, CancellationToken cancellationToken)
{
    using HistorianGrpcConnection connection = HistorianGrpcChannelFactory.Create(_options);
    var session = HistorianGrpcHandshake.OpenSession(connection, _options, cancellationToken);

    RegisterCmEventTag(connection, session, cancellationToken);
    List<HistorianEvent> rows = RunEventQuery(connection, session, startUtc, endUtc, filter, cancellationToken);

    if (rows.Count > 0)
    {
        return rows;
    }

    // Zero rows is ambiguous over gRPC: the server long-polls GetNext instead of sending the WCF
    // code-85 terminal, so "no events in range" looks the same as "the CM_EVENT registration replay
    // did not fully land". Surface that unverified state rather than a possibly-false empty list.
    // Revisit once an event-bearing 2023 R2 server has exercised the row-parse path.
    string unverified =
        "ReadEvents over gRPC: the chain completes and StartEventQuery succeeds, but " +
        "GetNextEventQueryResultBuffer returns no rows (it long-polls to the no-data terminal " +
        $"after the CM_EVENT registration replay; last={LastErrorBufferDescription}). Row-level " +
        "retrieval is not yet verified over gRPC (the dev box holds no events) — use the WCF " +
        "transport for event reads until a capture against an event-bearing 2023 R2 server confirms it.";
    throw new ProtocolEvidenceMissingException(unverified);
}
/// <summary>
/// Absolute UTC deadline for a regular RPC: the current time plus the configured
/// <see cref="HistorianClientOptions.RequestTimeout"/>.
/// </summary>
private DateTime Deadline()
{
    return DateTime.UtcNow.Add(_options.RequestTimeout);
}
/// <summary>
/// Absolute UTC deadline for one GetNextEventQueryResultBuffer long-poll: now plus the smaller of
/// 10 seconds and <see cref="HistorianClientOptions.RequestTimeout"/>. Over gRPC a deadline expiry
/// is the no-data terminal, so the short window lets an empty query finish promptly instead of
/// waiting out the full request timeout.
/// </summary>
private DateTime EventPollDeadline()
{
    TimeSpan window = TimeSpan.FromSeconds(10);
    if (_options.RequestTimeout < window)
    {
        window = _options.RequestTimeout;
    }

    return DateTime.UtcNow.Add(window);
}
/// <summary>
/// Absolute UTC deadline for a best-effort registration RPC: now plus the smaller of 5 seconds and
/// <see cref="HistorianClientOptions.RequestTimeout"/>. Several of these calls (RegisterTags /
/// EnsureTags / GetHistorianInfo) <b>stall server-side</b> on the remote 2023 R2 box (observed live
/// 2026-06-22) and only return at their deadline, so the full request timeout would let the
/// registration phase dominate the read. Their outcome is discarded by <see cref="TryRun"/> either way.
/// </summary>
private DateTime RegistrationDeadline()
{
    TimeSpan window = TimeSpan.FromSeconds(5);
    if (_options.RequestTimeout < window)
    {
        window = _options.RequestTimeout;
    }

    return DateTime.UtcNow.Add(window);
}
/// <summary>
/// Replays the native event-tag registration sequence over the gRPC services: version/info discovery
/// → UpdC3 → 6 system params → RTag2 → 1 more system param → cross-service GetV probes → EnsT2.
/// Best-effort: each call is wrapped so an individual rejection on this server does not abort the
/// chain — the goal is to drive the server-side session into the state StartEventQuery /
/// GetNextEventQueryResultBuffer expect. Buffers come from <see cref="HistorianEventRegistrationProtocol"/>.
/// </summary>
/// <param name="connection">Open gRPC connection supplying the channel and call metadata.</param>
/// <param name="session">Session whose string/client handles identify this client to the server.</param>
/// <param name="cancellationToken">Checked before every step and passed to every RPC.</param>
/// <exception cref="OperationCanceledException">
/// <paramref name="cancellationToken"/> was cancelled before a step started. RPC failures themselves
/// are swallowed.
/// </exception>
private void RegisterCmEventTag(HistorianGrpcConnection connection, HistorianGrpcHandshake.Session session, CancellationToken cancellationToken)
{
    var historyClient = new GrpcHistory.HistoryService.HistoryServiceClient(connection.Channel);
    var statusClient = new GrpcStatus.StatusService.StatusServiceClient(connection.Channel);
    var retrievalClient = new GrpcRetrieval.RetrievalService.RetrievalServiceClient(connection.Channel);
    var transactionClient = new GrpcTransaction.TransactionService.TransactionServiceClient(connection.Channel);

    // One best-effort step. The swallow-everything wrapper would also swallow a cancellation and
    // keep walking through the remaining RPCs, so cancellation is checked up front and stops the
    // replay as soon as the token fires.
    void Step(Action rpc)
    {
        cancellationToken.ThrowIfCancellationRequested();
        TryRun(rpc);
    }

    // Discovery dance the native event flow runs between Open2 and EnsT2. All bounded by the
    // short RegistrationDeadline (several stall server-side on the remote box). The duplicated
    // calls are part of the replayed sequence, not an accident.
    Step(() => statusClient.GetStatusInterfaceVersion(new GrpcStatus.GetStatusInterfaceVersionRequest(), connection.Metadata, RegistrationDeadline(), cancellationToken));
    Step(() => statusClient.GetStatusInterfaceVersion(new GrpcStatus.GetStatusInterfaceVersionRequest(), connection.Metadata, RegistrationDeadline(), cancellationToken));

    byte[] historianVersionRequest = HistorianEventRegistrationProtocol.BuildGetHistorianInfoRequest("HistorianVersion");
    Step(() => statusClient.GetHistorianInfo(
        new GrpcStatus.GetHistorianInfoRequest { StrHandle = session.StringHandle, BtRequest = ByteString.CopyFrom(historianVersionRequest) },
        connection.Metadata, RegistrationDeadline(), cancellationToken));
    Step(() => statusClient.GetHistorianInfo(
        new GrpcStatus.GetHistorianInfoRequest { StrHandle = session.StringHandle, BtRequest = ByteString.CopyFrom(historianVersionRequest) },
        connection.Metadata, RegistrationDeadline(), cancellationToken));

    byte[] clientStatus = HistorianEventRegistrationProtocol.BuildUpdateClientStatusBlob();
    Step(() => historyClient.UpdateClientStatus(
        new GrpcHistory.UpdateClientStatusRequest { StrHandle = session.StringHandle, BtClientStatus = ByteString.CopyFrom(clientStatus) },
        connection.Metadata, RegistrationDeadline(), cancellationToken));

    // Records 11-16: 6 system-parameter queries before RTag2.
    foreach (string parameterName in HistorianEventRegistrationProtocol.StatusParametersBeforeRegister)
    {
        Step(() => statusClient.GetSystemParameter(
            new GrpcStatus.GetSystemParameterRequest { UiHandle = session.ClientHandle, StrParameterName = parameterName },
            connection.Metadata, RegistrationDeadline(), cancellationToken));
    }

    byte[] registerBuffer = HistorianEventRegistrationProtocol.BuildRegisterCmEventInputBuffer();
    Step(() => historyClient.RegisterTags(
        new GrpcHistory.RegisterTagsRequest { StrHandle = session.StringHandle, BtTagInfos = ByteString.CopyFrom(registerBuffer) },
        connection.Metadata, RegistrationDeadline(), cancellationToken));

    // Record 18: one more system-parameter query after RTag2 before EnsT2.
    Step(() => statusClient.GetSystemParameter(
        new GrpcStatus.GetSystemParameterRequest { UiHandle = session.ClientHandle, StrParameterName = "AllowRenameTags" },
        connection.Metadata, RegistrationDeadline(), cancellationToken));

    // Records 19-21: cross-service version probes between RTag2 and EnsT2 (session-table registration).
    Step(() => transactionClient.GetTransactionInterfaceVersion(new GrpcTransaction.GetTransactionInterfaceVersionRequest(), connection.Metadata, RegistrationDeadline(), cancellationToken));
    Step(() => statusClient.GetStatusInterfaceVersion(new GrpcStatus.GetStatusInterfaceVersionRequest(), connection.Metadata, RegistrationDeadline(), cancellationToken));
    Step(() => retrievalClient.GetRetrievalInterfaceVersion(new GrpcRetrieval.GetRetrievalInterfaceVersionRequest(), connection.Metadata, RegistrationDeadline(), cancellationToken));

    byte[] payload = HistorianAddTagsProtocol.SerializeCmEventCTagMetadata(DateTime.UtcNow);
    Step(() => historyClient.EnsureTags(
        new GrpcHistory.EnsureTagsRequest { StrHandle = session.StringHandle, BtTagInfos = ByteString.CopyFrom(payload), ElementCount = 1 },
        connection.Metadata, RegistrationDeadline(), cancellationToken));
}
/// <summary>
/// Executes the event query on an already-registered session: validates the retrieval interface
/// version, starts the query, drains GetNextEventQueryResultBuffer until a terminal condition, and
/// always ends the query afterwards.
/// </summary>
/// <returns>
/// The rows parsed so far when the read stops. The read stops on a poll-deadline expiry, on a 5-byte
/// type=4 error buffer, or on a response with neither result nor error bytes.
/// </returns>
/// <exception cref="InvalidOperationException">
/// StartEventQuery reported failure, or GetNextEventQueryResultBuffer reported a failure that is not
/// a soft terminal.
/// </exception>
private List<HistorianEvent> RunEventQuery(
    HistorianGrpcConnection connection,
    HistorianGrpcHandshake.Session session,
    DateTime startUtc,
    DateTime endUtc,
    HistorianEventFilter? filter,
    CancellationToken cancellationToken)
{
    var retrieval = new GrpcRetrieval.RetrievalService.RetrievalServiceClient(connection.Channel);

    // Gate on the server's retrieval interface version before sending any query buffers.
    GrpcRetrieval.GetRetrievalInterfaceVersionResponse versionReply = retrieval.GetRetrievalInterfaceVersion(
        new GrpcRetrieval.GetRetrievalInterfaceVersionRequest(), connection.Metadata, Deadline(), cancellationToken);
    HistorianServerVersionGate.Validate(HistorianServiceInterface.Retrieval, versionReply.UiVersion, _options);

    // Only the first request variant produced by the shared WCF query builder is sent.
    IReadOnlyList<HistorianEventQueryAttempt> candidates = HistorianEventQueryProtocol.CreateStartEventQueryAttempts(
        startUtc.ToUniversalTime(),
        endUtc.ToUniversalTime(),
        eventCount: 5,
        filter);
    byte[] startPayload = candidates[0].RequestBuffer;

    var startRequest = new GrpcRetrieval.StartEventQueryRequest
    {
        UiHandle = session.ClientHandle,
        UiQueryRequestType = HistorianEventQueryProtocol.QueryRequestTypeEvent,
        BtRequest = ByteString.CopyFrom(startPayload)
    };
    GrpcRetrieval.StartEventQueryResponse startReply = retrieval.StartEventQuery(
        startRequest, connection.Metadata, Deadline(), cancellationToken);

    if (startReply.Status?.BSuccess != true)
    {
        byte[] startFault = startReply.Status?.BtError?.ToByteArray() ?? [];
        throw new InvalidOperationException(
            $"gRPC StartEventQuery failed (errorLen={startFault.Length}, error5={HistorianEventRegistrationProtocol.DescribeNativeError(startFault)}).");
    }

    uint handle = startReply.UiQueryHandle;
    List<HistorianEvent> collected = [];
    try
    {
        for (;;)
        {
            cancellationToken.ThrowIfCancellationRequested();

            GrpcRetrieval.GetNextEventQueryResultBufferResponse page;
            try
            {
                page = retrieval.GetNextEventQueryResultBuffer(
                    new GrpcRetrieval.GetNextEventQueryResultBufferRequest { UiHandle = session.ClientHandle, UiQueryHandle = handle },
                    connection.Metadata,
                    EventPollDeadline(),
                    cancellationToken);
            }
            catch (RpcException ex) when (ex.StatusCode == StatusCode.DeadlineExceeded)
            {
                // No-data terminal. Over gRPC the 2023 R2 server long-polls this call when it has
                // no (more) rows, instead of returning the 5-byte type=4 code=85 terminal the 2020
                // WCF op returns synchronously. A poll-deadline expiry is the gRPC equivalent of
                // that soft terminal, so stop and hand back what was collected. (Confirmed live
                // 2026-06-22 against the idle dev box, which holds no events.)
                LastErrorBufferDescription = "GetNext long-poll deadline (no-data terminal)";
                return collected;
            }

            byte[] rowBytes = page.BtResult?.ToByteArray() ?? [];
            byte[] faultBytes = page.Status?.BtError?.ToByteArray() ?? [];
            bool succeeded = page.Status?.BSuccess ?? false;

            // Record diagnostics before any decision so they describe the response that ended the read.
            LastResultBufferLength = rowBytes.Length;
            LastErrorBufferDescription = HistorianEventRegistrationProtocol.DescribeNativeError(faultBytes);

            // Any 5-byte type=4 error is a soft terminal (code 30 NoMoreData is canonical; code
            // 85 / 0x55 is the missing-registration signal seen on early runs). As in the WCF
            // orchestrator, stop and surface the diagnostic rather than throw.
            bool softTerminal = faultBytes.Length == 5 && faultBytes[0] == 4;
            if (softTerminal)
            {
                return collected;
            }

            if (!succeeded)
            {
                throw new InvalidOperationException(
                    $"gRPC GetNextEventQueryResultBuffer failed (errorLen={faultBytes.Length}, error5={HistorianEventRegistrationProtocol.DescribeNativeError(faultBytes)}).");
            }

            if (rowBytes.Length > 0)
            {
                collected.AddRange(HistorianEventRowProtocol.Parse(rowBytes));
            }
            else if (faultBytes.Length == 0)
            {
                // Successful response carrying neither rows nor an error: nothing left to read.
                return collected;
            }
        }
    }
    finally
    {
        EndEventQuerySafely(retrieval, connection, session.ClientHandle, handle);
    }
}
/// <summary>
/// Best-effort release of a server-side event query. Never throws: every failure, including a
/// timeout of the cleanup call itself, is swallowed because the read outcome is already decided.
/// </summary>
/// <remarks>
/// The call is bounded by its own short window (the smaller of 5 seconds and
/// <see cref="HistorianClientOptions.RequestTimeout"/>), enforced both as the RPC deadline and through
/// a dedicated cancellation source. Cleanup runs from a <c>finally</c> block, possibly after the
/// read's own token has already fired, so it cannot reuse that token. An uncancellable call with the
/// full request timeout could keep the read blocked well past its overall budget.
/// </remarks>
/// <param name="client">Retrieval client used for the query.</param>
/// <param name="connection">Connection supplying the call metadata.</param>
/// <param name="clientHandle">Session client handle that owns the query.</param>
/// <param name="queryHandle">Handle returned by StartEventQuery.</param>
private void EndEventQuerySafely(
    GrpcRetrieval.RetrievalService.RetrievalServiceClient client,
    HistorianGrpcConnection connection,
    uint clientHandle,
    uint queryHandle)
{
    try
    {
        TimeSpan cap = TimeSpan.FromSeconds(5);
        TimeSpan window = _options.RequestTimeout < cap ? _options.RequestTimeout : cap;

        // Token cancellation is the bound that is honored reliably; the deadline is kept as well.
        using CancellationTokenSource cleanupBound = new CancellationTokenSource(window);
        client.EndEventQuery(
            new GrpcRetrieval.EndEventQueryRequest { UiHandle = clientHandle, UiQueryHandle = queryHandle },
            connection.Metadata,
            DateTime.UtcNow.Add(window),
            cleanupBound.Token);
    }
    catch
    {
        // Best-effort cleanup; the read result is already collected.
    }
}
/// <summary>
/// Invokes <paramref name="action"/> and discards any exception it throws. Used for the best-effort
/// registration RPCs, whose individual outcome must not abort the chain.
/// </summary>
/// <param name="action">The step to attempt.</param>
private static void TryRun(Action action)
{
    try
    {
        action();
    }
    catch
    {
        // Deliberately ignored: the step is best-effort.
    }
}
}