Resolve Worker-009..015 code-review findings

Worker-009: WorkerFrameWriter serialized twice and WorkerFrameReader
allocated a payload byte[] per frame. The writer now serializes once into a
single prefix+payload buffer; the reader rents the payload buffer from
ArrayPool and honors the logical frame length.

Worker-010: VariantConverter projected a uint+Time value as a full FILETIME,
producing a near-1601 timestamp. The FILETIME projection is now gated on
`value is long`; uint falls through to the integer projection.

Worker-011: replaced the opaque retryAttempts formula in WorkerPipeClient
with MaxRetryAttempts = int.MaxValue, leaving the connect deadline as the
sole bound.

Worker-012: rewrote stale "future PR / polls on a Timer" comments in
AlarmDispatcher, AlarmCommandHandler, MxAccessAlarmEventSink and
MxAccessEventMapper to match the shipped, post-Worker-001 behavior.

Worker-013 (re-triaged): already resolved — StaMessagePumpTests and
MxAccessStaSessionTests cover the pump and poll loop directly.

Worker-014: moved IAlarmCommandHandler into its own file so
AlarmCommandHandler.cs declares one public type.

Worker-015: clarified the MxAccessBaseEventSink.EnqueueEvent overflow-catch
comment explaining the deliberate double RecordFault no-op.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-18 22:42:17 -04:00
parent fe9044115b
commit 1764eff1cf
13 changed files with 229 additions and 127 deletions
@@ -60,6 +60,26 @@ public sealed class VariantConverterTests
Assert.Equal("VT_I8", converted.VariantType);
}
/// <summary>
/// Worker-010 regression: a 32-bit <see cref="uint"/> with an expected
/// data type of <see cref="MxDataType.Time"/> must not be projected as a
/// Windows FILETIME. A uint can only hold the low 32 bits of a FILETIME,
/// which would silently render as a near-1601 timestamp; the converter
/// must fall through to an integer projection instead.
/// </summary>
[Fact]
public void Convert_WithUInt32AndExpectedTime_DoesNotProjectFileTime()
{
const uint value = 123456789u;
MxValue converted = _converter.Convert(value, MxDataType.Time);
Assert.Equal(MxDataType.Integer, converted.DataType);
Assert.Equal(MxValue.KindOneofCase.Int64Value, converted.KindCase);
Assert.Equal(value, converted.Int64Value);
Assert.Equal("VT_UI4", converted.VariantType);
}
/// <summary>Verifies that null-like values preserve their null semantics and variant type.</summary>
/// <param name="value">Null-like value to convert.</param>
/// <param name="expectedVariantType">Expected variant type string.</param>
@@ -118,6 +118,39 @@ public sealed class WorkerFrameProtocolTests
Assert.Equal(new ulong[] { 1, 2, 3 }, new[] { first.Sequence, second.Sequence, third.Sequence }.OrderBy(sequence => sequence));
}
/// <summary>
/// Worker-009 regression: the reader rents its payload buffer from a
/// shared pool, so a rented buffer can be larger than the current frame
/// and may carry bytes from a previous, larger frame. Reading frames of
/// differing sizes back-to-back through one reader must parse each frame
/// using only its own payload length, never trailing pooled bytes.
/// </summary>
[Fact]
public async Task ReadAsync_WithVaryingFrameSizes_ParsesEachFrameExactly()
{
WorkerFrameProtocolOptions options = CreateOptions();
using MemoryStream stream = new();
WorkerFrameWriter writer = new(stream, options);
// A large-payload frame followed by a small-payload frame: if the
// reader reused a pooled buffer without honouring the second frame's
// length, the small frame would parse with stale trailing bytes.
WorkerEnvelope large = CreateGatewayHelloEnvelope(sequence: 1);
large.GatewayHello.GatewayVersion = new string('x', 4096);
WorkerEnvelope small = CreateGatewayHelloEnvelope(sequence: 2);
await writer.WriteAsync(large);
await writer.WriteAsync(small);
stream.Position = 0;
WorkerFrameReader reader = new(stream, options);
WorkerEnvelope firstParsed = await reader.ReadAsync();
WorkerEnvelope secondParsed = await reader.ReadAsync();
Assert.Equal(large, firstParsed);
Assert.Equal(small, secondParsed);
}
private static WorkerFrameProtocolOptions CreateOptions()
{
return new WorkerFrameProtocolOptions(
@@ -207,7 +207,14 @@ public sealed class VariantConverter
MxDataType expectedDataType)
{
long longValue = System.Convert.ToInt64(value, CultureInfo.InvariantCulture);
if (expectedDataType == MxDataType.Time)
// The MxDataType.Time projection treats the source as a Windows FILETIME
// (a 64-bit 100-ns tick count since 1601). Only a genuine 64-bit source
// (long) can carry a valid full FILETIME; a uint can only hold the low
// 32 bits, which DateTime.FromFileTimeUtc would silently render as a
// near-1601 timestamp. For uint sources fall through to the integer
// projection rather than producing a bogus timestamp.
if (expectedDataType == MxDataType.Time && value is long)
{
return new MxValue
{
+26 -12
View File
@@ -1,4 +1,5 @@
using System;
using System.Buffers;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
@@ -29,7 +30,7 @@ public sealed class WorkerFrameReader
public async Task<WorkerEnvelope> ReadAsync(CancellationToken cancellationToken = default)
{
byte[] lengthPrefix = new byte[sizeof(uint)];
await ReadExactlyOrThrowAsync(lengthPrefix, cancellationToken).ConfigureAwait(false);
await ReadExactlyOrThrowAsync(lengthPrefix, lengthPrefix.Length, cancellationToken).ConfigureAwait(false);
uint payloadLength = ReadUInt32LittleEndian(lengthPrefix);
if (payloadLength == 0)
@@ -46,20 +47,32 @@ public sealed class WorkerFrameReader
$"Worker frame payload length {payloadLength} exceeds the configured maximum of {_options.MaxMessageBytes} bytes.");
}
byte[] payload = new byte[payloadLength];
await ReadExactlyOrThrowAsync(payload, cancellationToken).ConfigureAwait(false);
// Rent the payload buffer from the shared pool rather than allocating
// a fresh byte[] per frame. ParseFrom copies whatever it needs into
// the parsed message, so the rented buffer can be returned as soon as
// parsing completes.
int length = checked((int)payloadLength);
byte[] payload = ArrayPool<byte>.Shared.Rent(length);
WorkerEnvelope envelope;
try
{
envelope = WorkerEnvelope.Parser.ParseFrom(payload);
await ReadExactlyOrThrowAsync(payload, length, cancellationToken).ConfigureAwait(false);
try
{
envelope = WorkerEnvelope.Parser.ParseFrom(payload, 0, length);
}
catch (InvalidProtocolBufferException exception)
{
throw new WorkerFrameProtocolException(
WorkerFrameProtocolErrorCode.InvalidEnvelope,
"Worker frame payload is not a valid WorkerEnvelope protobuf message.",
exception);
}
}
catch (InvalidProtocolBufferException exception)
finally
{
throw new WorkerFrameProtocolException(
WorkerFrameProtocolErrorCode.InvalidEnvelope,
"Worker frame payload is not a valid WorkerEnvelope protobuf message.",
exception);
ArrayPool<byte>.Shared.Return(payload);
}
WorkerEnvelopeValidator.Validate(envelope, _options);
@@ -77,13 +90,14 @@ public sealed class WorkerFrameReader
private async Task ReadExactlyOrThrowAsync(
byte[] buffer,
int count,
CancellationToken cancellationToken)
{
int offset = 0;
while (offset < buffer.Length)
while (offset < count)
{
int bytesRead = await _stream
.ReadAsync(buffer, offset, buffer.Length - offset, cancellationToken)
.ReadAsync(buffer, offset, count - offset, cancellationToken)
.ConfigureAwait(false);
if (bytesRead == 0)
+10 -5
View File
@@ -54,15 +54,20 @@ public sealed class WorkerFrameWriter
$"Worker envelope payload length {payloadLength} exceeds the configured maximum of {_options.MaxMessageBytes} bytes.");
}
byte[] payload = envelope.ToByteArray();
byte[] lengthPrefix = new byte[sizeof(uint)];
WriteUInt32LittleEndian(lengthPrefix, (uint)payloadLength);
// Serialize once into a single buffer that carries the 4-byte
// length prefix followed by the payload, then issue one stream write.
// This avoids a second serialization pass (envelope.ToByteArray()
// would re-run CalculateSize internally), a separate prefix array,
// and a separate prefix write.
int frameLength = sizeof(uint) + payloadLength;
byte[] frame = new byte[frameLength];
WriteUInt32LittleEndian(frame, (uint)payloadLength);
envelope.WriteTo(new Span<byte>(frame, sizeof(uint), payloadLength));
await _writeLock.WaitAsync(cancellationToken).ConfigureAwait(false);
try
{
await _stream.WriteAsync(lengthPrefix, 0, lengthPrefix.Length, cancellationToken).ConfigureAwait(false);
await _stream.WriteAsync(payload, 0, payload.Length, cancellationToken).ConfigureAwait(false);
await _stream.WriteAsync(frame, 0, frameLength, cancellationToken).ConfigureAwait(false);
await _stream.FlushAsync(cancellationToken).ConfigureAwait(false);
}
finally
+8 -5
View File
@@ -166,14 +166,17 @@ public sealed class WorkerPipeClient : IWorkerPipeClient
string pipeName,
CancellationToken cancellationToken)
{
int retryAttempts = Math.Max(
0,
(_connectTimeoutMilliseconds / Math.Min(_connectTimeoutMilliseconds, _connectAttemptTimeoutMilliseconds)) - 1);
// The real bound on connection attempts is the connectDeadline token
// below (CancelAfter(connectTimeout)): Polly stops retrying as soon as
// that token is cancelled. Driving retries purely off the deadline —
// rather than a fragile attempt-count formula that ignored the
// exponential backoff between attempts — keeps the time budget the
// single source of truth. MaxRetryAttempts is set to its maximum so it
// never ends the retry loop before the deadline does.
ResiliencePipeline<NamedPipeClientStream> pipeline = new ResiliencePipelineBuilder<NamedPipeClientStream>()
.AddRetry(new RetryStrategyOptions<NamedPipeClientStream>
{
MaxRetryAttempts = retryAttempts,
MaxRetryAttempts = int.MaxValue,
BackoffType = DelayBackoffType.Exponential,
UseJitter = true,
Delay = TimeSpan.FromMilliseconds(250),
@@ -24,10 +24,12 @@ namespace MxGateway.Worker.MxAccess;
/// </para>
/// <para>
/// Threading: invoked from <see cref="MxAccessCommandExecutor"/>
/// which runs on the STA. The wnwrap consumer's polling timer
/// fires on a thread-pool thread; the only cross-thread surface
/// is the <see cref="AlarmDispatcher"/>'s event handler, which
/// hand-offs into the thread-safe <see cref="MxAccessEventQueue"/>.
/// which runs on the STA. The wnwrap consumer owns no internal
/// timer — the worker's STA drives <see cref="PollOnce"/> via
/// <c>StaRuntime.InvokeAsync</c>, so the consumer's transition
/// events fire on the same STA. The
/// <see cref="AlarmDispatcher"/>'s event handler hands transitions
/// into the thread-safe <see cref="MxAccessEventQueue"/>.
/// </para>
/// </remarks>
public sealed class AlarmCommandHandler : IAlarmCommandHandler
@@ -191,58 +193,3 @@ public sealed class AlarmCommandHandler : IAlarmCommandHandler
Unsubscribe();
}
}
/// <summary>
/// Per-session interface routing the worker's alarm IPC commands —
/// <c>SubscribeAlarmsCommand</c>, <c>AcknowledgeAlarmCommand</c>,
/// <c>QueryActiveAlarmsCommand</c>, <c>UnsubscribeAlarmsCommand</c> —
/// to the underlying <see cref="AlarmDispatcher"/>. Production binding
/// is <see cref="AlarmCommandHandler"/>; tests substitute a fake.
/// </summary>
public interface IAlarmCommandHandler : IDisposable
{
/// <summary>Begin a subscription against the supplied AVEVA alarm-provider expression.</summary>
void Subscribe(string subscription, string sessionId);
/// <summary>Tear down the active subscription. No-op if not subscribed.</summary>
void Unsubscribe();
/// <summary>Acknowledge a single alarm by GUID. Returns AVEVA's native status (0 = success).</summary>
int Acknowledge(
Guid alarmGuid,
string comment,
string operatorUser,
string operatorNode,
string operatorDomain,
string operatorFullName);
/// <summary>
/// Acknowledge a single alarm by (name, provider, group) — used when
/// the caller has the human-readable reference but not the GUID.
/// </summary>
int AcknowledgeByName(
string alarmName,
string providerName,
string groupName,
string comment,
string operatorUser,
string operatorNode,
string operatorDomain,
string operatorFullName);
/// <summary>
/// Snapshot the currently-active alarm set, optionally scoped to a
/// prefix matched against <c>AlarmFullReference</c>.
/// </summary>
IReadOnlyList<ActiveAlarmSnapshot> QueryActive(string? alarmFilterPrefix);
/// <summary>
/// Drives a single poll of the underlying alarm consumer on the
/// caller's thread. This is a no-op when there is no active
/// subscription. In production the caller is the worker's STA
/// (marshalled via <c>StaRuntime.InvokeAsync</c>), which satisfies
/// the <c>ThreadingModel=Apartment</c> requirement of
/// <c>wwAlarmConsumerClass</c>.
/// </summary>
void PollOnce();
}
@@ -14,22 +14,20 @@ namespace MxGateway.Worker.MxAccess;
/// </summary>
/// <remarks>
/// <para>
/// This is the in-process slice of A.3 — it proves the
/// consumer→sink→queue pipeline end-to-end without touching the
/// worker's IPC command framing. The companion follow-up PR adds
/// <c>SubscribeAlarmsCommand</c> / <c>AcknowledgeAlarmCommand</c> /
/// <c>QueryActiveAlarmsCommand</c> proto entries plus the gateway-
/// side <c>WorkerAlarmRpcDispatcher</c> that issues them.
/// The dispatcher carries the consumer→sink→queue pipeline. The
/// worker's IPC layer issues <c>SubscribeAlarmsCommand</c> /
/// <c>AcknowledgeAlarmCommand</c> / <c>QueryActiveAlarmsCommand</c>
/// through <see cref="AlarmCommandHandler"/>, which owns one
/// dispatcher per session.
/// </para>
/// <para>
/// Threading: <see cref="WnWrapAlarmConsumer"/> polls on a
/// <see cref="System.Threading.Timer"/> thread today; production
/// hosting should marshal the consumer onto the worker's STA via
/// <c>StaRuntime.InvokeAsync</c>. The dispatcher itself is purely
/// a pass-through, so it inherits whatever thread the consumer's
/// event handler fires on. Fan-out into <c>EnqueueTransition</c>
/// uses <see cref="MxAccessEventQueue.Enqueue"/> which is
/// thread-safe.
/// Threading: <see cref="WnWrapAlarmConsumer"/> owns no internal
/// timer — the worker's STA drives polling via
/// <c>StaRuntime.InvokeAsync(() =&gt; PollOnce())</c>, so the
/// consumer's <c>AlarmTransitionEmitted</c> event fires on the STA.
/// The dispatcher is purely a pass-through, so it inherits that
/// thread. Fan-out into <c>EnqueueTransition</c> uses the
/// thread-safe <see cref="MxAccessEventQueue.Enqueue"/>.
/// </para>
/// </remarks>
public sealed class AlarmDispatcher : IDisposable
@@ -0,0 +1,60 @@
using System;
using System.Collections.Generic;
using MxGateway.Contracts.Proto;
namespace MxGateway.Worker.MxAccess;
/// <summary>
/// Per-session interface routing the worker's alarm IPC commands —
/// <c>SubscribeAlarmsCommand</c>, <c>AcknowledgeAlarmCommand</c>,
/// <c>QueryActiveAlarmsCommand</c>, <c>UnsubscribeAlarmsCommand</c> —
/// to the underlying <see cref="AlarmDispatcher"/>. Production binding
/// is <see cref="AlarmCommandHandler"/>; tests substitute a fake.
/// </summary>
public interface IAlarmCommandHandler : IDisposable
{
/// <summary>Begin a subscription against the supplied AVEVA alarm-provider expression.</summary>
void Subscribe(string subscription, string sessionId);
/// <summary>Tear down the active subscription. No-op if not subscribed.</summary>
void Unsubscribe();
/// <summary>Acknowledge a single alarm by GUID. Returns AVEVA's native status (0 = success).</summary>
int Acknowledge(
Guid alarmGuid,
string comment,
string operatorUser,
string operatorNode,
string operatorDomain,
string operatorFullName);
/// <summary>
/// Acknowledge a single alarm by (name, provider, group) — used when
/// the caller has the human-readable reference but not the GUID.
/// </summary>
int AcknowledgeByName(
string alarmName,
string providerName,
string groupName,
string comment,
string operatorUser,
string operatorNode,
string operatorDomain,
string operatorFullName);
/// <summary>
/// Snapshot the currently-active alarm set, optionally scoped to a
/// prefix matched against <c>AlarmFullReference</c>.
/// </summary>
IReadOnlyList<ActiveAlarmSnapshot> QueryActive(string? alarmFilterPrefix);
/// <summary>
/// Drives a single poll of the underlying alarm consumer on the
/// caller's thread. This is a no-op when there is no active
/// subscription. In production the caller is the worker's STA
/// (marshalled via <c>StaRuntime.InvokeAsync</c>), which satisfies
/// the <c>ThreadingModel=Apartment</c> requirement of
/// <c>wwAlarmConsumerClass</c>.
/// </summary>
void PollOnce();
}
@@ -11,13 +11,15 @@ namespace MxGateway.Worker.MxAccess;
/// </summary>
/// <remarks>
/// <para>
/// The dispatcher subscribes the consumer's
/// <see cref="AlarmDispatcher"/> owns the wire-up: it constructs the
/// consumer/sink pair, calls <see cref="Attach"/> to propagate the
/// session id, and subscribes the consumer's
/// <see cref="IMxAccessAlarmConsumer.AlarmTransitionEmitted"/> event
/// to <see cref="EnqueueTransition"/> at session attach time. The
/// <see cref="Attach"/> override here is a stub kept for the data-
/// session shape; the actual wire-up between consumer and sink
/// lives in the A.3 dispatcher (one step up the stack). Captured
/// payload schema and consumer threading discipline are described in
/// so each decoded transition reaches <see cref="EnqueueTransition"/>.
/// The <see cref="Attach"/> method here carries only the session id —
/// the alarm path needs no COM-event subscription of its own because
/// the consumer already polls and raises transition events. The
/// captured payload schema is described in
/// <c>docs/AlarmClientDiscovery.md</c> "Option A — captured".
/// </para>
/// </remarks>
@@ -47,10 +49,10 @@ public sealed class MxAccessAlarmEventSink : IMxAccessEventSink
if (mxAccessComObject is null) throw new ArgumentNullException(nameof(mxAccessComObject));
this.sessionId = sessionId ?? string.Empty;
// PR A.2 — COM-side subscription scaffold. The MXAccess Toolkit alarm
// event source is pinned during dev-rig validation. Until then, the
// worker advertises no alarm subscription; data-change behaviour is
// unaffected.
// The alarm path needs no COM-event subscription here: the wnwrap
// consumer is polled by the worker's STA and raises transition events
// that AlarmDispatcher routes into EnqueueTransition. Attach only
// records the session id stamped onto every emitted MxEvent.
attached = true;
}
@@ -158,6 +158,16 @@ public sealed class MxAccessBaseEventSink : IMxAccessEventSink
}
catch (Exception exception)
{
// Two distinct failures land here, both intentionally fail-fast:
// - A conversion failure from createEvent() — recorded here as an
// MxaccessEventConversionFailed fault.
// - An MxAccessEventQueueOverflowException from Enqueue when the
// queue is at capacity. Per the fail-fast backpressure design
// (docs/DesignDecisions.md) the event is dropped and the queue
// has *already* self-recorded a QueueOverflow fault. Because
// MxAccessEventQueue.RecordFault keeps only the first fault,
// this catch's RecordFault call is then a deliberate near
// no-op rather than a second, conflicting fault.
eventQueue.RecordFault(CreateEventConversionFault(exception));
}
}
@@ -103,12 +103,11 @@ public sealed class MxAccessEventMapper
}
/// <summary>
/// Creates an OnAlarmTransition event from MXAccess COM alarm-event arguments.
/// PR A.2 — proto-build path is mechanical and unit-testable; the COM-side
/// subscription that calls into this method (registering an
/// <c>IAlarmEventSink</c> against the MXAccess Toolkit's alarm provider) is
/// pinned during dev-rig validation since the exact MXAccess Toolkit version
/// installed on the worker host determines the API shape.
/// Creates an OnAlarmTransition event from MXAccess alarm-event arguments.
/// The worker's alarm path drives this method from
/// <see cref="MxAccessAlarmEventSink.EnqueueTransition"/> once
/// <see cref="AlarmDispatcher"/> decodes a transition raised by the
/// wnwrap-backed <see cref="WnWrapAlarmConsumer"/>.
/// </summary>
/// <param name="sessionId">Identifier of the session.</param>
/// <param name="alarmFullReference">Fully-qualified MxAccess alarm reference (e.g. "Tank01.Level.HiHi").</param>