rename: prefix gateway projects/namespaces with ZB.MOM.WW + sln→slnx
Apply the ZB.MOM.WW. prefix to all gateway-side projects, folders,
.csproj/.sln contents, C# namespaces, using directives, generated proto
C# (csharp_namespace + checked-in generated files), InternalsVisibleTo
attributes, project-name string literals (LoadProject, .sln lookups,
worker exe paths, staticwebassets manifest), and the install/script/doc
references that point at any of the above. Migrate the solution from
.sln to .slnx via `dotnet sln migrate` and delete the old file.
External-runtime identifiers are intentionally NOT prefixed so external
configuration keeps working:
- GatewayMetrics.cs MeterName ("MxGateway.Server")
- DashboardAuthenticationDefaults Scheme/Policy ("MxGateway.Dashboard")
- GatewayRequestLoggingMiddleware logger category ("MxGateway.Request")
- StaRuntime thread name ("MxGateway.Worker.STA")
- appsettings.json root section "MxGateway" + env-var prefix
MxGateway__... and secret-name MxGateway:ApiKeyPepper
- C:\ProgramData\MxGateway\ data dir paths
Also fixes two tests that were not rename-related but became visible
while validating the rename:
- WorkerLiveMxAccessSmokeTests.ShutDownAsync: cancellation that the
gateway service correctly maps to RpcException(Cancelled) per gRPC
convention was being misclassified as a stream fault. Added a sibling
catch on RpcException with StatusCode.Cancelled.
- IntegrationTestEnvironment.ResolveRepositoryRoot: extracted IsRepositoryRoot
and made it accept either a .git marker OR a .sln/.slnx next to src/
so the worker-exe walker works in non-git working copies.
clients/proto/proto-inputs.json's protoRoot updated to point at
src/ZB.MOM.WW.MxGateway.Contracts/Protos.
Verified by `dotnet build` and a full `dotnet test` of the .slnx with
MXGATEWAY_RUN_LIVE_{MXACCESS,LDAP,GALAXY}_TESTS=1:
Tests: 472/472 pass
Worker.Tests: 280/280 pass (4 dev-rig [Fact(Skip=...)] skipped)
IntegrationTests: 18/18 pass
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,16 @@
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using ZB.MOM.WW.MxGateway.Worker.Bootstrap;
|
||||
|
||||
namespace ZB.MOM.WW.MxGateway.Worker.Ipc;
|
||||
|
||||
/// <summary>
/// Contract for the component that manages the worker's named pipe connection to the gateway.
/// </summary>
public interface IWorkerPipeClient
{
    /// <summary>
    /// Connects to the gateway and runs the worker until the session ends or is cancelled.
    /// </summary>
    /// <param name="options">Configuration options.</param>
    /// <param name="cancellationToken">Token to cancel the asynchronous operation.</param>
    /// <returns>A task representing the asynchronous run.</returns>
    Task RunAsync(WorkerOptions options, CancellationToken cancellationToken = default);
}
|
||||
@@ -0,0 +1,13 @@
|
||||
using ZB.MOM.WW.MxGateway.Contracts;
|
||||
using ZB.MOM.WW.MxGateway.Contracts.Proto;
|
||||
|
||||
namespace ZB.MOM.WW.MxGateway.Worker.Ipc;
|
||||
|
||||
/// <summary>
/// Exposes the worker protocol version and the WorkerEnvelope descriptor name
/// taken from the shared gateway contracts.
/// </summary>
public static class WorkerContractInfo
{
    /// <summary>The worker protocol version supported by this contract.</summary>
    public static uint SupportedProtocolVersion
    {
        get { return GatewayContractInfo.WorkerProtocolVersion; }
    }

    /// <summary>The fully qualified name of the WorkerEnvelope message descriptor.</summary>
    public static string WorkerEnvelopeDescriptorName
    {
        get { return WorkerEnvelope.Descriptor.FullName; }
    }
}
|
||||
@@ -0,0 +1,37 @@
|
||||
using System;
|
||||
using ZB.MOM.WW.MxGateway.Contracts.Proto;
|
||||
|
||||
namespace ZB.MOM.WW.MxGateway.Worker.Ipc;
|
||||
|
||||
/// <summary>Checks worker envelopes against the frame protocol options of the owning session.</summary>
internal static class WorkerEnvelopeValidator
{
    /// <summary>
    /// Verifies, in order, that the envelope's protocol version matches the options,
    /// that its session id matches the options (ordinal comparison), and that it
    /// carries a typed body.
    /// </summary>
    /// <param name="envelope">The envelope to validate.</param>
    /// <param name="options">The frame protocol configuration.</param>
    /// <exception cref="WorkerFrameProtocolException">
    /// Thrown with <see cref="WorkerFrameProtocolErrorCode.ProtocolVersionMismatch"/>,
    /// <see cref="WorkerFrameProtocolErrorCode.SessionMismatch"/> or
    /// <see cref="WorkerFrameProtocolErrorCode.InvalidEnvelope"/> for the first check that fails.
    /// </exception>
    public static void Validate(WorkerEnvelope envelope, WorkerFrameProtocolOptions options)
    {
        var actualVersion = envelope.ProtocolVersion;
        var expectedVersion = options.ProtocolVersion;
        if (actualVersion != expectedVersion)
        {
            throw new WorkerFrameProtocolException(
                WorkerFrameProtocolErrorCode.ProtocolVersionMismatch,
                $"Worker envelope protocol version {actualVersion} does not match expected version {expectedVersion}.");
        }

        bool sessionMatches = string.Equals(envelope.SessionId, options.SessionId, StringComparison.Ordinal);
        if (!sessionMatches)
        {
            // The message deliberately omits both session ids.
            throw new WorkerFrameProtocolException(
                WorkerFrameProtocolErrorCode.SessionMismatch,
                "Worker envelope session id does not match the owning worker session.");
        }

        bool hasBody = envelope.BodyCase != WorkerEnvelope.BodyOneofCase.None;
        if (!hasBody)
        {
            throw new WorkerFrameProtocolException(
                WorkerFrameProtocolErrorCode.InvalidEnvelope,
                "Worker envelope must include a typed body.");
        }
    }
}
|
||||
@@ -0,0 +1,15 @@
|
||||
namespace ZB.MOM.WW.MxGateway.Worker.Ipc;
|
||||
|
||||
/// <summary>Classifies failures raised by the worker named-pipe frame protocol.</summary>
public enum WorkerFrameProtocolErrorCode
{
    /// <summary>
    /// The zero/default value. NOTE(review): no code in view raises this value;
    /// presumably it stands for an unclassified failure — confirm.
    /// </summary>
    Unknown = 0,

    /// <summary>
    /// The protocol options were invalid: a missing session id or nonce, a zero
    /// protocol version, or a non-positive maximum message size.
    /// </summary>
    InvalidConfiguration = 1,

    /// <summary>The stream ended before the expected number of frame bytes were read.</summary>
    EndOfStream = 2,

    /// <summary>The frame length prefix was invalid, e.g. a payload length of zero.</summary>
    MalformedLength = 3,

    /// <summary>The payload length exceeded the configured maximum message size.</summary>
    MessageTooLarge = 4,

    /// <summary>
    /// The payload was not a valid WorkerEnvelope protobuf message, the envelope
    /// had no typed body, or it serialized to an empty payload.
    /// </summary>
    InvalidEnvelope = 5,

    /// <summary>The protocol version did not match the expected or supported version.</summary>
    ProtocolVersionMismatch = 6,

    /// <summary>The envelope session id did not match the owning worker session.</summary>
    SessionMismatch = 7,

    /// <summary>
    /// A nonce did not match. NOTE(review): raised by code outside this view
    /// (presumably the startup handshake) — confirm.
    /// </summary>
    NonceMismatch = 8,

    /// <summary>
    /// An envelope carried a body that was not expected. NOTE(review): raised by
    /// code outside this view — confirm the exact conditions.
    /// </summary>
    UnexpectedEnvelopeBody = 9,
}
|
||||
@@ -0,0 +1,42 @@
|
||||
using System;
|
||||
|
||||
namespace ZB.MOM.WW.MxGateway.Worker.Ipc;
|
||||
|
||||
/// <summary>
/// Signals a named-pipe frame protocol failure and carries its classification.
/// </summary>
public sealed class WorkerFrameProtocolException : Exception
{
    /// <summary>
    /// Creates the exception from an error code and a message.
    /// </summary>
    /// <param name="errorCode">Protocol error classification.</param>
    /// <param name="message">Exception message.</param>
    public WorkerFrameProtocolException(WorkerFrameProtocolErrorCode errorCode, string message)
        : base(message)
        => ErrorCode = errorCode;

    /// <summary>
    /// Creates the exception from an error code, a message, and the exception that caused it.
    /// </summary>
    /// <param name="errorCode">Protocol error classification.</param>
    /// <param name="message">Exception message.</param>
    /// <param name="innerException">Underlying cause.</param>
    public WorkerFrameProtocolException(
        WorkerFrameProtocolErrorCode errorCode,
        string message,
        Exception innerException)
        : base(message, innerException)
        => ErrorCode = errorCode;

    /// <summary>
    /// Gets the protocol error code that classifies this failure.
    /// </summary>
    public WorkerFrameProtocolErrorCode ErrorCode { get; }
}
|
||||
@@ -0,0 +1,103 @@
|
||||
using System;
|
||||
using ZB.MOM.WW.MxGateway.Contracts;
|
||||
using ZB.MOM.WW.MxGateway.Worker.Bootstrap;
|
||||
|
||||
namespace ZB.MOM.WW.MxGateway.Worker.Ipc;
|
||||
|
||||
/// <summary>Validated, immutable configuration for the worker frame protocol.</summary>
public sealed class WorkerFrameProtocolOptions
{
    /// <summary>Default maximum message size in bytes (16 MB).</summary>
    public const int DefaultMaxMessageBytes = 16 * 1024 * 1024;

    /// <summary>
    /// Builds protocol options from <paramref name="options"/> using
    /// <see cref="DefaultMaxMessageBytes"/> as the message size limit.
    /// </summary>
    /// <param name="options">Worker initialization options.</param>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
    /// <exception cref="WorkerFrameProtocolException">
    /// A value carried by <paramref name="options"/> fails validation.
    /// </exception>
    public WorkerFrameProtocolOptions(WorkerOptions options)
        : this(
            // Only a null options object is an argument error. A null session id
            // flows through to the primary constructor, which reports it as
            // InvalidConfiguration instead of blaming the options argument.
            RequireOptions(options).SessionId,
            options.ProtocolVersion,
            options.Nonce,
            DefaultMaxMessageBytes)
    {
    }

    /// <summary>
    /// Builds protocol options with <see cref="DefaultMaxMessageBytes"/> as the message size limit.
    /// </summary>
    /// <param name="sessionId">Identifier of the session.</param>
    /// <param name="protocolVersion">Protocol version.</param>
    /// <param name="nonce">Nonce for startup validation.</param>
    /// <exception cref="WorkerFrameProtocolException">An argument fails validation.</exception>
    public WorkerFrameProtocolOptions(
        string sessionId,
        uint protocolVersion,
        string nonce)
        : this(
            sessionId,
            protocolVersion,
            nonce,
            DefaultMaxMessageBytes)
    {
    }

    /// <summary>Builds protocol options from explicit values, validating each one.</summary>
    /// <param name="sessionId">Identifier of the session; must not be null or whitespace.</param>
    /// <param name="protocolVersion">
    /// Protocol version; must be non-zero and equal to
    /// <c>GatewayContractInfo.WorkerProtocolVersion</c>.
    /// </param>
    /// <param name="nonce">Nonce for startup validation; must not be null or whitespace.</param>
    /// <param name="maxMessageBytes">Maximum message size in bytes; must be greater than zero.</param>
    /// <exception cref="WorkerFrameProtocolException">
    /// Thrown with <see cref="WorkerFrameProtocolErrorCode.InvalidConfiguration"/> or
    /// <see cref="WorkerFrameProtocolErrorCode.ProtocolVersionMismatch"/> for the first
    /// check that fails.
    /// </exception>
    public WorkerFrameProtocolOptions(
        string sessionId,
        uint protocolVersion,
        string nonce,
        int maxMessageBytes)
    {
        if (string.IsNullOrWhiteSpace(sessionId))
        {
            throw new WorkerFrameProtocolException(
                WorkerFrameProtocolErrorCode.InvalidConfiguration,
                "Worker frame protocol requires a session id.");
        }

        if (protocolVersion == 0)
        {
            throw new WorkerFrameProtocolException(
                WorkerFrameProtocolErrorCode.InvalidConfiguration,
                "Worker frame protocol requires a non-zero protocol version.");
        }

        if (protocolVersion != GatewayContractInfo.WorkerProtocolVersion)
        {
            throw new WorkerFrameProtocolException(
                WorkerFrameProtocolErrorCode.ProtocolVersionMismatch,
                $"Worker frame protocol version {protocolVersion} is not supported.");
        }

        if (string.IsNullOrWhiteSpace(nonce))
        {
            throw new WorkerFrameProtocolException(
                WorkerFrameProtocolErrorCode.InvalidConfiguration,
                "Worker frame protocol requires a nonce.");
        }

        if (maxMessageBytes <= 0)
        {
            throw new WorkerFrameProtocolException(
                WorkerFrameProtocolErrorCode.InvalidConfiguration,
                "Worker frame protocol max message size must be greater than zero.");
        }

        SessionId = sessionId;
        ProtocolVersion = protocolVersion;
        Nonce = nonce;
        MaxMessageBytes = maxMessageBytes;
    }

    /// <summary>Gets the session ID for the worker protocol.</summary>
    public string SessionId { get; }

    /// <summary>Gets the protocol version.</summary>
    public uint ProtocolVersion { get; }

    /// <summary>Gets the nonce for startup validation.</summary>
    public string Nonce { get; }

    /// <summary>Gets the maximum message size in bytes.</summary>
    public int MaxMessageBytes { get; }

    /// <summary>Returns <paramref name="options"/>, or throws when it is null.</summary>
    private static WorkerOptions RequireOptions(WorkerOptions options)
    {
        if (options is null)
        {
            throw new ArgumentNullException(nameof(options));
        }

        return options;
    }
}
|
||||
@@ -0,0 +1,113 @@
|
||||
using System;
|
||||
using System.Buffers;
|
||||
using System.IO;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Google.Protobuf;
|
||||
using ZB.MOM.WW.MxGateway.Contracts.Proto;
|
||||
|
||||
namespace ZB.MOM.WW.MxGateway.Worker.Ipc;
|
||||
|
||||
/// <summary>
/// Reads WorkerEnvelope protobuf frames from a stream. Each frame is a 4-byte
/// little-endian payload length followed by the payload bytes.
/// </summary>
public sealed class WorkerFrameReader
{
    private readonly WorkerFrameProtocolOptions _options;
    private readonly Stream _stream;

    /// <summary>Creates a reader over a stream with the given protocol options.</summary>
    /// <param name="stream">Stream to read frames from.</param>
    /// <param name="options">Protocol options for frame validation.</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public WorkerFrameReader(Stream stream, WorkerFrameProtocolOptions options)
    {
        if (stream is null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        if (options is null)
        {
            throw new ArgumentNullException(nameof(options));
        }

        _stream = stream;
        _options = options;
    }

    /// <summary>Reads one length-prefixed frame, parses it, and validates the envelope.</summary>
    /// <param name="cancellationToken">Token to cancel the asynchronous operation.</param>
    /// <returns>The parsed and validated envelope.</returns>
    /// <exception cref="WorkerFrameProtocolException">
    /// The stream ended early, the length prefix was zero or above the configured
    /// maximum, the payload was not a valid WorkerEnvelope, or validation failed.
    /// </exception>
    public async Task<WorkerEnvelope> ReadAsync(CancellationToken cancellationToken = default)
    {
        byte[] header = new byte[sizeof(uint)];
        await FillAsync(header, header.Length, cancellationToken).ConfigureAwait(false);

        uint declaredLength = DecodeUInt32LittleEndian(header);
        if (declaredLength == 0)
        {
            throw new WorkerFrameProtocolException(
                WorkerFrameProtocolErrorCode.MalformedLength,
                "Worker frame payload length must be greater than zero.");
        }

        if (declaredLength > _options.MaxMessageBytes)
        {
            throw new WorkerFrameProtocolException(
                WorkerFrameProtocolErrorCode.MessageTooLarge,
                $"Worker frame payload length {declaredLength} exceeds the configured maximum of {_options.MaxMessageBytes} bytes.");
        }

        // The payload buffer comes from the shared pool instead of a fresh
        // allocation per frame. The rented array may be longer than requested,
        // so only the first byteCount bytes are filled and parsed. The buffer
        // goes back to the pool as soon as parsing has finished or failed.
        int byteCount = checked((int)declaredLength);
        byte[] rented = ArrayPool<byte>.Shared.Rent(byteCount);
        WorkerEnvelope envelope;
        try
        {
            await FillAsync(rented, byteCount, cancellationToken).ConfigureAwait(false);
            envelope = ParseEnvelope(rented, byteCount);
        }
        finally
        {
            ArrayPool<byte>.Shared.Return(rented);
        }

        WorkerEnvelopeValidator.Validate(envelope, _options);
        return envelope;
    }

    /// <summary>
    /// Parses the first <paramref name="length"/> bytes of <paramref name="buffer"/>,
    /// translating protobuf parse failures into a protocol exception.
    /// </summary>
    private static WorkerEnvelope ParseEnvelope(byte[] buffer, int length)
    {
        try
        {
            return WorkerEnvelope.Parser.ParseFrom(buffer, 0, length);
        }
        catch (InvalidProtocolBufferException parseFailure)
        {
            throw new WorkerFrameProtocolException(
                WorkerFrameProtocolErrorCode.InvalidEnvelope,
                "Worker frame payload is not a valid WorkerEnvelope protobuf message.",
                parseFailure);
        }
    }

    /// <summary>Decodes the first four bytes of <paramref name="buffer"/> as a little-endian uint.</summary>
    private static uint DecodeUInt32LittleEndian(byte[] buffer)
    {
        uint value = buffer[0];
        value |= (uint)buffer[1] << 8;
        value |= (uint)buffer[2] << 16;
        value |= (uint)buffer[3] << 24;
        return value;
    }

    /// <summary>
    /// Reads from the stream until <paramref name="count"/> bytes are in
    /// <paramref name="buffer"/>; a zero-byte read is reported as EndOfStream.
    /// </summary>
    private async Task FillAsync(byte[] buffer, int count, CancellationToken cancellationToken)
    {
        for (int filled = 0; filled < count;)
        {
            int received = await _stream
                .ReadAsync(buffer, filled, count - filled, cancellationToken)
                .ConfigureAwait(false);

            if (received == 0)
            {
                throw new WorkerFrameProtocolException(
                    WorkerFrameProtocolErrorCode.EndOfStream,
                    "Worker frame ended before the expected number of bytes were read.");
            }

            filled += received;
        }
    }
}
|
||||
@@ -0,0 +1,88 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Google.Protobuf;
|
||||
using ZB.MOM.WW.MxGateway.Contracts.Proto;
|
||||
|
||||
namespace ZB.MOM.WW.MxGateway.Worker.Ipc;
|
||||
|
||||
/// <summary>
/// Writes WorkerEnvelope frames to a stream as a 4-byte little-endian payload
/// length followed by the protobuf payload. Writes are serialized by an internal semaphore.
/// </summary>
public sealed class WorkerFrameWriter
{
    private readonly WorkerFrameProtocolOptions _options;
    private readonly SemaphoreSlim _writeLock = new(1, 1);
    private readonly Stream _stream;

    /// <summary>Creates a writer over a stream with the given protocol options.</summary>
    /// <param name="stream">Stream to write frames to.</param>
    /// <param name="options">Protocol options for frame encoding.</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public WorkerFrameWriter(Stream stream, WorkerFrameProtocolOptions options)
    {
        if (stream is null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        if (options is null)
        {
            throw new ArgumentNullException(nameof(options));
        }

        _stream = stream;
        _options = options;
    }

    /// <summary>Validates, serializes, and writes one envelope as a length-prefixed frame.</summary>
    /// <param name="envelope">Worker envelope to write.</param>
    /// <param name="cancellationToken">Token to cancel the asynchronous operation.</param>
    /// <returns>A task that completes once the frame has been written and flushed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="envelope"/> is null.</exception>
    /// <exception cref="WorkerFrameProtocolException">
    /// The envelope fails validation, serializes to an empty payload, or exceeds
    /// the configured maximum message size.
    /// </exception>
    public async Task WriteAsync(
        WorkerEnvelope envelope,
        CancellationToken cancellationToken = default)
    {
        _ = envelope ?? throw new ArgumentNullException(nameof(envelope));

        WorkerEnvelopeValidator.Validate(envelope, _options);

        int bodySize = envelope.CalculateSize();
        if (bodySize == 0)
        {
            throw new WorkerFrameProtocolException(
                WorkerFrameProtocolErrorCode.InvalidEnvelope,
                "Worker envelope cannot serialize to an empty payload.");
        }

        if (bodySize > _options.MaxMessageBytes)
        {
            throw new WorkerFrameProtocolException(
                WorkerFrameProtocolErrorCode.MessageTooLarge,
                $"Worker envelope payload length {bodySize} exceeds the configured maximum of {_options.MaxMessageBytes} bytes.");
        }

        // One buffer holds the 4-byte length prefix and the payload, so the
        // envelope is serialized once and sent with a single stream write.
        // This avoids a second size calculation inside ToByteArray(), a
        // separate prefix array, and a separate prefix write.
        int totalSize = sizeof(uint) + bodySize;
        byte[] buffer = new byte[totalSize];
        EncodeUInt32LittleEndian(buffer, (uint)bodySize);
        envelope.WriteTo(buffer.AsSpan(sizeof(uint), bodySize));

        await _writeLock.WaitAsync(cancellationToken).ConfigureAwait(false);
        try
        {
            await _stream.WriteAsync(buffer, 0, totalSize, cancellationToken).ConfigureAwait(false);
            await _stream.FlushAsync(cancellationToken).ConfigureAwait(false);
        }
        finally
        {
            _writeLock.Release();
        }
    }

    /// <summary>Stores <paramref name="value"/> in the first four bytes of <paramref name="buffer"/>, least significant byte first.</summary>
    private static void EncodeUInt32LittleEndian(byte[] buffer, uint value)
    {
        for (int index = 0; index < sizeof(uint); index++)
        {
            buffer[index] = (byte)(value >> (8 * index));
        }
    }
}
|
||||
@@ -0,0 +1,260 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.IO.Pipes;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using ZB.MOM.WW.MxGateway.Worker.Bootstrap;
|
||||
using Polly;
|
||||
using Polly.Retry;
|
||||
|
||||
namespace ZB.MOM.WW.MxGateway.Worker.Ipc;
|
||||
|
||||
/// <summary>
/// Connects to the gateway via a named pipe and runs the worker frame protocol session.
/// </summary>
public sealed class WorkerPipeClient : IWorkerPipeClient
{
    /// <summary>Default overall connection timeout in milliseconds.</summary>
    public const int DefaultConnectTimeoutMilliseconds = 30000;

    /// <summary>Default per-attempt connection timeout in milliseconds.</summary>
    public const int DefaultConnectAttemptTimeoutMilliseconds = 2000;

    /// <summary>Environment variable for overriding the per-attempt connection timeout.</summary>
    public const string ConnectAttemptTimeoutEnvironmentVariableName =
        "MXGATEWAY_WORKER_PIPE_CONNECT_ATTEMPT_TIMEOUT_MS";

    private readonly int _connectTimeoutMilliseconds;
    private readonly int _connectAttemptTimeoutMilliseconds;
    private readonly Func<Stream, WorkerFrameProtocolOptions, IWorkerLogger?, WorkerPipeSession> _sessionFactory;
    private readonly IWorkerLogger? _logger;

    /// <summary>
    /// Initializes a worker pipe client with default timeouts.
    /// </summary>
    public WorkerPipeClient()
        : this(null, DefaultConnectTimeoutMilliseconds)
    {
    }

    /// <summary>
    /// Initializes a worker pipe client with a logger and default timeouts.
    /// </summary>
    /// <param name="logger">Optional logger for diagnostic output.</param>
    public WorkerPipeClient(IWorkerLogger? logger)
        : this(logger, DefaultConnectTimeoutMilliseconds)
    {
    }

    /// <summary>
    /// Initializes a worker pipe client with a custom overall connect timeout.
    /// </summary>
    /// <param name="connectTimeoutMilliseconds">Overall connection timeout in milliseconds.</param>
    public WorkerPipeClient(int connectTimeoutMilliseconds)
        : this(null, connectTimeoutMilliseconds)
    {
    }

    /// <summary>
    /// Initializes a worker pipe client with a custom overall timeout and a
    /// session factory that does not take a logger. No logger is used by this client.
    /// </summary>
    /// <param name="connectTimeoutMilliseconds">Overall connection timeout in milliseconds.</param>
    /// <param name="sessionFactory">Factory creating the worker pipe session.</param>
    /// <exception cref="ArgumentNullException"><paramref name="sessionFactory"/> is null.</exception>
    public WorkerPipeClient(
        int connectTimeoutMilliseconds,
        Func<Stream, WorkerFrameProtocolOptions, WorkerPipeSession> sessionFactory)
        : this(
            null,
            connectTimeoutMilliseconds,
            ResolveDefaultConnectAttemptTimeoutMilliseconds(),
            AdaptSessionFactory(sessionFactory))
    {
    }

    /// <summary>
    /// Initializes a worker pipe client with a logger and custom overall timeout.
    /// </summary>
    /// <param name="logger">Optional logger for diagnostic output.</param>
    /// <param name="connectTimeoutMilliseconds">Overall connection timeout in milliseconds.</param>
    public WorkerPipeClient(
        IWorkerLogger? logger,
        int connectTimeoutMilliseconds)
        : this(
            logger,
            connectTimeoutMilliseconds,
            ResolveDefaultConnectAttemptTimeoutMilliseconds(),
            (stream, frameOptions, workerLogger) => new WorkerPipeSession(stream, frameOptions, workerLogger))
    {
    }

    /// <summary>
    /// Initializes a worker pipe client with logger, timeouts, and a session factory.
    /// </summary>
    /// <param name="logger">Optional logger for diagnostic output.</param>
    /// <param name="connectTimeoutMilliseconds">Overall connection timeout in milliseconds.</param>
    /// <param name="sessionFactory">Factory creating the worker pipe session.</param>
    public WorkerPipeClient(
        IWorkerLogger? logger,
        int connectTimeoutMilliseconds,
        Func<Stream, WorkerFrameProtocolOptions, IWorkerLogger?, WorkerPipeSession> sessionFactory)
        : this(
            logger,
            connectTimeoutMilliseconds,
            ResolveDefaultConnectAttemptTimeoutMilliseconds(),
            sessionFactory)
    {
    }

    /// <summary>
    /// Initializes a worker pipe client with full configuration.
    /// </summary>
    /// <param name="logger">Optional logger for diagnostic output.</param>
    /// <param name="connectTimeoutMilliseconds">Overall connection timeout in milliseconds.</param>
    /// <param name="connectAttemptTimeoutMilliseconds">Per-attempt connection timeout in milliseconds.</param>
    /// <param name="sessionFactory">Factory creating the worker pipe session.</param>
    /// <exception cref="ArgumentOutOfRangeException">Either timeout is zero or negative.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="sessionFactory"/> is null.</exception>
    public WorkerPipeClient(
        IWorkerLogger? logger,
        int connectTimeoutMilliseconds,
        int connectAttemptTimeoutMilliseconds,
        Func<Stream, WorkerFrameProtocolOptions, IWorkerLogger?, WorkerPipeSession> sessionFactory)
    {
        if (connectTimeoutMilliseconds <= 0)
        {
            throw new ArgumentOutOfRangeException(
                nameof(connectTimeoutMilliseconds),
                "Worker pipe connect timeout must be greater than zero.");
        }

        if (connectAttemptTimeoutMilliseconds <= 0)
        {
            throw new ArgumentOutOfRangeException(
                nameof(connectAttemptTimeoutMilliseconds),
                "Worker pipe connect attempt timeout must be greater than zero.");
        }

        _logger = logger;
        _sessionFactory = sessionFactory ?? throw new ArgumentNullException(nameof(sessionFactory));
        _connectTimeoutMilliseconds = connectTimeoutMilliseconds;
        _connectAttemptTimeoutMilliseconds = connectAttemptTimeoutMilliseconds;
    }

    /// <summary>
    /// Runs the worker by connecting to the gateway and executing the frame protocol.
    /// </summary>
    /// <param name="options">Worker configuration options.</param>
    /// <param name="cancellationToken">Token to cancel the asynchronous operation.</param>
    /// <returns>A task that completes when the session's run completes.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
    /// <exception cref="TimeoutException">
    /// The pipe did not connect within the overall connect timeout.
    /// </exception>
    public async Task RunAsync(
        WorkerOptions options,
        CancellationToken cancellationToken = default)
    {
        if (options is null)
        {
            throw new ArgumentNullException(nameof(options));
        }

        WorkerFrameProtocolOptions frameOptions = new(options);

        // The pipe is disposed when this method exits, whether the session
        // completes, fails, or is cancelled.
        using NamedPipeClientStream pipe = await ConnectWithRetryAsync(options.PipeName, cancellationToken)
            .ConfigureAwait(false);

        WorkerPipeSession session = _sessionFactory(pipe, frameOptions, _logger);
        await session.RunAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Wraps a logger-less session factory in the three-argument shape used
    /// internally. A null factory is rejected here because the primary
    /// constructor's null check would only ever see the non-null wrapper.
    /// </summary>
    private static Func<Stream, WorkerFrameProtocolOptions, IWorkerLogger?, WorkerPipeSession> AdaptSessionFactory(
        Func<Stream, WorkerFrameProtocolOptions, WorkerPipeSession> sessionFactory)
    {
        if (sessionFactory is null)
        {
            throw new ArgumentNullException(nameof(sessionFactory));
        }

        return (stream, frameOptions, _) => sessionFactory(stream, frameOptions);
    }

    /// <summary>
    /// Retries single connection attempts with exponential backoff until one
    /// succeeds, the overall connect timeout elapses, or the caller cancels.
    /// </summary>
    private async Task<NamedPipeClientStream> ConnectWithRetryAsync(
        string pipeName,
        CancellationToken cancellationToken)
    {
        // The real bound on connection attempts is the connectDeadline token
        // below (CancelAfter(connectTimeout)): Polly stops retrying as soon as
        // that token is cancelled. Driving retries purely off the deadline —
        // rather than a fragile attempt-count formula that ignored the
        // exponential backoff between attempts — keeps the time budget the
        // single source of truth. MaxRetryAttempts is set to its maximum so it
        // never ends the retry loop before the deadline does.
        ResiliencePipeline<NamedPipeClientStream> pipeline = new ResiliencePipelineBuilder<NamedPipeClientStream>()
            .AddRetry(new RetryStrategyOptions<NamedPipeClientStream>
            {
                MaxRetryAttempts = int.MaxValue,
                BackoffType = DelayBackoffType.Exponential,
                UseJitter = true,
                Delay = TimeSpan.FromMilliseconds(250),
                MaxDelay = TimeSpan.FromSeconds(2),
                ShouldHandle = new PredicateBuilder<NamedPipeClientStream>()
                    .Handle<Exception>(exception => exception is TimeoutException or IOException),
                OnRetry = args =>
                {
                    args.Outcome.Result?.Dispose();
                    _logger?.Information(
                        "WorkerPipeConnectRetry",
                        new Dictionary<string, object?>
                        {
                            ["attempt"] = args.AttemptNumber + 1,
                            ["pipe_name"] = pipeName,
                        });
                    return default;
                },
            })
            .Build();

        using CancellationTokenSource connectDeadline =
            CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
        connectDeadline.CancelAfter(_connectTimeoutMilliseconds);

        try
        {
            return await pipeline.ExecuteAsync(
                    async token => await ConnectSingleAttemptAsync(pipeName, token).ConfigureAwait(false),
                    connectDeadline.Token)
                .ConfigureAwait(false);
        }
        catch (OperationCanceledException) when (!cancellationToken.IsCancellationRequested)
        {
            // Cancellation without a caller request means the deadline fired,
            // so it is reported as a timeout instead.
            throw new TimeoutException(
                $"Worker pipe {pipeName} did not connect within {_connectTimeoutMilliseconds}ms.");
        }
    }

    /// <summary>
    /// Makes one connection attempt bounded by the per-attempt timeout. The
    /// pipe is disposed if the attempt fails for any reason.
    /// </summary>
    private async Task<NamedPipeClientStream> ConnectSingleAttemptAsync(
        string pipeName,
        CancellationToken cancellationToken)
    {
        NamedPipeClientStream pipe = new(
            ".",
            pipeName,
            PipeDirection.InOut,
            PipeOptions.Asynchronous);

        try
        {
            using CancellationTokenSource attemptTimeout =
                CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
            attemptTimeout.CancelAfter(_connectAttemptTimeoutMilliseconds);

            // Connect is a blocking call, so it runs on the thread pool. The
            // token is checked before Connect starts; once started, Connect is
            // bounded by its own millisecond timeout argument.
            await Task.Run(
                    () =>
                    {
                        attemptTimeout.Token.ThrowIfCancellationRequested();
                        pipe.Connect(_connectAttemptTimeoutMilliseconds);
                    },
                    attemptTimeout.Token)
                .ConfigureAwait(false);

            return pipe;
        }
        catch
        {
            pipe.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Reads the per-attempt timeout from the environment, falling back to
    /// <see cref="DefaultConnectAttemptTimeoutMilliseconds"/> when the variable
    /// is unset, not an integer, or not positive.
    /// </summary>
    private static int ResolveDefaultConnectAttemptTimeoutMilliseconds()
    {
        string? configuredValue = Environment.GetEnvironmentVariable(ConnectAttemptTimeoutEnvironmentVariableName);

        // Environment values are machine-readable, so they are parsed with the
        // invariant culture rather than the current one.
        bool parsed = int.TryParse(
            configuredValue,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out int milliseconds);

        return parsed && milliseconds > 0
            ? milliseconds
            : DefaultConnectAttemptTimeoutMilliseconds;
    }
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,88 @@
|
||||
using System;
|
||||
|
||||
namespace ZB.MOM.WW.MxGateway.Worker.Ipc;
|
||||
|
||||
/// <summary>Heartbeat settings for a worker pipe session, with validation.</summary>
public sealed class WorkerPipeSessionOptions
{
    /// <summary>Default heartbeat interval (5 seconds).</summary>
    public static readonly TimeSpan DefaultHeartbeatInterval = TimeSpan.FromSeconds(5);

    /// <summary>Default heartbeat grace period (15 seconds).</summary>
    public static readonly TimeSpan DefaultHeartbeatGrace = TimeSpan.FromSeconds(15);

    /// <summary>
    /// Default defensive ceiling beyond which the watchdog fires
    /// <see cref="ZB.MOM.WW.MxGateway.Contracts.Proto.WorkerFaultCategory.StaHung"/>
    /// even while a command is in flight (75 seconds = 5 ×
    /// <see cref="DefaultHeartbeatGrace"/>). The rationale is described on
    /// <see cref="HeartbeatStuckCeiling"/>.
    /// </summary>
    public static readonly TimeSpan DefaultHeartbeatStuckCeiling = TimeSpan.FromSeconds(75);

    /// <summary>Creates options populated with the default heartbeat values.</summary>
    public WorkerPipeSessionOptions()
    {
    }

    /// <summary>Gets or sets the heartbeat interval.</summary>
    public TimeSpan HeartbeatInterval { get; set; } = DefaultHeartbeatInterval;

    /// <summary>Gets or sets the heartbeat grace period.</summary>
    public TimeSpan HeartbeatGrace { get; set; } = DefaultHeartbeatGrace;

    /// <summary>
    /// Gets or sets the defensive upper bound on how long the watchdog
    /// suppresses its <c>StaHung</c> fault while a command is in flight.
    /// Worker-017 suppresses the watchdog when the heartbeat snapshot's
    /// <c>CurrentCommandCorrelationId</c> is non-empty, so that a
    /// legitimately slow command (e.g. <c>ReadBulk</c> against many
    /// uncached tags) does not self-fault. A truly stuck synchronous COM
    /// call against a dead MXAccess provider, however, leaves
    /// <c>CurrentCommandCorrelationId</c> non-empty forever and would
    /// permanently defeat the watchdog. This ceiling bounds that
    /// suppression: once <c>LastStaActivityUtc</c> has been stale for
    /// longer than the ceiling, the watchdog DOES fire <c>StaHung</c> even
    /// with a command in flight, on the assumption that no legitimate STA
    /// command should run that long without periodically refreshing
    /// activity. The default is <see cref="DefaultHeartbeatStuckCeiling"/>
    /// (75 seconds = 5 × <see cref="DefaultHeartbeatGrace"/>); raise it
    /// for deployments that run very long bulk operations.
    /// </summary>
    public TimeSpan HeartbeatStuckCeiling { get; set; } = DefaultHeartbeatStuckCeiling;

    /// <summary>
    /// Checks that every heartbeat value is positive and that the stuck
    /// ceiling is strictly greater than the grace period.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// Thrown for the first check that fails, with the offending property name as the parameter name.
    /// </exception>
    public void Validate()
    {
        RequirePositive(
            HeartbeatInterval,
            nameof(HeartbeatInterval),
            "Worker heartbeat interval must be greater than zero.");

        RequirePositive(
            HeartbeatGrace,
            nameof(HeartbeatGrace),
            "Worker heartbeat grace must be greater than zero.");

        RequirePositive(
            HeartbeatStuckCeiling,
            nameof(HeartbeatStuckCeiling),
            "Worker heartbeat stuck ceiling must be greater than zero.");

        bool ceilingExceedsGrace = HeartbeatStuckCeiling > HeartbeatGrace;
        if (!ceilingExceedsGrace)
        {
            throw new ArgumentOutOfRangeException(
                nameof(HeartbeatStuckCeiling),
                "Worker heartbeat stuck ceiling must be greater than HeartbeatGrace; "
                + "otherwise it would fire before the in-flight-command suppression had any effect.");
        }
    }

    /// <summary>Throws when <paramref name="value"/> is zero or negative.</summary>
    private static void RequirePositive(TimeSpan value, string propertyName, string message)
    {
        if (value <= TimeSpan.Zero)
        {
            throw new ArgumentOutOfRangeException(propertyName, message);
        }
    }
}
|
||||
Reference in New Issue
Block a user