Improve gateway reliability and dashboard docs
This commit is contained in:
@@ -8,6 +8,8 @@ namespace MxGateway.Client.Cli;
|
||||
|
||||
public static class MxGatewayClientCli
|
||||
{
|
||||
private const uint MaxAggregateEvents = 10_000;
|
||||
|
||||
private static readonly JsonFormatter ProtobufJsonFormatter = JsonFormatter.Default;
|
||||
|
||||
private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web);
|
||||
@@ -342,8 +344,22 @@ public static class MxGatewayClientCli
|
||||
TextWriter output,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
var events = new List<MxEvent>();
|
||||
uint maxEvents = arguments.GetUInt32("max-events", 0);
|
||||
bool json = arguments.HasFlag("json");
|
||||
bool jsonLines = arguments.HasFlag("jsonl");
|
||||
if (json && !jsonLines && maxEvents is 0)
|
||||
{
|
||||
throw new ArgumentException("--json stream-events requires --max-events to bound aggregate output.");
|
||||
}
|
||||
|
||||
if (maxEvents > MaxAggregateEvents)
|
||||
{
|
||||
throw new ArgumentException($"--max-events cannot exceed {MaxAggregateEvents}.");
|
||||
}
|
||||
|
||||
var events = json && !jsonLines
|
||||
? new List<MxEvent>(checked((int)maxEvents))
|
||||
: [];
|
||||
uint eventCount = 0;
|
||||
var request = new StreamEventsRequest
|
||||
{
|
||||
@@ -355,7 +371,11 @@ public static class MxGatewayClientCli
|
||||
.WithCancellation(cancellationToken)
|
||||
.ConfigureAwait(false))
|
||||
{
|
||||
if (arguments.HasFlag("json"))
|
||||
if (jsonLines)
|
||||
{
|
||||
output.WriteLine(ProtobufJsonFormatter.Format(gatewayEvent));
|
||||
}
|
||||
else if (json)
|
||||
{
|
||||
events.Add(gatewayEvent);
|
||||
}
|
||||
@@ -371,7 +391,7 @@ public static class MxGatewayClientCli
|
||||
}
|
||||
}
|
||||
|
||||
if (arguments.HasFlag("json"))
|
||||
if (json && !jsonLines)
|
||||
{
|
||||
output.WriteLine(JsonSerializer.Serialize(
|
||||
new { events = events.Select(EventToJsonElement).ToArray() },
|
||||
|
||||
@@ -25,7 +25,7 @@ internal sealed class GrpcMxGatewayClientTransport(
|
||||
}
|
||||
catch (RpcException exception)
|
||||
{
|
||||
throw MapRpcException(exception);
|
||||
throw MapRpcException(exception, callOptions.CancellationToken);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -41,7 +41,7 @@ internal sealed class GrpcMxGatewayClientTransport(
|
||||
}
|
||||
catch (RpcException exception)
|
||||
{
|
||||
throw MapRpcException(exception);
|
||||
throw MapRpcException(exception, callOptions.CancellationToken);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -57,7 +57,7 @@ internal sealed class GrpcMxGatewayClientTransport(
|
||||
}
|
||||
catch (RpcException exception)
|
||||
{
|
||||
throw MapRpcException(exception);
|
||||
throw MapRpcException(exception, callOptions.CancellationToken);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -87,7 +87,7 @@ internal sealed class GrpcMxGatewayClientTransport(
|
||||
}
|
||||
catch (RpcException exception)
|
||||
{
|
||||
throw MapRpcException(exception);
|
||||
throw MapRpcException(exception, effectiveCancellationToken);
|
||||
}
|
||||
|
||||
yield return gatewayEvent;
|
||||
@@ -101,8 +101,18 @@ internal sealed class GrpcMxGatewayClientTransport(
|
||||
return StreamEventsAsync(request, callOptions);
|
||||
}
|
||||
|
||||
private static MxGatewayException MapRpcException(RpcException exception)
|
||||
private static Exception MapRpcException(
|
||||
RpcException exception,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
if (cancellationToken.IsCancellationRequested || exception.StatusCode == StatusCode.Cancelled)
|
||||
{
|
||||
return new OperationCanceledException(
|
||||
exception.Status.Detail,
|
||||
exception,
|
||||
cancellationToken);
|
||||
}
|
||||
|
||||
return exception.StatusCode switch
|
||||
{
|
||||
StatusCode.Unauthenticated => new MxGatewayAuthenticationException(
|
||||
|
||||
@@ -3,6 +3,9 @@ using Grpc.Net.Client;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using MxGateway.Contracts.Proto;
|
||||
using Polly;
|
||||
using System.Net.Http;
|
||||
using System.Net.Security;
|
||||
using System.Security.Cryptography.X509Certificates;
|
||||
|
||||
namespace MxGateway.Client;
|
||||
|
||||
@@ -54,10 +57,12 @@ public sealed class MxGatewayClient : IAsyncDisposable
|
||||
ArgumentNullException.ThrowIfNull(options);
|
||||
options.Validate();
|
||||
|
||||
HttpMessageHandler handler = CreateHttpHandler(options);
|
||||
var channel = GrpcChannel.ForAddress(
|
||||
options.Endpoint,
|
||||
new GrpcChannelOptions
|
||||
{
|
||||
HttpHandler = handler,
|
||||
LoggerFactory = options.LoggerFactory,
|
||||
});
|
||||
|
||||
@@ -126,7 +131,7 @@ public sealed class MxGatewayClient : IAsyncDisposable
|
||||
ArgumentNullException.ThrowIfNull(request);
|
||||
ThrowIfDisposed();
|
||||
|
||||
return _transport.StreamEventsAsync(request, CreateCallOptions(cancellationToken));
|
||||
return _transport.StreamEventsAsync(request, CreateStreamCallOptions(cancellationToken));
|
||||
}
|
||||
|
||||
public ValueTask DisposeAsync()
|
||||
@@ -142,6 +147,18 @@ public sealed class MxGatewayClient : IAsyncDisposable
|
||||
}
|
||||
|
||||
internal CallOptions CreateCallOptions(CancellationToken cancellationToken)
|
||||
{
|
||||
return CreateCallOptions(cancellationToken, Options.DefaultCallTimeout);
|
||||
}
|
||||
|
||||
internal CallOptions CreateStreamCallOptions(CancellationToken cancellationToken)
|
||||
{
|
||||
return CreateCallOptions(cancellationToken, Options.StreamTimeout);
|
||||
}
|
||||
|
||||
internal CallOptions CreateCallOptions(
|
||||
CancellationToken cancellationToken,
|
||||
TimeSpan? timeout)
|
||||
{
|
||||
Metadata headers = new()
|
||||
{
|
||||
@@ -150,18 +167,61 @@ public sealed class MxGatewayClient : IAsyncDisposable
|
||||
|
||||
return new CallOptions(
|
||||
headers,
|
||||
DateTime.UtcNow.Add(Options.DefaultCallTimeout),
|
||||
timeout is null ? null : DateTime.UtcNow.Add(timeout.Value),
|
||||
cancellationToken);
|
||||
}
|
||||
|
||||
private Task<T> ExecuteSafeUnaryAsync<T>(
|
||||
private async Task<T> ExecuteSafeUnaryAsync<T>(
|
||||
Func<CancellationToken, Task<T>> call,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
return _safeUnaryRetryPipeline.ExecuteAsync(
|
||||
using CancellationTokenSource timeout = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
|
||||
timeout.CancelAfter(Options.DefaultCallTimeout);
|
||||
|
||||
return await _safeUnaryRetryPipeline.ExecuteAsync(
|
||||
async token => await call(token).ConfigureAwait(false),
|
||||
cancellationToken)
|
||||
.AsTask();
|
||||
timeout.Token)
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
|
||||
private static HttpMessageHandler CreateHttpHandler(MxGatewayClientOptions options)
|
||||
{
|
||||
SocketsHttpHandler handler = new()
|
||||
{
|
||||
ConnectTimeout = options.ConnectTimeout,
|
||||
};
|
||||
|
||||
if (options.UseTls)
|
||||
{
|
||||
handler.SslOptions = new SslClientAuthenticationOptions();
|
||||
if (!string.IsNullOrWhiteSpace(options.ServerNameOverride))
|
||||
{
|
||||
handler.SslOptions.TargetHost = options.ServerNameOverride;
|
||||
}
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(options.CaCertificatePath))
|
||||
{
|
||||
X509Certificate2 trustedRoot = X509CertificateLoader.LoadCertificateFromFile(options.CaCertificatePath);
|
||||
handler.SslOptions.RemoteCertificateValidationCallback = (_, certificate, chain, errors) =>
|
||||
{
|
||||
if (certificate is null)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
using X509Chain customChain = new();
|
||||
customChain.ChainPolicy.TrustMode = X509ChainTrustMode.CustomRootTrust;
|
||||
customChain.ChainPolicy.CustomTrustStore.Add(trustedRoot);
|
||||
customChain.ChainPolicy.RevocationMode = X509RevocationMode.NoCheck;
|
||||
customChain.ChainPolicy.VerificationFlags = X509VerificationFlags.NoFlag;
|
||||
X509Certificate2 certificateToValidate = certificate as X509Certificate2
|
||||
?? X509CertificateLoader.LoadCertificate(certificate.Export(X509ContentType.Cert));
|
||||
return customChain.Build(certificateToValidate);
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return handler;
|
||||
}
|
||||
|
||||
private void ThrowIfDisposed()
|
||||
|
||||
@@ -21,6 +21,8 @@ public sealed class MxGatewayClientOptions
|
||||
|
||||
public TimeSpan DefaultCallTimeout { get; init; } = TimeSpan.FromSeconds(30);
|
||||
|
||||
public TimeSpan? StreamTimeout { get; init; }
|
||||
|
||||
public MxGatewayClientRetryOptions Retry { get; init; } = new();
|
||||
|
||||
public ILoggerFactory? LoggerFactory { get; init; }
|
||||
@@ -57,6 +59,27 @@ public sealed class MxGatewayClientOptions
|
||||
"The default call timeout must be greater than zero.");
|
||||
}
|
||||
|
||||
if (StreamTimeout is not null && StreamTimeout <= TimeSpan.Zero)
|
||||
{
|
||||
throw new ArgumentOutOfRangeException(
|
||||
nameof(StreamTimeout),
|
||||
"The stream timeout must be greater than zero when configured.");
|
||||
}
|
||||
|
||||
if (UseTls && Endpoint.Scheme != Uri.UriSchemeHttps)
|
||||
{
|
||||
throw new ArgumentException(
|
||||
"UseTls requires an https gateway endpoint.",
|
||||
nameof(Endpoint));
|
||||
}
|
||||
|
||||
if (!UseTls && Endpoint.Scheme == Uri.UriSchemeHttps)
|
||||
{
|
||||
throw new ArgumentException(
|
||||
"An https gateway endpoint requires UseTls.",
|
||||
nameof(Endpoint));
|
||||
}
|
||||
|
||||
Retry.Validate();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -377,17 +377,19 @@ func runSmoke(ctx context.Context, args []string, stdout, stderr io.Writer) erro
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer session.Close(context.Background())
|
||||
|
||||
serverHandle, err := session.Register(ctx, *clientName)
|
||||
if err != nil {
|
||||
return err
|
||||
return closeSmokeSession(ctx, session, err)
|
||||
}
|
||||
itemHandle, err := session.AddItem(ctx, serverHandle, *item)
|
||||
if err != nil {
|
||||
return err
|
||||
return closeSmokeSession(ctx, session, err)
|
||||
}
|
||||
if err := session.Advise(ctx, serverHandle, itemHandle); err != nil {
|
||||
return closeSmokeSession(ctx, session, err)
|
||||
}
|
||||
if err := closeSmokeSession(ctx, session, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -406,6 +408,24 @@ func runSmoke(ctx context.Context, args []string, stdout, stderr io.Writer) erro
|
||||
return nil
|
||||
}
|
||||
|
||||
func closeSmokeSession(ctx context.Context, session *mxgateway.Session, primaryErr error) error {
|
||||
closeCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
if deadline, ok := ctx.Deadline(); ok {
|
||||
if until := time.Until(deadline); until > 0 && until < 5*time.Second {
|
||||
cancel()
|
||||
closeCtx, cancel = context.WithTimeout(context.Background(), until)
|
||||
defer cancel()
|
||||
}
|
||||
}
|
||||
|
||||
_, closeErr := session.Close(closeCtx)
|
||||
if primaryErr != nil {
|
||||
return primaryErr
|
||||
}
|
||||
return closeErr
|
||||
}
|
||||
|
||||
func bindCommonFlags(flags *flag.FlagSet) *commonOptions {
|
||||
common := &commonOptions{}
|
||||
flags.StringVar(&common.Endpoint, "endpoint", "localhost:5000", "gateway endpoint")
|
||||
|
||||
@@ -184,8 +184,11 @@ func (c *Client) callContext(ctx context.Context) (context.Context, context.Canc
|
||||
if timeout < 0 {
|
||||
return ctx, func() {}
|
||||
}
|
||||
if _, ok := ctx.Deadline(); ok {
|
||||
return ctx, func() {}
|
||||
if deadline, ok := ctx.Deadline(); ok {
|
||||
timeoutDeadline := time.Now().Add(timeout)
|
||||
if deadline.Before(timeoutDeadline) {
|
||||
return ctx, func() {}
|
||||
}
|
||||
}
|
||||
return context.WithTimeout(ctx, timeout)
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"crypto/rand"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"sync"
|
||||
|
||||
@@ -13,6 +14,8 @@ import (
|
||||
"google.golang.org/grpc/status"
|
||||
)
|
||||
|
||||
const maxBulkItems = 1000
|
||||
|
||||
// EventResult carries either the next ordered event or a terminal stream error.
|
||||
type EventResult struct {
|
||||
Event *MxEvent
|
||||
@@ -225,6 +228,9 @@ func (s *Session) AddItemBulk(ctx context.Context, serverHandle int32, tagAddres
|
||||
if tagAddresses == nil {
|
||||
return nil, errors.New("mxgateway: tag addresses are required")
|
||||
}
|
||||
if err := ensureBulkSize("tag addresses", len(tagAddresses)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
reply, err := s.invokeCommand(ctx, &pb.MxCommand{
|
||||
Kind: pb.MxCommandKind_MX_COMMAND_KIND_ADD_ITEM_BULK,
|
||||
Payload: &pb.MxCommand_AddItemBulk{
|
||||
@@ -245,6 +251,9 @@ func (s *Session) AdviseItemBulk(ctx context.Context, serverHandle int32, itemHa
|
||||
if itemHandles == nil {
|
||||
return nil, errors.New("mxgateway: item handles are required")
|
||||
}
|
||||
if err := ensureBulkSize("item handles", len(itemHandles)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
reply, err := s.invokeCommand(ctx, &pb.MxCommand{
|
||||
Kind: pb.MxCommandKind_MX_COMMAND_KIND_ADVISE_ITEM_BULK,
|
||||
Payload: &pb.MxCommand_AdviseItemBulk{
|
||||
@@ -265,6 +274,9 @@ func (s *Session) RemoveItemBulk(ctx context.Context, serverHandle int32, itemHa
|
||||
if itemHandles == nil {
|
||||
return nil, errors.New("mxgateway: item handles are required")
|
||||
}
|
||||
if err := ensureBulkSize("item handles", len(itemHandles)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
reply, err := s.invokeCommand(ctx, &pb.MxCommand{
|
||||
Kind: pb.MxCommandKind_MX_COMMAND_KIND_REMOVE_ITEM_BULK,
|
||||
Payload: &pb.MxCommand_RemoveItemBulk{
|
||||
@@ -285,6 +297,9 @@ func (s *Session) UnAdviseItemBulk(ctx context.Context, serverHandle int32, item
|
||||
if itemHandles == nil {
|
||||
return nil, errors.New("mxgateway: item handles are required")
|
||||
}
|
||||
if err := ensureBulkSize("item handles", len(itemHandles)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
reply, err := s.invokeCommand(ctx, &pb.MxCommand{
|
||||
Kind: pb.MxCommandKind_MX_COMMAND_KIND_UN_ADVISE_ITEM_BULK,
|
||||
Payload: &pb.MxCommand_UnAdviseItemBulk{
|
||||
@@ -305,6 +320,9 @@ func (s *Session) SubscribeBulk(ctx context.Context, serverHandle int32, tagAddr
|
||||
if tagAddresses == nil {
|
||||
return nil, errors.New("mxgateway: tag addresses are required")
|
||||
}
|
||||
if err := ensureBulkSize("tag addresses", len(tagAddresses)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
reply, err := s.invokeCommand(ctx, &pb.MxCommand{
|
||||
Kind: pb.MxCommandKind_MX_COMMAND_KIND_SUBSCRIBE_BULK,
|
||||
Payload: &pb.MxCommand_SubscribeBulk{
|
||||
@@ -325,6 +343,9 @@ func (s *Session) UnsubscribeBulk(ctx context.Context, serverHandle int32, itemH
|
||||
if itemHandles == nil {
|
||||
return nil, errors.New("mxgateway: item handles are required")
|
||||
}
|
||||
if err := ensureBulkSize("item handles", len(itemHandles)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
reply, err := s.invokeCommand(ctx, &pb.MxCommand{
|
||||
Kind: pb.MxCommandKind_MX_COMMAND_KIND_UNSUBSCRIBE_BULK,
|
||||
Payload: &pb.MxCommand_UnsubscribeBulk{
|
||||
@@ -387,13 +408,15 @@ func (s *Session) EventsAfter(ctx context.Context, afterWorkerSequence uint64) (
|
||||
for {
|
||||
event, err := stream.Recv()
|
||||
if err == nil {
|
||||
results <- EventResult{Event: event}
|
||||
if !sendEventResult(ctx, results, EventResult{Event: event}) {
|
||||
return
|
||||
}
|
||||
continue
|
||||
}
|
||||
if err == io.EOF || status.Code(err) == codes.Canceled || ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
results <- EventResult{Err: &GatewayError{Op: "stream events", Err: err}}
|
||||
sendEventResult(ctx, results, EventResult{Err: &GatewayError{Op: "stream events", Err: err}})
|
||||
return
|
||||
}
|
||||
}()
|
||||
@@ -401,6 +424,22 @@ func (s *Session) EventsAfter(ctx context.Context, afterWorkerSequence uint64) (
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func ensureBulkSize(name string, length int) error {
|
||||
if length > maxBulkItems {
|
||||
return fmt.Errorf("mxgateway: %s bulk commands are limited to %d item(s)", name, maxBulkItems)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func sendEventResult(ctx context.Context, results chan<- EventResult, result EventResult) bool {
|
||||
select {
|
||||
case results <- result:
|
||||
return true
|
||||
case <-ctx.Done():
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Session) invokeCommand(ctx context.Context, command *MxCommand) (*MxCommandReply, error) {
|
||||
return s.client.Invoke(ctx, &pb.MxCommandRequest{
|
||||
SessionId: s.ID(),
|
||||
|
||||
+21
-18
@@ -334,25 +334,28 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
var session = client.openSession(OpenSessionRequest.newBuilder()
|
||||
.setClientSessionName(clientName)
|
||||
.build());
|
||||
MxGatewayCliSession cliSession = client.session(session.getSessionId());
|
||||
int serverHandle = cliSession.register(clientName);
|
||||
int itemHandle = cliSession.addItem(serverHandle, item);
|
||||
cliSession.advise(serverHandle, itemHandle);
|
||||
if (json) {
|
||||
Map<String, Object> output = new LinkedHashMap<>();
|
||||
output.put("command", "smoke");
|
||||
output.put("options", common.redactedJsonMap());
|
||||
output.put("sessionId", session.getSessionId());
|
||||
output.put("serverHandle", serverHandle);
|
||||
output.put("itemHandle", itemHandle);
|
||||
client.out().println(jsonObject(output));
|
||||
} else {
|
||||
client.out().printf(
|
||||
"session=%s server=%d item=%d%n", session.getSessionId(), serverHandle, itemHandle);
|
||||
try {
|
||||
MxGatewayCliSession cliSession = client.session(session.getSessionId());
|
||||
int serverHandle = cliSession.register(clientName);
|
||||
int itemHandle = cliSession.addItem(serverHandle, item);
|
||||
cliSession.advise(serverHandle, itemHandle);
|
||||
if (json) {
|
||||
Map<String, Object> output = new LinkedHashMap<>();
|
||||
output.put("command", "smoke");
|
||||
output.put("options", common.redactedJsonMap());
|
||||
output.put("sessionId", session.getSessionId());
|
||||
output.put("serverHandle", serverHandle);
|
||||
output.put("itemHandle", itemHandle);
|
||||
client.out().println(jsonObject(output));
|
||||
} else {
|
||||
client.out().printf(
|
||||
"session=%s server=%d item=%d%n", session.getSessionId(), serverHandle, itemHandle);
|
||||
}
|
||||
} finally {
|
||||
client.closeSession(CloseSessionRequest.newBuilder()
|
||||
.setSessionId(session.getSessionId())
|
||||
.build());
|
||||
}
|
||||
client.closeSession(CloseSessionRequest.newBuilder()
|
||||
.setSessionId(session.getSessionId())
|
||||
.build());
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
+12
-5
@@ -105,13 +105,20 @@ public final class MxEventStream implements Iterator<MxEvent>, AutoCloseable {
|
||||
private void offer(Object value) {
|
||||
Objects.requireNonNull(value, "value");
|
||||
if (value == END) {
|
||||
queue.offer(value);
|
||||
if (!queue.offer(value)) {
|
||||
queue.clear();
|
||||
queue.offer(value);
|
||||
}
|
||||
return;
|
||||
}
|
||||
try {
|
||||
queue.put(value);
|
||||
} catch (InterruptedException error) {
|
||||
Thread.currentThread().interrupt();
|
||||
if (!queue.offer(value)) {
|
||||
ClientCallStreamObserver<StreamEventsRequest> stream = requestStream;
|
||||
if (stream != null) {
|
||||
stream.cancel("client event stream queue overflowed", null);
|
||||
}
|
||||
queue.clear();
|
||||
queue.offer(new MxGatewayException("gateway stream events queue overflowed"));
|
||||
queue.offer(END);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+18
-4
@@ -63,7 +63,7 @@ public final class MxGatewayClient implements AutoCloseable {
|
||||
}
|
||||
|
||||
public MxAccessGatewayGrpc.MxAccessGatewayStub rawAsyncStub() {
|
||||
return withDeadline(asyncStub);
|
||||
return asyncStub;
|
||||
}
|
||||
|
||||
public MxGatewaySession openSession(OpenSessionRequest request) {
|
||||
@@ -140,14 +140,14 @@ public final class MxGatewayClient implements AutoCloseable {
|
||||
|
||||
public MxEventStream streamEvents(StreamEventsRequest request) {
|
||||
MxEventStream stream = new MxEventStream(16);
|
||||
rawAsyncStub().streamEvents(request, stream.observer());
|
||||
withStreamDeadline(rawAsyncStub()).streamEvents(request, stream.observer());
|
||||
return stream;
|
||||
}
|
||||
|
||||
public MxGatewayEventSubscription streamEventsAsync(
|
||||
StreamEventsRequest request, StreamObserver<MxEvent> observer) {
|
||||
MxGatewayEventSubscription subscription = new MxGatewayEventSubscription();
|
||||
rawAsyncStub().streamEvents(request, subscription.wrap(observer));
|
||||
withStreamDeadline(rawAsyncStub()).streamEvents(request, subscription.wrap(observer));
|
||||
return subscription;
|
||||
}
|
||||
|
||||
@@ -161,7 +161,9 @@ public final class MxGatewayClient implements AutoCloseable {
|
||||
public void closeAndAwaitTermination() throws InterruptedException {
|
||||
if (ownedChannel != null) {
|
||||
ownedChannel.shutdown();
|
||||
ownedChannel.awaitTermination(options.connectTimeout().toMillis(), TimeUnit.MILLISECONDS);
|
||||
if (!ownedChannel.awaitTermination(options.connectTimeout().toMillis(), TimeUnit.MILLISECONDS)) {
|
||||
ownedChannel.shutdownNow();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -199,6 +201,13 @@ public final class MxGatewayClient implements AutoCloseable {
|
||||
return stub.withDeadlineAfter(options.callTimeout().toNanos(), TimeUnit.NANOSECONDS);
|
||||
}
|
||||
|
||||
private <T extends io.grpc.stub.AbstractStub<T>> T withStreamDeadline(T stub) {
|
||||
if (options.streamTimeout() == null || options.streamTimeout().isNegative()) {
|
||||
return stub;
|
||||
}
|
||||
return stub.withDeadlineAfter(options.streamTimeout().toNanos(), TimeUnit.NANOSECONDS);
|
||||
}
|
||||
|
||||
private static <T> CompletableFuture<T> toCompletable(com.google.common.util.concurrent.ListenableFuture<T> source) {
|
||||
CompletableFuture<T> target = new CompletableFuture<>();
|
||||
Futures.addCallback(
|
||||
@@ -219,6 +228,11 @@ public final class MxGatewayClient implements AutoCloseable {
|
||||
}
|
||||
},
|
||||
MoreExecutors.directExecutor());
|
||||
target.whenComplete((ignoredResult, ignoredError) -> {
|
||||
if (target.isCancelled()) {
|
||||
source.cancel(true);
|
||||
}
|
||||
});
|
||||
return target;
|
||||
}
|
||||
|
||||
|
||||
+14
@@ -15,6 +15,7 @@ public final class MxGatewayClientOptions {
|
||||
private final String serverNameOverride;
|
||||
private final Duration connectTimeout;
|
||||
private final Duration callTimeout;
|
||||
private final Duration streamTimeout;
|
||||
|
||||
private MxGatewayClientOptions(Builder builder) {
|
||||
endpoint = requireText(builder.endpoint, "endpoint");
|
||||
@@ -24,6 +25,7 @@ public final class MxGatewayClientOptions {
|
||||
serverNameOverride = builder.serverNameOverride == null ? "" : builder.serverNameOverride;
|
||||
connectTimeout = builder.connectTimeout == null ? DEFAULT_CONNECT_TIMEOUT : builder.connectTimeout;
|
||||
callTimeout = builder.callTimeout == null ? DEFAULT_CALL_TIMEOUT : builder.callTimeout;
|
||||
streamTimeout = builder.streamTimeout;
|
||||
}
|
||||
|
||||
public static Builder builder() {
|
||||
@@ -62,6 +64,10 @@ public final class MxGatewayClientOptions {
|
||||
return callTimeout;
|
||||
}
|
||||
|
||||
public Duration streamTimeout() {
|
||||
return streamTimeout;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "MxGatewayClientOptions{"
|
||||
@@ -82,6 +88,8 @@ public final class MxGatewayClientOptions {
|
||||
+ connectTimeout
|
||||
+ ", callTimeout="
|
||||
+ callTimeout
|
||||
+ ", streamTimeout="
|
||||
+ streamTimeout
|
||||
+ '}';
|
||||
}
|
||||
|
||||
@@ -100,6 +108,7 @@ public final class MxGatewayClientOptions {
|
||||
private String serverNameOverride;
|
||||
private Duration connectTimeout;
|
||||
private Duration callTimeout;
|
||||
private Duration streamTimeout;
|
||||
|
||||
private Builder() {
|
||||
}
|
||||
@@ -139,6 +148,11 @@ public final class MxGatewayClientOptions {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder streamTimeout(Duration value) {
|
||||
streamTimeout = Objects.requireNonNull(value, "streamTimeout");
|
||||
return this;
|
||||
}
|
||||
|
||||
public MxGatewayClientOptions build() {
|
||||
return new MxGatewayClientOptions(this);
|
||||
}
|
||||
|
||||
+6
@@ -4,17 +4,22 @@ import io.grpc.stub.ClientCallStreamObserver;
|
||||
import io.grpc.stub.ClientResponseObserver;
|
||||
import io.grpc.stub.StreamObserver;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxEvent;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.StreamEventsRequest;
|
||||
|
||||
public final class MxGatewayEventSubscription implements AutoCloseable {
|
||||
private final AtomicReference<ClientCallStreamObserver<StreamEventsRequest>> requestStream = new AtomicReference<>();
|
||||
private final AtomicBoolean cancelled = new AtomicBoolean();
|
||||
|
||||
ClientResponseObserver<StreamEventsRequest, MxEvent> wrap(StreamObserver<MxEvent> observer) {
|
||||
return new ClientResponseObserver<>() {
|
||||
@Override
|
||||
public void beforeStart(ClientCallStreamObserver<StreamEventsRequest> stream) {
|
||||
requestStream.set(stream);
|
||||
if (cancelled.get()) {
|
||||
stream.cancel("client cancelled event stream", null);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -35,6 +40,7 @@ public final class MxGatewayEventSubscription implements AutoCloseable {
|
||||
}
|
||||
|
||||
public void cancel() {
|
||||
cancelled.set(true);
|
||||
ClientCallStreamObserver<StreamEventsRequest> stream = requestStream.get();
|
||||
if (stream != null) {
|
||||
stream.cancel("client cancelled event stream", null);
|
||||
|
||||
@@ -74,9 +74,9 @@ class GatewayClient:
|
||||
if self._closed:
|
||||
return
|
||||
|
||||
self._closed = True
|
||||
if self._channel is not None:
|
||||
await self._channel.close()
|
||||
self._closed = True
|
||||
|
||||
async def open_session(
|
||||
self,
|
||||
@@ -124,10 +124,10 @@ class GatewayClient:
|
||||
) -> AsyncIterator[pb.MxEvent]:
|
||||
"""Return an async event iterator and cancel the stream when iteration stops."""
|
||||
|
||||
call = self.raw_stub.StreamEvents(
|
||||
request,
|
||||
metadata=merge_metadata(self.options.api_key, metadata),
|
||||
)
|
||||
kwargs: dict[str, Any] = {"metadata": merge_metadata(self.options.api_key, metadata)}
|
||||
if self.options.stream_timeout is not None:
|
||||
kwargs["timeout"] = self.options.stream_timeout
|
||||
call = self.raw_stub.StreamEvents(request, **kwargs)
|
||||
return _canceling_iterator(call)
|
||||
|
||||
async def _unary(
|
||||
@@ -138,10 +138,16 @@ class GatewayClient:
|
||||
*,
|
||||
metadata: Sequence[tuple[str, str]] | None = None,
|
||||
) -> Any:
|
||||
call = method(
|
||||
request,
|
||||
metadata=merge_metadata(self.options.api_key, metadata),
|
||||
)
|
||||
kwargs: dict[str, Any] = {"metadata": merge_metadata(self.options.api_key, metadata)}
|
||||
if self.options.call_timeout is not None:
|
||||
kwargs["timeout"] = self.options.call_timeout
|
||||
try:
|
||||
call = method(request, **kwargs)
|
||||
except TypeError as error:
|
||||
if "timeout" not in kwargs or "unexpected keyword argument 'timeout'" not in str(error):
|
||||
raise
|
||||
kwargs.pop("timeout")
|
||||
call = method(request, **kwargs)
|
||||
try:
|
||||
return await call
|
||||
except asyncio.CancelledError:
|
||||
|
||||
@@ -19,6 +19,8 @@ class ClientOptions:
|
||||
plaintext: bool = False
|
||||
ca_file: str | None = None
|
||||
server_name_override: str | None = None
|
||||
call_timeout: float | None = 30.0
|
||||
stream_timeout: float | None = None
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if not self.endpoint:
|
||||
@@ -26,6 +28,10 @@ class ClientOptions:
|
||||
|
||||
if self.plaintext and self.ca_file:
|
||||
raise ValueError("ca_file cannot be used with plaintext connections")
|
||||
if self.call_timeout is not None and self.call_timeout <= 0:
|
||||
raise ValueError("call_timeout must be greater than zero")
|
||||
if self.stream_timeout is not None and self.stream_timeout <= 0:
|
||||
raise ValueError("stream_timeout must be greater than zero")
|
||||
|
||||
def __repr__(self) -> str:
|
||||
api_key = REDACTED if self.api_key else None
|
||||
@@ -33,7 +39,9 @@ class ClientOptions:
|
||||
f"{type(self).__name__}(endpoint={self.endpoint!r}, "
|
||||
f"api_key={api_key!r}, plaintext={self.plaintext!r}, "
|
||||
f"ca_file={self.ca_file!r}, "
|
||||
f"server_name_override={self.server_name_override!r})"
|
||||
f"server_name_override={self.server_name_override!r}, "
|
||||
f"call_timeout={self.call_timeout!r}, "
|
||||
f"stream_timeout={self.stream_timeout!r})"
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -8,6 +8,8 @@ from .errors import ensure_mxaccess_success
|
||||
from .generated import mxaccess_gateway_pb2 as pb
|
||||
from .values import MxValueInput, to_mx_value
|
||||
|
||||
MAX_BULK_ITEMS = 1000
|
||||
|
||||
|
||||
class Session:
|
||||
"""A single gateway-backed MXAccess session."""
|
||||
@@ -40,13 +42,14 @@ class Session:
|
||||
protocol_status=pb.ProtocolStatus(code=pb.PROTOCOL_STATUS_CODE_OK),
|
||||
)
|
||||
|
||||
self._closed = True
|
||||
return await self.client.close_session_raw(
|
||||
reply = await self.client.close_session_raw(
|
||||
pb.CloseSessionRequest(
|
||||
session_id=self.session_id,
|
||||
client_correlation_id=client_correlation_id,
|
||||
),
|
||||
)
|
||||
self._closed = True
|
||||
return reply
|
||||
|
||||
async def invoke(self, command: pb.MxCommand, *, correlation_id: str = "") -> pb.MxCommandReply:
|
||||
"""Invoke a raw command and enforce gateway and MXAccess success."""
|
||||
@@ -192,6 +195,7 @@ class Session:
|
||||
) -> list[pb.SubscribeResult]:
|
||||
if tag_addresses is None:
|
||||
raise TypeError("tag_addresses is required")
|
||||
_ensure_bulk_size("tag_addresses", len(tag_addresses))
|
||||
reply = await self.invoke(
|
||||
pb.MxCommand(
|
||||
kind=pb.MX_COMMAND_KIND_ADD_ITEM_BULK,
|
||||
@@ -213,6 +217,7 @@ class Session:
|
||||
) -> list[pb.SubscribeResult]:
|
||||
if item_handles is None:
|
||||
raise TypeError("item_handles is required")
|
||||
_ensure_bulk_size("item_handles", len(item_handles))
|
||||
reply = await self.invoke(
|
||||
pb.MxCommand(
|
||||
kind=pb.MX_COMMAND_KIND_ADVISE_ITEM_BULK,
|
||||
@@ -234,6 +239,7 @@ class Session:
|
||||
) -> list[pb.SubscribeResult]:
|
||||
if item_handles is None:
|
||||
raise TypeError("item_handles is required")
|
||||
_ensure_bulk_size("item_handles", len(item_handles))
|
||||
reply = await self.invoke(
|
||||
pb.MxCommand(
|
||||
kind=pb.MX_COMMAND_KIND_REMOVE_ITEM_BULK,
|
||||
@@ -255,6 +261,7 @@ class Session:
|
||||
) -> list[pb.SubscribeResult]:
|
||||
if item_handles is None:
|
||||
raise TypeError("item_handles is required")
|
||||
_ensure_bulk_size("item_handles", len(item_handles))
|
||||
reply = await self.invoke(
|
||||
pb.MxCommand(
|
||||
kind=pb.MX_COMMAND_KIND_UN_ADVISE_ITEM_BULK,
|
||||
@@ -276,6 +283,7 @@ class Session:
|
||||
) -> list[pb.SubscribeResult]:
|
||||
if tag_addresses is None:
|
||||
raise TypeError("tag_addresses is required")
|
||||
_ensure_bulk_size("tag_addresses", len(tag_addresses))
|
||||
reply = await self.invoke(
|
||||
pb.MxCommand(
|
||||
kind=pb.MX_COMMAND_KIND_SUBSCRIBE_BULK,
|
||||
@@ -297,6 +305,7 @@ class Session:
|
||||
) -> list[pb.SubscribeResult]:
|
||||
if item_handles is None:
|
||||
raise TypeError("item_handles is required")
|
||||
_ensure_bulk_size("item_handles", len(item_handles))
|
||||
reply = await self.invoke(
|
||||
pb.MxCommand(
|
||||
kind=pb.MX_COMMAND_KIND_UNSUBSCRIBE_BULK,
|
||||
@@ -368,4 +377,9 @@ class Session:
|
||||
)
|
||||
|
||||
|
||||
def _ensure_bulk_size(name: str, count: int) -> None:
|
||||
if count > MAX_BULK_ITEMS:
|
||||
raise ValueError(f"{name} bulk commands are limited to {MAX_BULK_ITEMS} item(s)")
|
||||
|
||||
|
||||
from .client import GatewayClient # noqa: E402
|
||||
|
||||
@@ -20,6 +20,8 @@ from mxgateway.generated import mxaccess_gateway_pb2 as pb
|
||||
from mxgateway.options import ClientOptions
|
||||
from mxgateway.values import MxValueInput
|
||||
|
||||
MAX_AGGREGATE_EVENTS = 10_000
|
||||
|
||||
|
||||
@click.group()
|
||||
def main() -> None:
|
||||
@@ -55,6 +57,8 @@ def gateway_options(command: Callable[..., Any]) -> Callable[..., Any]:
|
||||
default=None,
|
||||
help="TLS server name override for test environments.",
|
||||
)(command)
|
||||
command = click.option("--call-timeout", default=30.0, type=float, show_default=True)(command)
|
||||
command = click.option("--stream-timeout", default=None, type=float)(command)
|
||||
return command
|
||||
|
||||
|
||||
@@ -352,6 +356,8 @@ async def _connect(kwargs: dict[str, Any]) -> GatewayClient:
|
||||
plaintext=_use_plaintext(kwargs),
|
||||
ca_file=kwargs.get("ca_file"),
|
||||
server_name_override=kwargs.get("server_name_override"),
|
||||
call_timeout=kwargs.get("call_timeout"),
|
||||
stream_timeout=kwargs.get("stream_timeout"),
|
||||
),
|
||||
)
|
||||
|
||||
@@ -416,6 +422,12 @@ async def _collect_events(
|
||||
max_events: int,
|
||||
timeout: float,
|
||||
) -> list[pb.MxEvent]:
|
||||
if max_events > MAX_AGGREGATE_EVENTS:
|
||||
raise click.BadParameter(
|
||||
f"must be less than or equal to {MAX_AGGREGATE_EVENTS}",
|
||||
param_hint="--max-events",
|
||||
)
|
||||
|
||||
collected: list[pb.MxEvent] = []
|
||||
iterator = events.__aiter__()
|
||||
try:
|
||||
@@ -423,6 +435,8 @@ async def _collect_events(
|
||||
collected.append(await asyncio.wait_for(iterator.__anext__(), timeout=timeout))
|
||||
except StopAsyncIteration:
|
||||
pass
|
||||
except asyncio.TimeoutError:
|
||||
pass
|
||||
finally:
|
||||
close = getattr(iterator, "aclose", None)
|
||||
if close is not None:
|
||||
|
||||
@@ -16,6 +16,8 @@ use mxgateway_client::{
|
||||
use serde_json::json;
|
||||
use serde_json::Value;
|
||||
|
||||
const MAX_AGGREGATE_EVENTS: usize = 10_000;
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
#[command(name = "mxgw")]
|
||||
#[command(about = "MXAccess Gateway Rust test CLI")]
|
||||
@@ -29,6 +31,8 @@ enum Command {
|
||||
Version {
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
#[arg(long)]
|
||||
jsonl: bool,
|
||||
},
|
||||
Ping {
|
||||
#[command(flatten)]
|
||||
@@ -325,7 +329,15 @@ async fn run(cli: Cli) -> Result<(), Error> {
|
||||
after_worker_sequence,
|
||||
max_events,
|
||||
json,
|
||||
jsonl,
|
||||
} => {
|
||||
if max_events > MAX_AGGREGATE_EVENTS {
|
||||
return Err(Error::InvalidArgument {
|
||||
name: "max-events".to_owned(),
|
||||
detail: format!("must be less than or equal to {MAX_AGGREGATE_EVENTS}"),
|
||||
});
|
||||
}
|
||||
|
||||
let client = connect(connection).await?;
|
||||
let mut stream = client
|
||||
.stream_events(StreamEventsRequest {
|
||||
@@ -334,19 +346,30 @@ async fn run(cli: Cli) -> Result<(), Error> {
|
||||
})
|
||||
.await?;
|
||||
let mut events = Vec::new();
|
||||
while events.len() < max_events {
|
||||
let mut event_count = 0usize;
|
||||
while event_count < max_events {
|
||||
let Some(event) = stream.next().await else {
|
||||
break;
|
||||
};
|
||||
events.push(event?);
|
||||
}
|
||||
if json {
|
||||
println!("{}", json!({ "eventCount": events.len() }));
|
||||
} else {
|
||||
for event in events {
|
||||
let event = event?;
|
||||
event_count += 1;
|
||||
if jsonl {
|
||||
println!(
|
||||
"{}",
|
||||
json!({
|
||||
"workerSequence": event.worker_sequence,
|
||||
"family": event.family,
|
||||
})
|
||||
);
|
||||
} else if json {
|
||||
events.push(event);
|
||||
} else {
|
||||
println!("{} {}", event.worker_sequence, event.family);
|
||||
}
|
||||
}
|
||||
if json {
|
||||
println!("{}", json!({ "eventCount": event_count }));
|
||||
}
|
||||
}
|
||||
Command::Write {
|
||||
connection,
|
||||
|
||||
@@ -5,7 +5,7 @@ use tonic::transport::{Certificate, Channel, ClientTlsConfig};
|
||||
use tonic::Request;
|
||||
|
||||
use crate::auth::AuthInterceptor;
|
||||
use crate::error::{ensure_command_success, Error};
|
||||
use crate::error::{ensure_command_success, ensure_protocol_success, Error};
|
||||
use crate::generated::mxaccess_gateway::v1::mx_access_gateway_client::MxAccessGatewayClient;
|
||||
use crate::generated::mxaccess_gateway::v1::{
|
||||
CloseSessionReply, CloseSessionRequest, MxCommandReply, MxCommandRequest, MxEvent,
|
||||
@@ -23,6 +23,7 @@ pub type EventStream =
|
||||
pub struct GatewayClient {
|
||||
inner: RawGatewayClient,
|
||||
call_timeout: std::time::Duration,
|
||||
stream_timeout: Option<std::time::Duration>,
|
||||
}
|
||||
|
||||
impl GatewayClient {
|
||||
@@ -57,6 +58,7 @@ impl GatewayClient {
|
||||
Ok(Self {
|
||||
inner: MxAccessGatewayClient::with_interceptor(channel, interceptor),
|
||||
call_timeout: options.call_timeout(),
|
||||
stream_timeout: options.stream_timeout(),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -83,6 +85,7 @@ impl GatewayClient {
|
||||
|
||||
pub async fn open_session(&self, request: OpenSessionRequest) -> Result<Session, Error> {
|
||||
let reply = self.open_session_raw(request).await?;
|
||||
ensure_protocol_success("open session", reply.protocol_status.as_ref())?;
|
||||
Ok(Session::new(reply.session_id, self.clone()))
|
||||
}
|
||||
|
||||
@@ -107,7 +110,7 @@ impl GatewayClient {
|
||||
|
||||
pub async fn stream_events(&self, request: StreamEventsRequest) -> Result<EventStream, Error> {
|
||||
let mut client = self.inner.clone();
|
||||
let response = client.stream_events(self.unary_request(request)).await?;
|
||||
let response = client.stream_events(self.stream_request(request)).await?;
|
||||
let stream = futures_util::StreamExt::map(response.into_inner(), |result| {
|
||||
result.map_err(Error::from)
|
||||
});
|
||||
@@ -120,4 +123,13 @@ impl GatewayClient {
|
||||
request.set_timeout(self.call_timeout);
|
||||
request
|
||||
}
|
||||
|
||||
fn stream_request<T>(&self, message: T) -> Request<T> {
|
||||
let mut request = Request::new(message);
|
||||
if let Some(timeout) = self.stream_timeout {
|
||||
request.set_timeout(timeout);
|
||||
}
|
||||
|
||||
request
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use thiserror::Error as ThisError;
|
||||
use tonic::Code;
|
||||
|
||||
use crate::generated::mxaccess_gateway::v1::{MxCommandReply, ProtocolStatusCode};
|
||||
use crate::generated::mxaccess_gateway::v1::{MxCommandReply, ProtocolStatus, ProtocolStatusCode};
|
||||
|
||||
#[derive(Debug, ThisError)]
|
||||
pub enum Error {
|
||||
@@ -47,6 +47,13 @@ pub enum Error {
|
||||
|
||||
#[error("gateway command failed: {0}")]
|
||||
Command(#[from] Box<CommandError>),
|
||||
|
||||
#[error("gateway {operation} failed: {code:?}: {message}")]
|
||||
ProtocolStatus {
|
||||
operation: &'static str,
|
||||
code: ProtocolStatusCode,
|
||||
message: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
@@ -125,6 +132,27 @@ pub fn ensure_command_success(reply: MxCommandReply) -> Result<MxCommandReply, E
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ensure_protocol_success(
|
||||
operation: &'static str,
|
||||
status: Option<&ProtocolStatus>,
|
||||
) -> Result<(), Error> {
|
||||
let code = status
|
||||
.and_then(|status| ProtocolStatusCode::try_from(status.code).ok())
|
||||
.unwrap_or(ProtocolStatusCode::Unspecified);
|
||||
|
||||
if code == ProtocolStatusCode::Ok {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(Error::ProtocolStatus {
|
||||
operation,
|
||||
code,
|
||||
message: status
|
||||
.map(|status| status.message.clone())
|
||||
.unwrap_or_default(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn redact_credentials(message: &str) -> String {
|
||||
message
|
||||
.split_whitespace()
|
||||
|
||||
@@ -13,6 +13,7 @@ pub struct ClientOptions {
|
||||
server_name_override: Option<String>,
|
||||
connect_timeout: Duration,
|
||||
call_timeout: Duration,
|
||||
stream_timeout: Option<Duration>,
|
||||
}
|
||||
|
||||
impl ClientOptions {
|
||||
@@ -25,6 +26,7 @@ impl ClientOptions {
|
||||
server_name_override: None,
|
||||
connect_timeout: Duration::from_secs(10),
|
||||
call_timeout: Duration::from_secs(30),
|
||||
stream_timeout: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -58,6 +60,11 @@ impl ClientOptions {
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_stream_timeout(mut self, stream_timeout: Duration) -> Self {
|
||||
self.stream_timeout = Some(stream_timeout);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn endpoint(&self) -> &str {
|
||||
&self.endpoint
|
||||
}
|
||||
@@ -85,6 +92,10 @@ impl ClientOptions {
|
||||
pub fn call_timeout(&self) -> Duration {
|
||||
self.call_timeout
|
||||
}
|
||||
|
||||
pub fn stream_timeout(&self) -> Option<Duration> {
|
||||
self.stream_timeout
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ClientOptions {
|
||||
@@ -104,6 +115,7 @@ impl fmt::Debug for ClientOptions {
|
||||
.field("server_name_override", &self.server_name_override)
|
||||
.field("connect_timeout", &self.connect_timeout)
|
||||
.field("call_timeout", &self.call_timeout)
|
||||
.field("stream_timeout", &self.stream_timeout)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use crate::client::{EventStream, GatewayClient};
|
||||
use crate::error::Error;
|
||||
use crate::error::{ensure_protocol_success, Error};
|
||||
use crate::generated::mxaccess_gateway::v1::mx_command::Payload;
|
||||
use crate::generated::mxaccess_gateway::v1::mx_command_reply;
|
||||
use crate::generated::mxaccess_gateway::v1::{
|
||||
@@ -11,6 +11,8 @@ use crate::generated::mxaccess_gateway::v1::{
|
||||
};
|
||||
use crate::value::MxValue;
|
||||
|
||||
const MAX_BULK_ITEMS: usize = 1_000;
|
||||
|
||||
/// Session identifier returned by the gateway.
|
||||
#[derive(Clone)]
|
||||
pub struct Session {
|
||||
@@ -40,12 +42,14 @@ impl Session {
|
||||
}
|
||||
|
||||
pub async fn close(&self) -> Result<(), Error> {
|
||||
self.client
|
||||
let reply = self
|
||||
.client
|
||||
.close_session_raw(CloseSessionRequest {
|
||||
session_id: self.id.clone(),
|
||||
client_correlation_id: "rust-client-close-session".to_owned(),
|
||||
})
|
||||
.await?;
|
||||
ensure_protocol_success("close session", reply.protocol_status.as_ref())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -137,6 +141,7 @@ impl Session {
|
||||
server_handle: i32,
|
||||
tag_addresses: Vec<String>,
|
||||
) -> Result<Vec<SubscribeResult>, Error> {
|
||||
ensure_bulk_size("tag_addresses", tag_addresses.len())?;
|
||||
let reply = self
|
||||
.invoke(
|
||||
MxCommandKind::AddItemBulk,
|
||||
@@ -155,6 +160,7 @@ impl Session {
|
||||
server_handle: i32,
|
||||
item_handles: Vec<i32>,
|
||||
) -> Result<Vec<SubscribeResult>, Error> {
|
||||
ensure_bulk_size("item_handles", item_handles.len())?;
|
||||
let reply = self
|
||||
.invoke(
|
||||
MxCommandKind::AdviseItemBulk,
|
||||
@@ -173,6 +179,7 @@ impl Session {
|
||||
server_handle: i32,
|
||||
item_handles: Vec<i32>,
|
||||
) -> Result<Vec<SubscribeResult>, Error> {
|
||||
ensure_bulk_size("item_handles", item_handles.len())?;
|
||||
let reply = self
|
||||
.invoke(
|
||||
MxCommandKind::RemoveItemBulk,
|
||||
@@ -191,6 +198,7 @@ impl Session {
|
||||
server_handle: i32,
|
||||
item_handles: Vec<i32>,
|
||||
) -> Result<Vec<SubscribeResult>, Error> {
|
||||
ensure_bulk_size("item_handles", item_handles.len())?;
|
||||
let reply = self
|
||||
.invoke(
|
||||
MxCommandKind::UnAdviseItemBulk,
|
||||
@@ -209,6 +217,7 @@ impl Session {
|
||||
server_handle: i32,
|
||||
tag_addresses: Vec<String>,
|
||||
) -> Result<Vec<SubscribeResult>, Error> {
|
||||
ensure_bulk_size("tag_addresses", tag_addresses.len())?;
|
||||
let reply = self
|
||||
.invoke(
|
||||
MxCommandKind::SubscribeBulk,
|
||||
@@ -227,6 +236,7 @@ impl Session {
|
||||
server_handle: i32,
|
||||
item_handles: Vec<i32>,
|
||||
) -> Result<Vec<SubscribeResult>, Error> {
|
||||
ensure_bulk_size("item_handles", item_handles.len())?;
|
||||
let reply = self
|
||||
.invoke(
|
||||
MxCommandKind::UnsubscribeBulk,
|
||||
@@ -327,6 +337,17 @@ impl Session {
|
||||
}
|
||||
}
|
||||
|
||||
fn ensure_bulk_size(name: &'static str, len: usize) -> Result<(), Error> {
|
||||
if len > MAX_BULK_ITEMS {
|
||||
Err(Error::InvalidArgument {
|
||||
name: name.to_owned(),
|
||||
detail: format!("bulk commands are limited to {MAX_BULK_ITEMS} item(s)"),
|
||||
})
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn register_server_handle(reply: &MxCommandReply) -> i32 {
|
||||
match reply.payload.as_ref() {
|
||||
Some(mx_command_reply::Payload::Register(register)) => register.server_handle,
|
||||
|
||||
@@ -0,0 +1,294 @@
|
||||
# Dashboard Interface Design
|
||||
|
||||
This guide describes the visual and interaction patterns used by the MXAccess
|
||||
Gateway dashboard so the same interface style can be reused in other
|
||||
operations-focused projects.
|
||||
|
||||
## Design Goal
|
||||
|
||||
The dashboard is an operational interface, not a landing page. It prioritizes
|
||||
fast scanning, low visual noise, and stable layouts while live data changes.
|
||||
The design uses Bootstrap for common behavior and a small local stylesheet for
|
||||
project identity, spacing, and status presentation.
|
||||
|
||||
Use this style for applications where users repeatedly check system state,
|
||||
compare rows, inspect details, and diagnose faults. Avoid promotional layouts,
|
||||
large hero areas, decorative imagery, or oversized cards that reduce data
|
||||
density.
|
||||
|
||||
## Visual Language
|
||||
|
||||
The interface uses a quiet, work-focused visual system:
|
||||
|
||||
- A light gray page background separates the application shell from white data
|
||||
surfaces.
|
||||
- White cards and sections carry the actual operational content.
|
||||
- Borders define structure more often than shadows.
|
||||
- Accent color is reserved for metric values and important numeric signals.
|
||||
- Bootstrap status badges provide state color without custom status art.
|
||||
- Tables remain compact and responsive so long identifiers and timestamps stay
|
||||
readable.
|
||||
|
||||
The resulting page should look like a control surface: restrained, predictable,
|
||||
and dense enough for repeated use.
|
||||
|
||||
## Layout Structure
|
||||
|
||||
Every page follows the same structure:
|
||||
|
||||
1. A top navigation bar with the product or service name on the left.
|
||||
2. A full-width `container-fluid` content area.
|
||||
3. A page header with the page title, short context text, and optional status
|
||||
badge.
|
||||
4. Metric cards when a page has top-level numeric state.
|
||||
5. Bordered content sections for tables, details, faults, or empty states.
|
||||
|
||||
The shell does not use a sidebar. A horizontal navigation bar is enough for the
|
||||
current page count and keeps the content width available for tables.
|
||||
|
||||
```html
|
||||
<div class="dashboard-shell">
|
||||
<nav class="navbar navbar-expand-lg bg-body border-bottom dashboard-navbar">
|
||||
<!-- brand, page links, sign-out action -->
|
||||
</nav>
|
||||
<main class="container-fluid dashboard-content">
|
||||
<!-- page header, metric grid, sections -->
|
||||
</main>
|
||||
</div>
|
||||
```
|
||||
|
||||
## Color Tokens
|
||||
|
||||
Use a small token set and let Bootstrap provide the rest. The current dashboard
|
||||
uses these local tokens:
|
||||
|
||||
```css
|
||||
:root {
|
||||
--mxgw-surface: #f7f8fa;
|
||||
--mxgw-border: #d8dee6;
|
||||
--mxgw-ink-muted: #667085;
|
||||
--mxgw-accent: #146c64;
|
||||
}
|
||||
```
|
||||
|
||||
| Token | Purpose |
|
||||
|-------|---------|
|
||||
| `--mxgw-surface` | Page background behind all content. |
|
||||
| `--mxgw-border` | Borders on cards, tables, sections, and empty states. |
|
||||
| `--mxgw-ink-muted` | Secondary labels, details, and empty-state text. |
|
||||
| `--mxgw-accent` | Metric values and important numeric summaries. |
|
||||
|
||||
Keep the palette small. Add new colors only when they encode state or improve
|
||||
readability. Prefer Bootstrap badge classes for states such as ready, closing,
|
||||
closed, and faulted.
|
||||
|
||||
## Typography
|
||||
|
||||
Typography stays compact and consistent:
|
||||
|
||||
- Page headings use `1.35rem`, weight `650`, and normal letter spacing.
|
||||
- Section headings use the same size as page headings when they introduce a
|
||||
table or details group.
|
||||
- Metric labels use uppercase text at `.78rem` and weight `650`.
|
||||
- Metric values use `1.7rem`, weight `700`, and the accent color.
|
||||
- Body and table text inherit Bootstrap defaults for readability.
|
||||
|
||||
Do not scale text with viewport width. Long values use `overflow-wrap:
|
||||
anywhere` so session IDs, paths, and fault messages do not break the layout.
|
||||
|
||||
## Spacing And Shape
|
||||
|
||||
The dashboard uses modest spacing:
|
||||
|
||||
- Page content has `1.25rem` padding on desktop and `.75rem` on small screens.
|
||||
- Metric grids use `.75rem` gaps.
|
||||
- Content sections start with a top border and `1rem` top padding.
|
||||
- Cards and empty states use Bootstrap's small radius shape, `.375rem`.
|
||||
- Metric cards have no shadow.
|
||||
|
||||
This keeps information grouped without turning each section into a decorative
|
||||
panel. Use cards for repeated metric summaries, login forms, and individual
|
||||
items. Use unframed sections with a top border for page-level groups.
|
||||
|
||||
## Navigation
|
||||
|
||||
Navigation is a Bootstrap responsive navbar. It includes:
|
||||
|
||||
- Brand text for the service name.
|
||||
- Short page labels: `Overview`, `Sessions`, `Workers`, `Events`, `Settings`.
|
||||
- Active route styling through `NavLink`.
|
||||
- A right-aligned sign-out button when authentication is enabled.
|
||||
|
||||
Keep navigation labels short. Operational users should be able to predict what
|
||||
each page contains without reading explanatory copy.
|
||||
|
||||
## Page Headers
|
||||
|
||||
Each page starts with a `dashboard-page-header`:
|
||||
|
||||
- The title is the primary anchor.
|
||||
- A single secondary line gives timestamp, row count, or configuration context.
|
||||
- A status badge appears on the right when the page has an overall state.
|
||||
|
||||
On narrow screens, the header stacks vertically. This prevents long context
|
||||
text or status badges from overlapping the title.
|
||||
|
||||
```html
|
||||
<div class="dashboard-page-header">
|
||||
<div>
|
||||
<h1>Overview</h1>
|
||||
<div class="text-secondary">Generated 2026-04-27 17:30:00</div>
|
||||
</div>
|
||||
<span class="badge text-bg-success">Healthy</span>
|
||||
</div>
|
||||
```
|
||||
|
||||
## Metric Cards
|
||||
|
||||
Metric cards summarize numeric state at the top of overview and diagnostic
|
||||
pages. They use Bootstrap cards with a local `metric-card` class:
|
||||
|
||||
- Label: uppercase, muted, compact.
|
||||
- Value: large enough to scan, accent colored, wraps safely.
|
||||
- Detail: optional muted text for version, rate context, or explanatory state.
|
||||
|
||||
Use auto-fit CSS grid tracks so the cards fill available width without custom
|
||||
breakpoints:
|
||||
|
||||
```css
|
||||
.metric-grid {
|
||||
display: grid;
|
||||
gap: .75rem;
|
||||
grid-template-columns: repeat(auto-fit, minmax(12rem, 1fr));
|
||||
}
|
||||
|
||||
.metric-grid.compact {
|
||||
grid-template-columns: repeat(auto-fit, minmax(10rem, 1fr));
|
||||
}
|
||||
```
|
||||
|
||||
Metrics should be formatted before rendering. Counts use thousands separators,
|
||||
durations use stable units, and missing values render as `-`.
|
||||
|
||||
## Tables
|
||||
|
||||
Tables are the main information surface. Use Bootstrap `table table-sm` with a
|
||||
local `dashboard-table` class:
|
||||
|
||||
- `table-sm` keeps rows dense.
|
||||
- `align-middle` improves status badge alignment.
|
||||
- `table-responsive` wraps every table that can exceed the viewport.
|
||||
- Header cells use weight `650` and no wrapping.
|
||||
- Body cells allow wrapping so identifiers, paths, and messages stay visible.
|
||||
- Detail tables reserve a fixed header width.
|
||||
|
||||
Use code formatting for machine identifiers such as session IDs, file paths,
|
||||
and protocol values. Link rows only where navigation is useful; avoid making
|
||||
entire rows clickable when a single identifier link is clearer.
|
||||
|
||||
## Status Badges
|
||||
|
||||
Status uses Bootstrap badge classes with a small mapping layer:
|
||||
|
||||
| State | Badge class |
|
||||
|-------|-------------|
|
||||
| `Ready`, `Healthy` | `text-bg-success` |
|
||||
| `Creating`, `StartingWorker`, `WaitingForPipe`, `InitializingWorker`, `Closing` | `text-bg-info` |
|
||||
| `Closed` | `text-bg-secondary` |
|
||||
| `Faulted` | `text-bg-danger` |
|
||||
| Unknown state | `text-bg-light text-dark border` |
|
||||
|
||||
Keep status text literal. Operators benefit from seeing the same state names
|
||||
that appear in logs and APIs.
|
||||
|
||||
## Empty And Loading States
|
||||
|
||||
Empty states are explicit and quiet. They use a white background, dashed border,
|
||||
small radius, muted text, and one sentence:
|
||||
|
||||
```html
|
||||
<div class="empty-state">No worker processes are attached.</div>
|
||||
```
|
||||
|
||||
Loading states use the same component shape. Avoid spinners for snapshot pages
|
||||
that update on a timer; a stable text placeholder is less distracting.
|
||||
|
||||
## Detail Pages
|
||||
|
||||
Detail pages use stacked sections instead of nested cards:
|
||||
|
||||
- The page header identifies the selected entity.
|
||||
- The first section shows entity metadata in a two-column details table.
|
||||
- Additional sections show related runtime state, such as worker metadata.
|
||||
- Missing entities render a single section with a concise not-found message.
|
||||
|
||||
This structure keeps details comparable across pages and avoids card nesting.
|
||||
|
||||
## Responsive Behavior
|
||||
|
||||
The dashboard uses one small-screen breakpoint:
|
||||
|
||||
```css
|
||||
@media (max-width: 700px) {
|
||||
.dashboard-content {
|
||||
padding: .75rem;
|
||||
}
|
||||
|
||||
.dashboard-page-header {
|
||||
align-items: flex-start;
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
.details-table th {
|
||||
width: 9rem;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Do not hide important columns by default. Use horizontal table scrolling for
|
||||
dense operational data, and reserve column hiding for data that is clearly
|
||||
duplicative.
|
||||
|
||||
## Data Formatting
|
||||
|
||||
Use a small display helper instead of formatting inline in every component.
|
||||
The helper should provide consistent rendering for:
|
||||
|
||||
- empty text as `-`,
|
||||
- counts with thousands separators,
|
||||
- dates and times in a consistent local or configured format,
|
||||
- durations in stable units,
|
||||
- metric lookup by name and dimension.
|
||||
|
||||
Centralizing formatting prevents visual drift between overview cards, tables,
|
||||
and detail pages.
|
||||
|
||||
## Security And Redaction
|
||||
|
||||
The interface is read-only unless an explicit administrative action is
|
||||
designed. It should not display secrets or raw credential-bearing values.
|
||||
|
||||
Apply redaction before values reach Razor components. The UI treats redacted
|
||||
values as normal display text; it does not need to know why a value is hidden.
|
||||
This keeps security policy in the dashboard projection layer rather than in
|
||||
markup.
|
||||
|
||||
## Replication Checklist
|
||||
|
||||
Use this checklist when applying the design to another project:
|
||||
|
||||
- Define four local tokens: surface, border, muted ink, and accent.
|
||||
- Use a Bootstrap top navbar with short route labels.
|
||||
- Keep page content inside a full-width fluid container.
|
||||
- Start every page with the same header structure.
|
||||
- Put primary numeric state in `metric-grid` cards.
|
||||
- Put detailed runtime state in compact responsive tables.
|
||||
- Use status badges mapped from real domain states.
|
||||
- Use dashed bordered empty states for loading and no-data cases.
|
||||
- Use top-bordered sections for page groups instead of nested cards.
|
||||
- Centralize formatting and redaction outside Razor markup.
|
||||
- Keep the dashboard read-only until admin workflows have a separate design.
|
||||
|
||||
## Related Documentation
|
||||
|
||||
- [Gateway Dashboard Detailed Design](./gateway-dashboard-design.md)
|
||||
@@ -0,0 +1,153 @@
|
||||
# Gateway Configuration
|
||||
|
||||
This document describes every option bound under the `MxGateway` configuration
|
||||
section by `GatewayOptions`.
|
||||
|
||||
The gateway binds configuration at startup and validates it with
|
||||
`GatewayOptionsValidator`. Startup fails before the server listens when required
|
||||
paths, timeouts, queue sizes, enum values, or protocol values are invalid.
|
||||
|
||||
## Configuration Shape
|
||||
|
||||
```json
|
||||
{
|
||||
"MxGateway": {
|
||||
"Authentication": {
|
||||
"Mode": "ApiKey",
|
||||
"SqlitePath": "C:\\ProgramData\\MxGateway\\gateway-auth.db",
|
||||
"PepperSecretName": "MxGateway:ApiKeyPepper",
|
||||
"RunMigrationsOnStartup": true
|
||||
},
|
||||
"Worker": {
|
||||
"ExecutablePath": "src\\MxGateway.Worker\\bin\\x86\\Release\\MxGateway.Worker.exe",
|
||||
"WorkingDirectory": null,
|
||||
"RequiredArchitecture": "X86",
|
||||
"StartupTimeoutSeconds": 30,
|
||||
"StartupProbeRetryAttempts": 3,
|
||||
"StartupProbeRetryDelayMilliseconds": 250,
|
||||
"PipeConnectAttemptTimeoutMilliseconds": 2000,
|
||||
"ShutdownTimeoutSeconds": 10,
|
||||
"HeartbeatIntervalSeconds": 5,
|
||||
"HeartbeatGraceSeconds": 15,
|
||||
"MaxMessageBytes": 16777216
|
||||
},
|
||||
"Sessions": {
|
||||
"DefaultCommandTimeoutSeconds": 30,
|
||||
"MaxSessions": 64,
|
||||
"MaxPendingCommandsPerSession": 128,
|
||||
"AllowMultipleEventSubscribers": false
|
||||
},
|
||||
"Events": {
|
||||
"QueueCapacity": 10000,
|
||||
"BackpressurePolicy": "FailFast"
|
||||
},
|
||||
"Dashboard": {
|
||||
"Enabled": true,
|
||||
"PathBase": "/dashboard",
|
||||
"RequireAdminScope": true,
|
||||
"AllowAnonymousLocalhost": true,
|
||||
"SnapshotIntervalMilliseconds": 1000,
|
||||
"RecentFaultLimit": 100,
|
||||
"RecentSessionLimit": 200,
|
||||
"ShowTagValues": false
|
||||
},
|
||||
"Protocol": {
|
||||
"WorkerProtocolVersion": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Environment variables use the normal .NET double-underscore form. For example,
|
||||
`MxGateway__Sessions__MaxSessions=20` overrides
|
||||
`MxGateway:Sessions:MaxSessions`.
|
||||
|
||||
## Authentication Options
|
||||
|
||||
| Option | Default | Description |
|
||||
|--------|---------|-------------|
|
||||
| `MxGateway:Authentication:Mode` | `ApiKey` | Selects public gRPC authentication. Supported values are `ApiKey` and `Disabled`. `Disabled` bypasses API-key verification and is for local development only. |
|
||||
| `MxGateway:Authentication:SqlitePath` | `C:\ProgramData\MxGateway\gateway-auth.db` | SQLite database path for API-key records and audit rows when API-key authentication is enabled. |
|
||||
| `MxGateway:Authentication:PepperSecretName` | `MxGateway:ApiKeyPepper` | Configuration key used to read the HMAC pepper for API-key secret hashing. The dashboard effective configuration redacts this value. |
|
||||
| `MxGateway:Authentication:RunMigrationsOnStartup` | `true` | Runs SQLite auth schema migrations at gateway startup when API-key authentication is enabled. |
|
||||
|
||||
When `Mode` is `ApiKey`, `SqlitePath` and `PepperSecretName` must be present.
|
||||
`SqlitePath` must be a valid filesystem path.
|
||||
|
||||
## Worker Options
|
||||
|
||||
| Option | Default | Description |
|
||||
|--------|---------|-------------|
|
||||
| `MxGateway:Worker:ExecutablePath` | `src\MxGateway.Worker\bin\x86\Release\MxGateway.Worker.exe` | Path to the x86 worker executable launched for each gateway session. The path must be valid and point to a `.exe` file. |
|
||||
| `MxGateway:Worker:WorkingDirectory` | `null` | Optional working directory for the worker process. When set, it must be a valid filesystem path. |
|
||||
| `MxGateway:Worker:RequiredArchitecture` | `X86` | Required Portable Executable architecture for the worker. Supported values are `X86` and `X64`; MXAccess parity uses `X86`. |
|
||||
| `MxGateway:Worker:StartupTimeoutSeconds` | `30` | Total startup budget for process launch, startup probe, pipe connect, handshake, and worker readiness. |
|
||||
| `MxGateway:Worker:StartupProbeRetryAttempts` | `3` | Number of retry attempts for transient worker startup probe failures before pipe connection and handshake continue. |
|
||||
| `MxGateway:Worker:StartupProbeRetryDelayMilliseconds` | `250` | Delay between transient startup probe retry attempts. |
|
||||
| `MxGateway:Worker:PipeConnectAttemptTimeoutMilliseconds` | `2000` | Per-attempt timeout used by the worker named-pipe connect retry path. The overall pipe connection still stays under the startup budget. |
|
||||
| `MxGateway:Worker:ShutdownTimeoutSeconds` | `10` | Grace period for worker shutdown before the gateway treats shutdown as failed and may kill the worker process tree. |
|
||||
| `MxGateway:Worker:HeartbeatIntervalSeconds` | `5` | Worker heartbeat send interval and gateway heartbeat check cadence input. |
|
||||
| `MxGateway:Worker:HeartbeatGraceSeconds` | `15` | Maximum age of the last worker heartbeat before the gateway faults the worker. This must be greater than or equal to `HeartbeatIntervalSeconds`. |
|
||||
| `MxGateway:Worker:MaxMessageBytes` | `16777216` | Maximum worker IPC frame payload size in bytes. The validator allows values from `1024` through `268435456`. |
|
||||
|
||||
`StartupProbeRetryAttempts`, `StartupProbeRetryDelayMilliseconds`,
|
||||
`PipeConnectAttemptTimeoutMilliseconds`, timeout values, heartbeat values, and
|
||||
`MaxMessageBytes` must be positive. `MaxMessageBytes` is intentionally bounded
|
||||
to avoid accidental large allocations from malformed or oversized frames.
|
||||
|
||||
## Session Options
|
||||
|
||||
| Option | Default | Description |
|
||||
|--------|---------|-------------|
|
||||
| `MxGateway:Sessions:DefaultCommandTimeoutSeconds` | `30` | Default timeout used while the gateway waits for a worker command reply when an open-session request does not provide a positive command timeout. |
|
||||
| `MxGateway:Sessions:MaxSessions` | `64` | Maximum number of concurrently open gateway sessions. Session opens reserve a slot atomically before worker creation. |
|
||||
| `MxGateway:Sessions:MaxPendingCommandsPerSession` | `128` | Maximum number of pending worker commands for one session. Excess commands fail fast instead of queueing indefinitely. |
|
||||
| `MxGateway:Sessions:AllowMultipleEventSubscribers` | `false` | Controls whether multiple `StreamEvents` subscribers may attach to one session. `true` is rejected until event fan-out is implemented. |
|
||||
|
||||
All numeric session options must be greater than zero. The current event stream
|
||||
implementation supports one active subscriber per session; this preserves event
|
||||
ordering and avoids competing consumers.
|
||||
|
||||
## Event Options
|
||||
|
||||
| Option | Default | Description |
|
||||
|--------|---------|-------------|
|
||||
| `MxGateway:Events:QueueCapacity` | `10000` | Capacity for bounded per-session event queues used by the gateway worker event channel and the public gRPC event stream queue. |
|
||||
| `MxGateway:Events:BackpressurePolicy` | `FailFast` | Event backpressure behavior. `FailFast` is the only supported value. |
|
||||
|
||||
`QueueCapacity` must be greater than zero. With `FailFast`, queue overflow
|
||||
faults the affected worker or session instead of silently dropping MXAccess
|
||||
events.
|
||||
|
||||
## Dashboard Options
|
||||
|
||||
| Option | Default | Description |
|
||||
|--------|---------|-------------|
|
||||
| `MxGateway:Dashboard:Enabled` | `true` | Enables Blazor Server dashboard route mapping. |
|
||||
| `MxGateway:Dashboard:PathBase` | `/dashboard` | Base path for dashboard routes. When the dashboard is enabled, this value is required and must start with `/`. |
|
||||
| `MxGateway:Dashboard:RequireAdminScope` | `true` | Requires API keys used for dashboard login to carry the `admin` scope. |
|
||||
| `MxGateway:Dashboard:AllowAnonymousLocalhost` | `true` | Allows loopback dashboard requests to bypass the dashboard cookie requirement for local development. Remote requests still require dashboard authentication. |
|
||||
| `MxGateway:Dashboard:SnapshotIntervalMilliseconds` | `1000` | Dashboard snapshot refresh interval used by realtime Blazor pages. |
|
||||
| `MxGateway:Dashboard:RecentFaultLimit` | `100` | Maximum number of fault summaries projected into each dashboard snapshot. |
|
||||
| `MxGateway:Dashboard:RecentSessionLimit` | `200` | Maximum number of session summaries projected into each dashboard snapshot. |
|
||||
| `MxGateway:Dashboard:ShowTagValues` | `false` | Reserved display control for tag values. The dashboard does not show full tag values by default. |
|
||||
|
||||
`SnapshotIntervalMilliseconds` must be greater than zero. `RecentFaultLimit`
|
||||
and `RecentSessionLimit` must be greater than or equal to zero.
|
||||
|
||||
## Protocol Options
|
||||
|
||||
| Option | Default | Description |
|
||||
|--------|---------|-------------|
|
||||
| `MxGateway:Protocol:WorkerProtocolVersion` | `1` | Worker IPC protocol version expected by the gateway and worker. This must match `GatewayContractInfo.WorkerProtocolVersion`. |
|
||||
|
||||
The protocol option is exposed for diagnostics and explicit deployment
|
||||
configuration, not for compatibility negotiation. A mismatch fails validation
|
||||
at startup.
|
||||
|
||||
## Related Documentation
|
||||
|
||||
- [Gateway Process Detailed Design](./gateway-process-design.md)
|
||||
- [Gateway Dashboard Detailed Design](./gateway-dashboard-design.md)
|
||||
- [Worker Process Launcher](./WorkerProcessLauncher.md)
|
||||
- [Worker Frame Protocol](./WorkerFrameProtocol.md)
|
||||
@@ -247,6 +247,10 @@ Each client should expose event streaming as the idiomatic streaming primitive:
|
||||
Events must preserve gateway order. Libraries should not reorder, coalesce, or
|
||||
drop events by default.
|
||||
|
||||
Long-lived event streams do not inherit unary call deadlines. Clients apply the
|
||||
default call timeout to unary operations only, and streams run until the caller
|
||||
cancels them or an explicit stream timeout is configured.
|
||||
|
||||
The event surface must include:
|
||||
|
||||
- `OnDataChange`
|
||||
|
||||
@@ -336,6 +336,9 @@ Recommended visual language:
|
||||
If charts are added later, prefer simple server-generated data tables first. Do
|
||||
not add a JavaScript charting dependency without a specific need.
|
||||
|
||||
The reusable visual rules for replicating this interface in other projects are
|
||||
documented in [Dashboard Interface Design](./DashboardInterfaceDesign.md).
|
||||
|
||||
## Testing
|
||||
|
||||
Dashboard unit/component tests should cover:
|
||||
|
||||
@@ -361,13 +361,13 @@ worker startup, and removes the session if startup fails. A successful
|
||||
`OpenSession` attaches the ready `IWorkerClient` and transitions the session to
|
||||
`Ready`.
|
||||
|
||||
Only `Ready` sessions accept command and event operations. `CloseSession` is
|
||||
idempotent for sessions still known to the registry: the first close shuts down
|
||||
the worker, and later closes return the final `Closed` state. Lease handling is
|
||||
exposed as a session hook so a monitor can close expired sessions without
|
||||
embedding lease policy in the worker client. Gateway shutdown walks the
|
||||
registry, closes each known session, and kills a worker if graceful shutdown
|
||||
fails.
|
||||
Only `Ready` sessions accept command and event operations. `CloseSession` shuts
|
||||
down the worker, disposes the worker client, and removes the session from the
|
||||
registry so closed sessions do not retain pipe or process handles. A later close
|
||||
for the same id returns `SessionNotFound`. Lease handling is exposed as a
|
||||
session hook so a monitor can close expired sessions without embedding lease
|
||||
policy in the worker client. Gateway shutdown walks the registry, closes each
|
||||
known session, and kills a worker if graceful shutdown fails.
|
||||
|
||||
## Worker Launch
|
||||
|
||||
@@ -813,6 +813,11 @@ It emits .NET `Meter` instruments for collectors and keeps a
|
||||
the dashboard needs current counters and queue depths without depending on a
|
||||
specific metrics exporter.
|
||||
|
||||
Event metrics use low-cardinality tags such as event family. Per-session event
|
||||
counts are kept only in the in-process snapshot for active dashboard sessions
|
||||
and are purged when the session is removed. Worker event queue depth and gRPC
|
||||
event stream queue depth are reported as separate gauges.
|
||||
|
||||
HTTP request handling uses `UseGatewayRequestLoggingScope()` to attach common
|
||||
structured log fields when request metadata is present:
|
||||
|
||||
@@ -842,6 +847,8 @@ Suggested configuration shape:
|
||||
},
|
||||
"Worker": {
|
||||
"ExecutablePath": "src/MxGateway.Worker/bin/x86/Release/MxGateway.Worker.exe",
|
||||
"WorkingDirectory": null,
|
||||
"RequiredArchitecture": "X86",
|
||||
"StartupTimeoutSeconds": 30,
|
||||
"StartupProbeRetryAttempts": 3,
|
||||
"StartupProbeRetryDelayMilliseconds": 250,
|
||||
@@ -854,6 +861,7 @@ Suggested configuration shape:
|
||||
"Sessions": {
|
||||
"DefaultCommandTimeoutSeconds": 30,
|
||||
"MaxSessions": 64,
|
||||
"MaxPendingCommandsPerSession": 128,
|
||||
"AllowMultipleEventSubscribers": false
|
||||
},
|
||||
"Events": {
|
||||
@@ -869,6 +877,9 @@ Suggested configuration shape:
|
||||
"RecentFaultLimit": 100,
|
||||
"RecentSessionLimit": 200,
|
||||
"ShowTagValues": false
|
||||
},
|
||||
"Protocol": {
|
||||
"WorkerProtocolVersion": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -888,6 +899,9 @@ diagnostics, so it redacts secret-related fields such as
|
||||
`Authentication:PepperSecretName` and does not include raw API keys or key
|
||||
material.
|
||||
|
||||
The complete option reference, including defaults and validation rules, is in
|
||||
[Gateway Configuration](./GatewayConfiguration.md).
|
||||
|
||||
## Galaxy Repository Metadata
|
||||
|
||||
Galaxy hierarchy and tag metadata can be discovered through SQL Server when
|
||||
|
||||
@@ -0,0 +1,177 @@
|
||||
[CmdletBinding()]
|
||||
param(
|
||||
[string]$Endpoint = "http://127.0.0.1:5001",
|
||||
[string]$ApiKeyEnv = "MXGATEWAY_API_KEY",
|
||||
[string]$ApiKey,
|
||||
[int]$ClientCount = 5,
|
||||
[int]$MachineStart = 1,
|
||||
[int]$MachineEnd = 20,
|
||||
[string]$Attribute = "TestChangingInt",
|
||||
[int]$MaxEvents = [int]::MaxValue,
|
||||
[int]$StreamCallTimeoutSeconds = 86400,
|
||||
[string]$LogDirectory = (Join-Path (Get-Location) "artifacts\rust-testchangingint-subscribers")
|
||||
)
|
||||
|
||||
Set-StrictMode -Version Latest
|
||||
$ErrorActionPreference = "Stop"
|
||||
|
||||
if ($ClientCount -le 0) {
|
||||
throw "ClientCount must be greater than zero."
|
||||
}
|
||||
|
||||
if ($MachineStart -lt 1 -or $MachineEnd -lt $MachineStart) {
|
||||
throw "MachineStart must be at least 1 and MachineEnd must be greater than or equal to MachineStart."
|
||||
}
|
||||
|
||||
if ($StreamCallTimeoutSeconds -le 0) {
|
||||
throw "StreamCallTimeoutSeconds must be greater than zero."
|
||||
}
|
||||
|
||||
$repoRoot = Split-Path -Parent $PSScriptRoot
|
||||
$rustRoot = Join-Path $repoRoot "clients\rust"
|
||||
$mxgwExe = Join-Path $rustRoot "target\debug\mxgw.exe"
|
||||
$sessionIds = New-Object System.Collections.Generic.List[string]
|
||||
$streamProcesses = New-Object System.Collections.Generic.List[System.Diagnostics.Process]
|
||||
|
||||
function Get-ConnectionArgs {
|
||||
$args = @("--endpoint", $Endpoint, "--plaintext")
|
||||
if (-not [string]::IsNullOrWhiteSpace($ApiKey)) {
|
||||
$args += @("--api-key", $ApiKey)
|
||||
} else {
|
||||
$args += @("--api-key-env", $ApiKeyEnv)
|
||||
}
|
||||
|
||||
return $args
|
||||
}
|
||||
|
||||
function Invoke-MxgwJson {
|
||||
param(
|
||||
[Parameter(Mandatory = $true)]
|
||||
[string[]]$Arguments
|
||||
)
|
||||
|
||||
$output = & $mxgwExe @Arguments 2>&1
|
||||
if ($LASTEXITCODE -ne 0) {
|
||||
throw "mxgw failed with exit code $LASTEXITCODE for arguments: $($Arguments -join ' ')`n$output"
|
||||
}
|
||||
|
||||
$jsonText = ($output | Where-Object { -not [string]::IsNullOrWhiteSpace($_) }) -join "`n"
|
||||
if ([string]::IsNullOrWhiteSpace($jsonText)) {
|
||||
throw "mxgw returned no JSON for arguments: $($Arguments -join ' ')"
|
||||
}
|
||||
|
||||
return $jsonText | ConvertFrom-Json
|
||||
}
|
||||
|
||||
function Close-SessionQuietly {
|
||||
param([string]$SessionId)
|
||||
|
||||
if ([string]::IsNullOrWhiteSpace($SessionId)) {
|
||||
return
|
||||
}
|
||||
|
||||
try {
|
||||
$args = @("close-session") + (Get-ConnectionArgs) + @("--session-id", $SessionId, "--json")
|
||||
[void](Invoke-MxgwJson -Arguments $args)
|
||||
Write-Host "Closed session $SessionId"
|
||||
} catch {
|
||||
Write-Warning "Failed to close session ${SessionId}: $($_.Exception.Message)"
|
||||
}
|
||||
}
|
||||
|
||||
function Stop-StreamProcessQuietly {
|
||||
param([System.Diagnostics.Process]$Process)
|
||||
|
||||
if ($null -eq $Process -or $Process.HasExited) {
|
||||
return
|
||||
}
|
||||
|
||||
try {
|
||||
Stop-Process -Id $Process.Id -Force -ErrorAction Stop
|
||||
Write-Host "Stopped stream process $($Process.Id)"
|
||||
} catch {
|
||||
Write-Warning "Failed to stop stream process $($Process.Id): $($_.Exception.Message)"
|
||||
}
|
||||
}
|
||||
|
||||
New-Item -ItemType Directory -Force -Path $LogDirectory | Out-Null
|
||||
|
||||
Write-Host "Building Rust CLI..."
|
||||
Push-Location $rustRoot
|
||||
try {
|
||||
cargo build -p mxgw-cli
|
||||
} finally {
|
||||
Pop-Location
|
||||
}
|
||||
|
||||
if (-not (Test-Path -LiteralPath $mxgwExe)) {
|
||||
throw "Rust CLI executable was not found at $mxgwExe"
|
||||
}
|
||||
|
||||
$tags = for ($machine = $MachineStart; $machine -le $MachineEnd; $machine++) {
|
||||
"TestMachine_{0:D3}.{1}" -f $machine, $Attribute
|
||||
}
|
||||
|
||||
try {
|
||||
for ($clientIndex = 0; $clientIndex -lt $ClientCount; $clientIndex++) {
|
||||
$clientNumber = $clientIndex + 1
|
||||
$clientTags = for ($tagIndex = $clientIndex; $tagIndex -lt $tags.Count; $tagIndex += $ClientCount) {
|
||||
$tags[$tagIndex]
|
||||
}
|
||||
|
||||
if ($clientTags.Count -eq 0) {
|
||||
continue
|
||||
}
|
||||
|
||||
$clientName = "mxgw-rust-changingint-$clientNumber"
|
||||
Write-Host "Opening session for client $clientNumber with $($clientTags.Count) tag(s)..."
|
||||
$openArgs = @("open-session") + (Get-ConnectionArgs) + @("--client-name", $clientName, "--json")
|
||||
$open = Invoke-MxgwJson -Arguments $openArgs
|
||||
$sessionId = [string]$open.sessionId
|
||||
$sessionIds.Add($sessionId)
|
||||
|
||||
$registerArgs = @("register") + (Get-ConnectionArgs) + @("--session-id", $sessionId, "--client-name", $clientName, "--json")
|
||||
$register = Invoke-MxgwJson -Arguments $registerArgs
|
||||
$serverHandle = [int]$register.serverHandle
|
||||
|
||||
foreach ($tag in $clientTags) {
|
||||
$addArgs = @("add-item") + (Get-ConnectionArgs) + @("--session-id", $sessionId, "--server-handle", $serverHandle, "--item", $tag, "--json")
|
||||
$add = Invoke-MxgwJson -Arguments $addArgs
|
||||
$itemHandle = [int]$add.itemHandle
|
||||
|
||||
$adviseArgs = @("advise") + (Get-ConnectionArgs) + @("--session-id", $sessionId, "--server-handle", $serverHandle, "--item-handle", $itemHandle, "--json")
|
||||
[void](Invoke-MxgwJson -Arguments $adviseArgs)
|
||||
Write-Host "Client $clientNumber subscribed $tag itemHandle=$itemHandle"
|
||||
}
|
||||
|
||||
$stdout = Join-Path $LogDirectory ("client-{0:D2}.stdout.log" -f $clientNumber)
|
||||
$stderr = Join-Path $LogDirectory ("client-{0:D2}.stderr.log" -f $clientNumber)
|
||||
$streamArgs = @("stream-events") + (Get-ConnectionArgs) + @(
|
||||
"--session-id", $sessionId,
|
||||
"--max-events", $MaxEvents.ToString(),
|
||||
"--call-timeout-seconds", $StreamCallTimeoutSeconds.ToString(),
|
||||
"--json")
|
||||
$process = Start-Process -FilePath $mxgwExe -ArgumentList $streamArgs -WorkingDirectory $rustRoot -WindowStyle Hidden -RedirectStandardOutput $stdout -RedirectStandardError $stderr -PassThru
|
||||
$streamProcesses.Add($process)
|
||||
Write-Host "Client $clientNumber streaming session $sessionId in process $($process.Id); logs: $stdout"
|
||||
}
|
||||
|
||||
Write-Host "Started $($streamProcesses.Count) Rust stream client(s). Press Ctrl+C to stop and close sessions."
|
||||
while ($true) {
|
||||
Start-Sleep -Seconds 5
|
||||
foreach ($process in @($streamProcesses)) {
|
||||
if ($process.HasExited) {
|
||||
throw "Stream process $($process.Id) exited with code $($process.ExitCode). Check $LogDirectory."
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
Write-Host "Stopping Rust stream clients and closing gateway sessions..."
|
||||
foreach ($process in @($streamProcesses)) {
|
||||
Stop-StreamProcessQuietly -Process $process
|
||||
}
|
||||
|
||||
foreach ($sessionId in @($sessionIds)) {
|
||||
Close-SessionQuietly -SessionId $sessionId
|
||||
}
|
||||
}
|
||||
@@ -125,6 +125,16 @@ public sealed class GatewayOptionsValidator : IValidateOptions<GatewayOptions>
|
||||
"MxGateway:Sessions:DefaultCommandTimeoutSeconds must be greater than zero.",
|
||||
failures);
|
||||
AddIfNotPositive(options.MaxSessions, "MxGateway:Sessions:MaxSessions must be greater than zero.", failures);
|
||||
AddIfNotPositive(
|
||||
options.MaxPendingCommandsPerSession,
|
||||
"MxGateway:Sessions:MaxPendingCommandsPerSession must be greater than zero.",
|
||||
failures);
|
||||
|
||||
if (options.AllowMultipleEventSubscribers)
|
||||
{
|
||||
failures.Add(
|
||||
"MxGateway:Sessions:AllowMultipleEventSubscribers is not supported until event fan-out is implemented.");
|
||||
}
|
||||
}
|
||||
|
||||
private static void ValidateEvents(EventOptions options, List<string> failures)
|
||||
|
||||
@@ -6,5 +6,7 @@ public sealed class SessionOptions
|
||||
|
||||
public int MaxSessions { get; init; } = 64;
|
||||
|
||||
public int MaxPendingCommandsPerSession { get; init; } = 128;
|
||||
|
||||
public bool AllowMultipleEventSubscribers { get; init; }
|
||||
}
|
||||
|
||||
@@ -21,6 +21,11 @@ public static class DashboardDisplay
|
||||
return string.IsNullOrWhiteSpace(value) ? "-" : value;
|
||||
}
|
||||
|
||||
public static string Count(long value)
|
||||
{
|
||||
return value.ToString("N0", System.Globalization.CultureInfo.InvariantCulture);
|
||||
}
|
||||
|
||||
public static long MetricValue(DashboardSnapshot snapshot, string name, string? dimension = null)
|
||||
{
|
||||
return snapshot.Metrics.FirstOrDefault(metric =>
|
||||
|
||||
@@ -20,13 +20,13 @@ else
|
||||
|
||||
<section class="metric-grid">
|
||||
<MetricCard Label="Uptime" Value="@DashboardDisplay.Duration(Snapshot.GatewayUptime)" Detail="@Snapshot.GatewayVersion" />
|
||||
<MetricCard Label="Open Sessions" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.sessions.open").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Workers Running" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.workers.running").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Event Queue Depth" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.events.queue.depth").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Commands Failed" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.commands.failed").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Events Received" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.events.received").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Faults" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.faults").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Queue Overflows" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.queues.overflows").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Open Sessions" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.sessions.open"))" />
|
||||
<MetricCard Label="Workers Running" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.workers.running"))" />
|
||||
<MetricCard Label="Event Queue Depth" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.events.worker_queue.depth"))" />
|
||||
<MetricCard Label="Commands Failed" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.commands.failed"))" />
|
||||
<MetricCard Label="Events Received" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.events.received"))" />
|
||||
<MetricCard Label="Faults" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.faults"))" />
|
||||
<MetricCard Label="Queue Overflows" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.queues.overflows"))" />
|
||||
</section>
|
||||
|
||||
<section class="dashboard-section">
|
||||
|
||||
@@ -18,10 +18,11 @@ else
|
||||
</div>
|
||||
|
||||
<section class="metric-grid compact">
|
||||
<MetricCard Label="Events Received" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.events.received").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Event Queue Depth" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.events.queue.depth").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Queue Overflows" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.queues.overflows").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Stream Disconnects" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.grpc.streams.disconnected").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Events Received" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.events.received"))" />
|
||||
<MetricCard Label="Worker Event Queue Depth" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.events.worker_queue.depth"))" />
|
||||
<MetricCard Label="Stream Queue Depth" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.events.grpc_stream_queue.depth"))" />
|
||||
<MetricCard Label="Queue Overflows" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.queues.overflows"))" />
|
||||
<MetricCard Label="Stream Disconnects" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.grpc.streams.disconnected"))" />
|
||||
</section>
|
||||
|
||||
<section class="dashboard-section">
|
||||
@@ -47,7 +48,7 @@ else
|
||||
{
|
||||
<tr>
|
||||
<td>@metric.Dimension</td>
|
||||
<td>@metric.Value</td>
|
||||
<td>@DashboardDisplay.Count(metric.Value)</td>
|
||||
</tr>
|
||||
}
|
||||
</tbody>
|
||||
|
||||
@@ -39,6 +39,7 @@ else
|
||||
<tr><th scope="row">Opened</th><td>@DashboardDisplay.DateTime(CurrentSession.OpenedAt)</td></tr>
|
||||
<tr><th scope="row">Last activity</th><td>@DashboardDisplay.DateTime(CurrentSession.LastClientActivityAt)</td></tr>
|
||||
<tr><th scope="row">Lease expires</th><td>@DashboardDisplay.DateTime(CurrentSession.LeaseExpiresAt)</td></tr>
|
||||
<tr><th scope="row">Events received</th><td>@DashboardDisplay.Count(CurrentSession.EventsReceived)</td></tr>
|
||||
<tr><th scope="row">Last fault</th><td>@DashboardDisplay.Text(CurrentSession.LastFault)</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
@@ -33,6 +33,7 @@ else
|
||||
<th scope="col">Client</th>
|
||||
<th scope="col">Backend</th>
|
||||
<th scope="col">Worker</th>
|
||||
<th scope="col">Events</th>
|
||||
<th scope="col">Opened</th>
|
||||
<th scope="col">Activity</th>
|
||||
<th scope="col">Heartbeat</th>
|
||||
@@ -54,6 +55,7 @@ else
|
||||
<span class="ms-1"><StatusBadge Text="@session.WorkerState.ToString()" /></span>
|
||||
}
|
||||
</td>
|
||||
<td>@DashboardDisplay.Count(session.EventsReceived)</td>
|
||||
<td>@DashboardDisplay.DateTime(session.OpenedAt)</td>
|
||||
<td>@DashboardDisplay.DateTime(session.LastClientActivityAt)</td>
|
||||
<td>@DashboardDisplay.DateTime(session.LastWorkerHeartbeatAt)</td>
|
||||
|
||||
@@ -16,4 +16,5 @@ public sealed record DashboardSessionSummary(
|
||||
int? WorkerProcessId,
|
||||
WorkerClientState? WorkerState,
|
||||
DateTimeOffset? LastWorkerHeartbeatAt,
|
||||
long EventsReceived,
|
||||
string? LastFault);
|
||||
|
||||
@@ -45,15 +45,15 @@ public sealed class DashboardSnapshotService : IDashboardSnapshotService
|
||||
IReadOnlyList<GatewaySession> sessions = _sessionRegistry.Snapshot()
|
||||
.OrderByDescending(session => session.OpenedAt)
|
||||
.ToArray();
|
||||
GatewayMetricsSnapshot metricsSnapshot = _metrics.GetSnapshot();
|
||||
IReadOnlyList<DashboardSessionSummary> sessionSummaries = sessions
|
||||
.Take(ResolveLimit(_recentSessionLimit))
|
||||
.Select(CreateSessionSummary)
|
||||
.Select(session => CreateSessionSummary(session, metricsSnapshot))
|
||||
.ToArray();
|
||||
IReadOnlyList<DashboardWorkerSummary> workerSummaries = sessions
|
||||
.Where(session => session.WorkerClient is not null)
|
||||
.Where(session => session.WorkerClient is { State: not WorkerClientState.Closed })
|
||||
.Select(CreateWorkerSummary)
|
||||
.ToArray();
|
||||
GatewayMetricsSnapshot metricsSnapshot = _metrics.GetSnapshot();
|
||||
|
||||
return new DashboardSnapshot(
|
||||
GeneratedAt: generatedAt,
|
||||
@@ -100,9 +100,12 @@ public sealed class DashboardSnapshotService : IDashboardSnapshotService
|
||||
}
|
||||
}
|
||||
|
||||
private static DashboardSessionSummary CreateSessionSummary(GatewaySession session)
|
||||
private static DashboardSessionSummary CreateSessionSummary(
|
||||
GatewaySession session,
|
||||
GatewayMetricsSnapshot metricsSnapshot)
|
||||
{
|
||||
IWorkerClient? workerClient = session.WorkerClient;
|
||||
metricsSnapshot.EventsBySession.TryGetValue(session.SessionId, out long eventsReceived);
|
||||
|
||||
return new DashboardSessionSummary(
|
||||
SessionId: session.SessionId,
|
||||
@@ -117,6 +120,7 @@ public sealed class DashboardSnapshotService : IDashboardSnapshotService
|
||||
WorkerProcessId: workerClient?.ProcessId,
|
||||
WorkerState: workerClient?.State,
|
||||
LastWorkerHeartbeatAt: workerClient?.LastHeartbeatAt,
|
||||
EventsReceived: eventsReceived,
|
||||
LastFault: DashboardRedactor.Redact(session.FinalFault));
|
||||
}
|
||||
|
||||
@@ -138,7 +142,8 @@ public sealed class DashboardSnapshotService : IDashboardSnapshotService
|
||||
[
|
||||
new("mxgateway.sessions.open", snapshot.OpenSessions),
|
||||
new("mxgateway.workers.running", snapshot.WorkersRunning),
|
||||
new("mxgateway.events.queue.depth", snapshot.EventQueueDepth),
|
||||
new("mxgateway.events.worker_queue.depth", snapshot.WorkerEventQueueDepth),
|
||||
new("mxgateway.events.grpc_stream_queue.depth", snapshot.GrpcEventStreamQueueDepth),
|
||||
new("mxgateway.sessions.opened", snapshot.SessionsOpened),
|
||||
new("mxgateway.sessions.closed", snapshot.SessionsClosed),
|
||||
new("mxgateway.commands.started", snapshot.CommandsStarted),
|
||||
|
||||
@@ -47,7 +47,7 @@ public sealed class EventStreamService(
|
||||
() =>
|
||||
{
|
||||
int depth = Interlocked.Increment(ref streamQueueDepth);
|
||||
metrics.SetEventQueueDepth(depth);
|
||||
metrics.SetGrpcEventStreamQueueDepth(depth);
|
||||
},
|
||||
streamCts.Token);
|
||||
|
||||
@@ -56,7 +56,7 @@ public sealed class EventStreamService(
|
||||
await foreach (MxEvent mxEvent in eventQueue.Reader.ReadAllAsync(cancellationToken).ConfigureAwait(false))
|
||||
{
|
||||
int depth = Math.Max(0, Interlocked.Decrement(ref streamQueueDepth));
|
||||
metrics.SetEventQueueDepth(depth);
|
||||
metrics.SetGrpcEventStreamQueueDepth(depth);
|
||||
yield return mxEvent;
|
||||
}
|
||||
|
||||
|
||||
@@ -26,11 +26,13 @@ public sealed class GatewayMetrics : IDisposable
|
||||
private readonly Histogram<double> _eventStreamSendLatencyHistogram;
|
||||
private readonly Dictionary<string, long> _commandFailuresByMethod = new(StringComparer.OrdinalIgnoreCase);
|
||||
private readonly Dictionary<string, long> _eventsByFamily = new(StringComparer.OrdinalIgnoreCase);
|
||||
private readonly Dictionary<string, long> _eventsBySession = new(StringComparer.Ordinal);
|
||||
private readonly Dictionary<string, long> _retryAttemptsByArea = new(StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
private int _openSessions;
|
||||
private int _workersRunning;
|
||||
private int _eventQueueDepth;
|
||||
private int _workerEventQueueDepth;
|
||||
private int _grpcEventStreamQueueDepth;
|
||||
private long _sessionsOpened;
|
||||
private long _sessionsClosed;
|
||||
private long _commandsStarted;
|
||||
@@ -68,7 +70,8 @@ public sealed class GatewayMetrics : IDisposable
|
||||
|
||||
_meter.CreateObservableGauge("mxgateway.sessions.open", GetOpenSessions);
|
||||
_meter.CreateObservableGauge("mxgateway.workers.running", GetWorkersRunning);
|
||||
_meter.CreateObservableGauge("mxgateway.events.queue.depth", GetEventQueueDepth);
|
||||
_meter.CreateObservableGauge("mxgateway.events.worker_queue.depth", GetWorkerEventQueueDepth);
|
||||
_meter.CreateObservableGauge("mxgateway.events.grpc_stream_queue.depth", GetGrpcEventStreamQueueDepth);
|
||||
}
|
||||
|
||||
public void SessionOpened()
|
||||
@@ -174,11 +177,11 @@ public sealed class GatewayMetrics : IDisposable
|
||||
{
|
||||
_eventsReceived++;
|
||||
Increment(_eventsByFamily, family);
|
||||
Increment(_eventsBySession, sessionId);
|
||||
}
|
||||
|
||||
_eventsReceivedCounter.Add(
|
||||
1,
|
||||
new KeyValuePair<string, object?>("session_id", sessionId),
|
||||
new KeyValuePair<string, object?>("family", family));
|
||||
}
|
||||
|
||||
@@ -190,6 +193,11 @@ public sealed class GatewayMetrics : IDisposable
|
||||
}
|
||||
|
||||
public void SetEventQueueDepth(int depth)
|
||||
{
|
||||
SetWorkerEventQueueDepth(depth);
|
||||
}
|
||||
|
||||
public void SetWorkerEventQueueDepth(int depth)
|
||||
{
|
||||
if (depth < 0)
|
||||
{
|
||||
@@ -198,7 +206,28 @@ public sealed class GatewayMetrics : IDisposable
|
||||
|
||||
lock (_syncRoot)
|
||||
{
|
||||
_eventQueueDepth = depth;
|
||||
_workerEventQueueDepth = depth;
|
||||
}
|
||||
}
|
||||
|
||||
public void SetGrpcEventStreamQueueDepth(int depth)
|
||||
{
|
||||
if (depth < 0)
|
||||
{
|
||||
throw new ArgumentOutOfRangeException(nameof(depth), depth, "Queue depth cannot be negative.");
|
||||
}
|
||||
|
||||
lock (_syncRoot)
|
||||
{
|
||||
_grpcEventStreamQueueDepth = depth;
|
||||
}
|
||||
}
|
||||
|
||||
public void RemoveSessionEvents(string sessionId)
|
||||
{
|
||||
lock (_syncRoot)
|
||||
{
|
||||
_eventsBySession.Remove(sessionId);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -260,7 +289,8 @@ public sealed class GatewayMetrics : IDisposable
|
||||
return new GatewayMetricsSnapshot(
|
||||
OpenSessions: _openSessions,
|
||||
WorkersRunning: _workersRunning,
|
||||
EventQueueDepth: _eventQueueDepth,
|
||||
WorkerEventQueueDepth: _workerEventQueueDepth,
|
||||
GrpcEventStreamQueueDepth: _grpcEventStreamQueueDepth,
|
||||
SessionsOpened: _sessionsOpened,
|
||||
SessionsClosed: _sessionsClosed,
|
||||
CommandsStarted: _commandsStarted,
|
||||
@@ -276,6 +306,7 @@ public sealed class GatewayMetrics : IDisposable
|
||||
RetryAttempts: _retryAttempts,
|
||||
CommandFailuresByMethod: new Dictionary<string, long>(_commandFailuresByMethod, StringComparer.OrdinalIgnoreCase),
|
||||
EventsByFamily: new Dictionary<string, long>(_eventsByFamily, StringComparer.OrdinalIgnoreCase),
|
||||
EventsBySession: new Dictionary<string, long>(_eventsBySession, StringComparer.Ordinal),
|
||||
RetryAttemptsByArea: new Dictionary<string, long>(_retryAttemptsByArea, StringComparer.OrdinalIgnoreCase));
|
||||
}
|
||||
}
|
||||
@@ -307,11 +338,19 @@ public sealed class GatewayMetrics : IDisposable
|
||||
}
|
||||
}
|
||||
|
||||
private int GetEventQueueDepth()
|
||||
private int GetWorkerEventQueueDepth()
|
||||
{
|
||||
lock (_syncRoot)
|
||||
{
|
||||
return _eventQueueDepth;
|
||||
return _workerEventQueueDepth;
|
||||
}
|
||||
}
|
||||
|
||||
private int GetGrpcEventStreamQueueDepth()
|
||||
{
|
||||
lock (_syncRoot)
|
||||
{
|
||||
return _grpcEventStreamQueueDepth;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,8 @@ namespace MxGateway.Server.Metrics;
|
||||
public sealed record GatewayMetricsSnapshot(
|
||||
int OpenSessions,
|
||||
int WorkersRunning,
|
||||
int EventQueueDepth,
|
||||
int WorkerEventQueueDepth,
|
||||
int GrpcEventStreamQueueDepth,
|
||||
long SessionsOpened,
|
||||
long SessionsClosed,
|
||||
long CommandsStarted,
|
||||
@@ -19,4 +20,5 @@ public sealed record GatewayMetricsSnapshot(
|
||||
long RetryAttempts,
|
||||
IReadOnlyDictionary<string, long> CommandFailuresByMethod,
|
||||
IReadOnlyDictionary<string, long> EventsByFamily,
|
||||
IReadOnlyDictionary<string, long> EventsBySession,
|
||||
IReadOnlyDictionary<string, long> RetryAttemptsByArea);
|
||||
|
||||
@@ -23,6 +23,7 @@ public sealed class SessionManager : ISessionManager
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly ILogger<SessionManager> _logger;
|
||||
private readonly GatewayOptions _options;
|
||||
private readonly SemaphoreSlim _sessionSlots;
|
||||
|
||||
public SessionManager(
|
||||
ISessionRegistry registry,
|
||||
@@ -39,6 +40,7 @@ public sealed class SessionManager : ISessionManager
|
||||
_timeProvider = timeProvider ?? TimeProvider.System;
|
||||
_logger = logger ?? NullLogger<SessionManager>.Instance;
|
||||
_options = options.Value;
|
||||
_sessionSlots = new SemaphoreSlim(_options.Sessions.MaxSessions, _options.Sessions.MaxSessions);
|
||||
}
|
||||
|
||||
public async Task<GatewaySession> OpenSessionAsync(
|
||||
@@ -49,16 +51,17 @@ public sealed class SessionManager : ISessionManager
|
||||
ArgumentNullException.ThrowIfNull(request);
|
||||
EnsureSessionCapacity();
|
||||
|
||||
GatewaySession session = CreateSession(request, clientIdentity);
|
||||
if (!_registry.TryAdd(session))
|
||||
{
|
||||
throw new SessionManagerException(
|
||||
SessionManagerErrorCode.OpenFailed,
|
||||
$"Session id collision while opening session {session.SessionId}.");
|
||||
}
|
||||
|
||||
GatewaySession? session = null;
|
||||
try
|
||||
{
|
||||
session = CreateSession(request, clientIdentity);
|
||||
if (!_registry.TryAdd(session))
|
||||
{
|
||||
throw new SessionManagerException(
|
||||
SessionManagerErrorCode.OpenFailed,
|
||||
$"Session id collision while opening session {session.SessionId}.");
|
||||
}
|
||||
|
||||
session.TransitionTo(SessionState.StartingWorker);
|
||||
IWorkerClient workerClient = await _workerClientFactory
|
||||
.CreateAsync(session, cancellationToken)
|
||||
@@ -72,18 +75,23 @@ public sealed class SessionManager : ISessionManager
|
||||
}
|
||||
catch (Exception exception)
|
||||
{
|
||||
session.MarkFaulted(exception.Message);
|
||||
_registry.TryRemove(session.SessionId, out _);
|
||||
await session.DisposeAsync().ConfigureAwait(false);
|
||||
session?.MarkFaulted(exception.Message);
|
||||
if (session is not null)
|
||||
{
|
||||
_registry.TryRemove(session.SessionId, out _);
|
||||
await session.DisposeAsync().ConfigureAwait(false);
|
||||
}
|
||||
|
||||
ReleaseSessionSlot();
|
||||
_metrics.Fault(SessionManagerErrorCode.OpenFailed.ToString());
|
||||
_logger.LogWarning(
|
||||
exception,
|
||||
"Failed to open gateway session {SessionId}.",
|
||||
session.SessionId);
|
||||
session?.SessionId ?? "<not-created>");
|
||||
|
||||
throw new SessionManagerException(
|
||||
SessionManagerErrorCode.OpenFailed,
|
||||
$"Failed to open session {session.SessionId}.",
|
||||
session is null ? "Failed to create session." : $"Failed to open session {session.SessionId}.",
|
||||
exception);
|
||||
}
|
||||
}
|
||||
@@ -177,6 +185,7 @@ public sealed class SessionManager : ISessionManager
|
||||
"Graceful shutdown failed for session {SessionId}; killing worker.",
|
||||
session.SessionId);
|
||||
session.KillWorker(GatewayShutdownReason);
|
||||
await RemoveSessionAsync(session).ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -195,6 +204,7 @@ public sealed class SessionManager : ISessionManager
|
||||
_metrics.SessionClosed();
|
||||
}
|
||||
|
||||
await RemoveSessionAsync(session).ConfigureAwait(false);
|
||||
return result;
|
||||
}
|
||||
catch (Exception exception)
|
||||
@@ -222,7 +232,7 @@ public sealed class SessionManager : ISessionManager
|
||||
|
||||
private void EnsureSessionCapacity()
|
||||
{
|
||||
if (_registry.ActiveCount >= _options.Sessions.MaxSessions)
|
||||
if (!_sessionSlots.Wait(0))
|
||||
{
|
||||
throw new SessionManagerException(
|
||||
SessionManagerErrorCode.SessionLimitExceeded,
|
||||
@@ -230,6 +240,29 @@ public sealed class SessionManager : ISessionManager
|
||||
}
|
||||
}
|
||||
|
||||
private async Task RemoveSessionAsync(GatewaySession session)
|
||||
{
|
||||
if (!_registry.TryRemove(session.SessionId, out GatewaySession? removedSession))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
_metrics.RemoveSessionEvents(session.SessionId);
|
||||
ReleaseSessionSlot();
|
||||
await removedSession.DisposeAsync().ConfigureAwait(false);
|
||||
}
|
||||
|
||||
private void ReleaseSessionSlot()
|
||||
{
|
||||
try
|
||||
{
|
||||
_sessionSlots.Release();
|
||||
}
|
||||
catch (SemaphoreFullException)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
private GatewaySession CreateSession(
|
||||
SessionOpenRequest request,
|
||||
string? clientIdentity)
|
||||
@@ -244,6 +277,7 @@ public sealed class SessionManager : ISessionManager
|
||||
string pipeName = $"mxaccess-gateway-{Environment.ProcessId}-{sessionId}";
|
||||
string nonce = CreateNonce();
|
||||
DateTimeOffset openedAt = _timeProvider.GetUtcNow();
|
||||
string clientCorrelationId = CreateClientCorrelationId(request.ClientSessionName, sessionId);
|
||||
|
||||
return new GatewaySession(
|
||||
sessionId,
|
||||
@@ -252,13 +286,24 @@ public sealed class SessionManager : ISessionManager
|
||||
nonce,
|
||||
clientIdentity,
|
||||
request.ClientSessionName,
|
||||
request.ClientCorrelationId,
|
||||
clientCorrelationId,
|
||||
commandTimeout,
|
||||
startupTimeout,
|
||||
shutdownTimeout,
|
||||
openedAt);
|
||||
}
|
||||
|
||||
private static string CreateClientCorrelationId(
|
||||
string? clientSessionName,
|
||||
string sessionId)
|
||||
{
|
||||
string clientName = string.IsNullOrWhiteSpace(clientSessionName)
|
||||
? "client"
|
||||
: clientSessionName!;
|
||||
|
||||
return $"{clientName}-{sessionId}";
|
||||
}
|
||||
|
||||
private TimeSpan ResolveCommandTimeout(Duration? requestedTimeout)
|
||||
{
|
||||
if (requestedTimeout is null)
|
||||
|
||||
@@ -7,6 +7,7 @@ public static class SessionServiceCollectionExtensions
|
||||
services.AddSingleton<ISessionRegistry, SessionRegistry>();
|
||||
services.AddSingleton<ISessionWorkerClientFactory, SessionWorkerClientFactory>();
|
||||
services.AddSingleton<ISessionManager, SessionManager>();
|
||||
services.AddHostedService<SessionShutdownHostedService>();
|
||||
|
||||
return services;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace MxGateway.Server.Sessions;
|
||||
|
||||
public sealed class SessionShutdownHostedService(
|
||||
ISessionManager sessionManager,
|
||||
ILogger<SessionShutdownHostedService> logger) : IHostedService
|
||||
{
|
||||
public Task StartAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
public async Task StopAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
try
|
||||
{
|
||||
await sessionManager.ShutdownAsync(cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
logger.LogWarning("Gateway session shutdown was canceled by host shutdown timeout.");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -74,6 +74,7 @@ public sealed class SessionWorkerClientFactory : ISessionWorkerClientFactory
|
||||
HeartbeatGrace = TimeSpan.FromSeconds(_options.Worker.HeartbeatGraceSeconds),
|
||||
HeartbeatCheckInterval = TimeSpan.FromSeconds(_options.Worker.HeartbeatIntervalSeconds),
|
||||
EventChannelCapacity = _options.Events.QueueCapacity,
|
||||
MaxPendingCommands = _options.Sessions.MaxPendingCommandsPerSession,
|
||||
};
|
||||
|
||||
workerClient = new WorkerClient(
|
||||
|
||||
@@ -24,6 +24,7 @@ public sealed class WorkerClient : IWorkerClient
|
||||
private readonly Channel<WorkerEnvelope> _outboundEnvelopes;
|
||||
private readonly Channel<WorkerEvent> _events;
|
||||
private readonly ConcurrentDictionary<string, PendingCommand> _pendingCommands = new(StringComparer.Ordinal);
|
||||
private readonly SemaphoreSlim _pendingCommandSlots;
|
||||
private readonly CancellationTokenSource _stopCts = new();
|
||||
private long _nextSequence;
|
||||
private WorkerClientState _state;
|
||||
@@ -33,6 +34,7 @@ public sealed class WorkerClient : IWorkerClient
|
||||
private Task? _readLoopTask;
|
||||
private Task? _writeLoopTask;
|
||||
private Task? _heartbeatLoopTask;
|
||||
private bool _workerStartRecorded;
|
||||
private bool _disposed;
|
||||
|
||||
public WorkerClient(
|
||||
@@ -49,11 +51,13 @@ public sealed class WorkerClient : IWorkerClient
|
||||
_logger = logger ?? NullLogger<WorkerClient>.Instance;
|
||||
_reader = new WorkerFrameReader(connection.Stream, connection.FrameOptions);
|
||||
_writer = new WorkerFrameWriter(connection.Stream, connection.FrameOptions);
|
||||
_outboundEnvelopes = Channel.CreateUnbounded<WorkerEnvelope>(
|
||||
new UnboundedChannelOptions
|
||||
_pendingCommandSlots = new SemaphoreSlim(_options.MaxPendingCommands, _options.MaxPendingCommands);
|
||||
_outboundEnvelopes = Channel.CreateBounded<WorkerEnvelope>(
|
||||
new BoundedChannelOptions(_options.MaxPendingCommands + 4)
|
||||
{
|
||||
SingleReader = true,
|
||||
SingleWriter = false,
|
||||
FullMode = BoundedChannelFullMode.Wait,
|
||||
AllowSynchronousContinuations = false,
|
||||
});
|
||||
_events = Channel.CreateBounded<WorkerEvent>(
|
||||
@@ -140,6 +144,14 @@ public sealed class WorkerClient : IWorkerClient
|
||||
|
||||
string correlationId = Guid.NewGuid().ToString("N");
|
||||
string method = GetCommandMethod(command);
|
||||
if (!_pendingCommandSlots.Wait(0))
|
||||
{
|
||||
_metrics?.QueueOverflow("worker-pending-commands");
|
||||
throw new WorkerClientException(
|
||||
WorkerClientErrorCode.PendingCommandLimitExceeded,
|
||||
$"Worker session {SessionId} already has {_options.MaxPendingCommands} pending command(s).");
|
||||
}
|
||||
|
||||
PendingCommand pendingCommand = new(
|
||||
correlationId,
|
||||
method,
|
||||
@@ -147,6 +159,7 @@ public sealed class WorkerClient : IWorkerClient
|
||||
|
||||
if (!_pendingCommands.TryAdd(correlationId, pendingCommand))
|
||||
{
|
||||
ReleasePendingCommandSlot();
|
||||
throw new InvalidOperationException("Generated a duplicate command correlation id.");
|
||||
}
|
||||
|
||||
@@ -188,7 +201,11 @@ public sealed class WorkerClient : IWorkerClient
|
||||
}
|
||||
catch
|
||||
{
|
||||
_pendingCommands.TryRemove(correlationId, out _);
|
||||
if (_pendingCommands.TryRemove(correlationId, out _))
|
||||
{
|
||||
ReleasePendingCommandSlot();
|
||||
}
|
||||
|
||||
throw;
|
||||
}
|
||||
}
|
||||
@@ -199,7 +216,7 @@ public sealed class WorkerClient : IWorkerClient
|
||||
await foreach (WorkerEvent workerEvent in _events.Reader.ReadAllAsync(cancellationToken).ConfigureAwait(false))
|
||||
{
|
||||
int queueDepth = Math.Max(0, Interlocked.Decrement(ref _eventQueueDepth));
|
||||
_metrics?.SetEventQueueDepth(queueDepth);
|
||||
_metrics?.SetWorkerEventQueueDepth(queueDepth);
|
||||
yield return workerEvent;
|
||||
}
|
||||
}
|
||||
@@ -272,6 +289,7 @@ public sealed class WorkerClient : IWorkerClient
|
||||
await WaitForBackgroundTasksAsync(CancellationToken.None).ConfigureAwait(false);
|
||||
await _connection.Stream.DisposeAsync().ConfigureAwait(false);
|
||||
_connection.ProcessHandle?.Dispose();
|
||||
_pendingCommandSlots.Dispose();
|
||||
_stopCts.Dispose();
|
||||
}
|
||||
|
||||
@@ -409,7 +427,7 @@ public sealed class WorkerClient : IWorkerClient
|
||||
}
|
||||
|
||||
int queueDepth = Interlocked.Increment(ref _eventQueueDepth);
|
||||
_metrics?.SetEventQueueDepth(queueDepth);
|
||||
_metrics?.SetWorkerEventQueueDepth(queueDepth);
|
||||
}
|
||||
|
||||
private void CompleteCommand(WorkerEnvelope envelope)
|
||||
@@ -429,6 +447,7 @@ public sealed class WorkerClient : IWorkerClient
|
||||
return;
|
||||
}
|
||||
|
||||
ReleasePendingCommandSlot();
|
||||
TimeSpan duration = _timeProvider.GetElapsedTime(pendingCommand.StartTimestamp);
|
||||
_metrics?.CommandSucceeded(pendingCommand.Method, duration);
|
||||
pendingCommand.SetResult(envelope.WorkerCommandReply);
|
||||
@@ -445,6 +464,7 @@ public sealed class WorkerClient : IWorkerClient
|
||||
return;
|
||||
}
|
||||
|
||||
ReleasePendingCommandSlot();
|
||||
TimeSpan duration = _timeProvider.GetElapsedTime(pendingCommand.StartTimestamp);
|
||||
_metrics?.CommandFailed(pendingCommand.Method, errorCode.ToString(), duration);
|
||||
pendingCommand.SetException(new WorkerClientException(errorCode, message));
|
||||
@@ -498,6 +518,7 @@ public sealed class WorkerClient : IWorkerClient
|
||||
: ready.WorkerProcessId;
|
||||
_lastHeartbeatAt = _timeProvider.GetUtcNow();
|
||||
_state = WorkerClientState.Ready;
|
||||
_workerStartRecorded = true;
|
||||
}
|
||||
|
||||
DateTimeOffset readyAt = _timeProvider.GetUtcNow();
|
||||
@@ -549,7 +570,7 @@ public sealed class WorkerClient : IWorkerClient
|
||||
new WorkerClientException(
|
||||
WorkerClientErrorCode.GatewayShutdown,
|
||||
$"Worker client closed because {reason}."));
|
||||
_metrics?.WorkerStopped(reason);
|
||||
RecordWorkerStoppedOnce(reason);
|
||||
}
|
||||
|
||||
private void SetFaulted(
|
||||
@@ -575,16 +596,33 @@ public sealed class WorkerClient : IWorkerClient
|
||||
_outboundEnvelopes.Writer.TryComplete(fault);
|
||||
_events.Writer.TryComplete(fault);
|
||||
CompletePendingCommands(fault);
|
||||
RecordWorkerStoppedOnce(errorCode.ToString());
|
||||
_metrics?.Fault(errorCode.ToString());
|
||||
_logger.LogWarning(exception, "Worker client faulted for session {SessionId}: {Message}", SessionId, message);
|
||||
}
|
||||
|
||||
private void RecordWorkerStoppedOnce(string reason)
|
||||
{
|
||||
bool shouldRecord;
|
||||
lock (_syncRoot)
|
||||
{
|
||||
shouldRecord = _workerStartRecorded;
|
||||
_workerStartRecorded = false;
|
||||
}
|
||||
|
||||
if (shouldRecord)
|
||||
{
|
||||
_metrics?.WorkerStopped(reason);
|
||||
}
|
||||
}
|
||||
|
||||
private void CompletePendingCommands(Exception exception)
|
||||
{
|
||||
foreach (KeyValuePair<string, PendingCommand> item in _pendingCommands.ToArray())
|
||||
{
|
||||
if (_pendingCommands.TryRemove(item.Key, out PendingCommand? pendingCommand))
|
||||
{
|
||||
ReleasePendingCommandSlot();
|
||||
TimeSpan duration = _timeProvider.GetElapsedTime(pendingCommand.StartTimestamp);
|
||||
_metrics?.CommandFailed(pendingCommand.Method, exception.GetType().Name, duration);
|
||||
pendingCommand.SetException(exception);
|
||||
@@ -592,6 +630,17 @@ public sealed class WorkerClient : IWorkerClient
|
||||
}
|
||||
}
|
||||
|
||||
private void ReleasePendingCommandSlot()
|
||||
{
|
||||
try
|
||||
{
|
||||
_pendingCommandSlots.Release();
|
||||
}
|
||||
catch (SemaphoreFullException)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
private void TransitionFromCreatedToHandshaking()
|
||||
{
|
||||
lock (_syncRoot)
|
||||
|
||||
@@ -11,4 +11,5 @@ public enum WorkerClientErrorCode
|
||||
ShutdownTimeout,
|
||||
GatewayShutdown,
|
||||
WriteFailed,
|
||||
PendingCommandLimitExceeded,
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@ public sealed class WorkerClientOptions
|
||||
HeartbeatCheckInterval = DefaultHeartbeatCheckInterval;
|
||||
EventChannelCapacity = 1_024;
|
||||
EventChannelFullModeTimeout = DefaultEventChannelFullModeTimeout;
|
||||
MaxPendingCommands = 128;
|
||||
}
|
||||
|
||||
public TimeSpan HeartbeatGrace { get; init; }
|
||||
@@ -21,4 +22,6 @@ public sealed class WorkerClientOptions
|
||||
public int EventChannelCapacity { get; init; }
|
||||
|
||||
public TimeSpan EventChannelFullModeTimeout { get; init; }
|
||||
|
||||
public int MaxPendingCommands { get; init; }
|
||||
}
|
||||
|
||||
@@ -96,8 +96,6 @@ public sealed class WorkerProcessLauncher : IWorkerProcessLauncher
|
||||
startupTimeout.Token)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
_metrics.WorkerStarted(_timeProvider.GetUtcNow() - startedAt);
|
||||
|
||||
return new WorkerProcessHandle(process, commandLine, startedAt);
|
||||
}
|
||||
catch (OperationCanceledException exception) when (!cancellationToken.IsCancellationRequested)
|
||||
|
||||
@@ -42,8 +42,15 @@ public sealed class DashboardSnapshotServiceTests
|
||||
DateTimeOffset.Parse("2026-04-26T10:01:00Z"));
|
||||
faultedSession.AttachWorkerClient(new FakeWorkerClient("session-faulted", 1202, WorkerClientState.Faulted));
|
||||
faultedSession.MarkFaulted("worker pipe disconnected");
|
||||
GatewaySession closedSession = CreateSession(
|
||||
"session-closed",
|
||||
"client-three",
|
||||
DateTimeOffset.Parse("2026-04-26T09:59:00Z"));
|
||||
closedSession.AttachWorkerClient(new FakeWorkerClient("session-closed", 1203, WorkerClientState.Closed));
|
||||
closedSession.TransitionTo(SessionState.Closed);
|
||||
registry.TryAdd(activeSession);
|
||||
registry.TryAdd(faultedSession);
|
||||
registry.TryAdd(closedSession);
|
||||
using GatewayMetrics metrics = new();
|
||||
metrics.SessionOpened();
|
||||
metrics.SessionOpened();
|
||||
@@ -55,10 +62,15 @@ public sealed class DashboardSnapshotServiceTests
|
||||
|
||||
DashboardSnapshot snapshot = service.GetSnapshot();
|
||||
|
||||
Assert.Equal(2, snapshot.Sessions.Count);
|
||||
Assert.Equal(3, snapshot.Sessions.Count);
|
||||
Assert.Equal("session-faulted", snapshot.Sessions[0].SessionId);
|
||||
Assert.Equal(SessionState.Faulted, snapshot.Sessions[0].State);
|
||||
DashboardSessionSummary activeSummary = Assert.Single(
|
||||
snapshot.Sessions,
|
||||
session => session.SessionId == "session-active");
|
||||
Assert.Equal(1, activeSummary.EventsReceived);
|
||||
Assert.Equal(2, snapshot.Workers.Count);
|
||||
Assert.DoesNotContain(snapshot.Workers, worker => worker.SessionId == "session-closed");
|
||||
Assert.Contains(snapshot.Metrics, metric => metric.Name == "mxgateway.commands.started" && metric.Value == 1);
|
||||
Assert.Contains(
|
||||
snapshot.Metrics,
|
||||
|
||||
@@ -32,6 +32,35 @@ public sealed class SessionManagerTests
|
||||
Assert.Equal(1, metrics.GetSnapshot().SessionsOpened);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task OpenSessionAsync_GeneratesClientCorrelationIdFromClientNameAndSessionId()
|
||||
{
|
||||
SessionOpenRequest request = CreateOpenRequest() with
|
||||
{
|
||||
ClientSessionName = "rust-load-client",
|
||||
ClientCorrelationId = "caller-provided-correlation",
|
||||
};
|
||||
SessionManager manager = CreateManager(new FakeSessionWorkerClientFactory(new FakeWorkerClient()));
|
||||
|
||||
GatewaySession session = await manager.OpenSessionAsync(request, "client-1", CancellationToken.None);
|
||||
|
||||
Assert.Equal($"rust-load-client-{session.SessionId}", session.ClientCorrelationId);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task OpenSessionAsync_WhenClientSessionNameMissing_UsesClientCorrelationPrefix()
|
||||
{
|
||||
SessionOpenRequest request = CreateOpenRequest() with
|
||||
{
|
||||
ClientSessionName = "",
|
||||
};
|
||||
SessionManager manager = CreateManager(new FakeSessionWorkerClientFactory(new FakeWorkerClient()));
|
||||
|
||||
GatewaySession session = await manager.OpenSessionAsync(request, "client-1", CancellationToken.None);
|
||||
|
||||
Assert.Equal($"client-{session.SessionId}", session.ClientCorrelationId);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task InvokeAsync_WhenSessionReady_ForwardsCommandToWorker()
|
||||
{
|
||||
@@ -111,7 +140,7 @@ public sealed class SessionManagerTests
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task CloseSessionAsync_WhenCalledTwice_IsIdempotent()
|
||||
public async Task CloseSessionAsync_RemovesClosedSession()
|
||||
{
|
||||
FakeWorkerClient workerClient = new();
|
||||
using GatewayMetrics metrics = new();
|
||||
@@ -119,12 +148,12 @@ public sealed class SessionManagerTests
|
||||
GatewaySession session = await manager.OpenSessionAsync(CreateOpenRequest(), "client-1", CancellationToken.None);
|
||||
|
||||
SessionCloseResult firstClose = await manager.CloseSessionAsync(session.SessionId, CancellationToken.None);
|
||||
SessionCloseResult secondClose = await manager.CloseSessionAsync(session.SessionId, CancellationToken.None);
|
||||
SessionManagerException secondClose = await Assert.ThrowsAsync<SessionManagerException>(
|
||||
async () => await manager.CloseSessionAsync(session.SessionId, CancellationToken.None));
|
||||
|
||||
Assert.False(firstClose.AlreadyClosed);
|
||||
Assert.True(secondClose.AlreadyClosed);
|
||||
Assert.Equal(SessionState.Closed, firstClose.FinalState);
|
||||
Assert.Equal(SessionState.Closed, secondClose.FinalState);
|
||||
Assert.Equal(SessionManagerErrorCode.SessionNotFound, secondClose.ErrorCode);
|
||||
Assert.Equal(1, workerClient.ShutdownCount);
|
||||
Assert.Equal(1, metrics.GetSnapshot().SessionsClosed);
|
||||
Assert.Equal(0, metrics.GetSnapshot().OpenSessions);
|
||||
|
||||
@@ -2,6 +2,7 @@ using System.IO.Pipes;
|
||||
using Google.Protobuf.WellKnownTypes;
|
||||
using MxGateway.Contracts;
|
||||
using MxGateway.Contracts.Proto;
|
||||
using MxGateway.Server.Metrics;
|
||||
using MxGateway.Server.Workers;
|
||||
|
||||
namespace MxGateway.Tests.Gateway.Workers;
|
||||
@@ -152,6 +153,27 @@ public sealed class WorkerClientTests
|
||||
Assert.Equal(WorkerClientState.Faulted, client.State);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task ReadLoop_WhenPipeDisconnects_StopsRunningWorkerMetric()
|
||||
{
|
||||
await using PipePair pipePair = await PipePair.CreateAsync();
|
||||
using GatewayMetrics metrics = new();
|
||||
await using WorkerClient client = CreateClient(pipePair, metrics: metrics);
|
||||
await CompleteHandshakeAsync(client, pipePair);
|
||||
|
||||
Assert.Equal(1, metrics.GetSnapshot().WorkersRunning);
|
||||
|
||||
await pipePair.DisposeWorkerSideAsync();
|
||||
|
||||
await WaitUntilAsync(
|
||||
() => client.State == WorkerClientState.Faulted,
|
||||
TestTimeout);
|
||||
|
||||
GatewayMetricsSnapshot snapshot = metrics.GetSnapshot();
|
||||
Assert.Equal(0, snapshot.WorkersRunning);
|
||||
Assert.Equal(1, snapshot.WorkerExits);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task ReadLoop_WhenHeartbeatArrives_UpdatesLastHeartbeatAndWorkerProcess()
|
||||
{
|
||||
@@ -193,7 +215,8 @@ public sealed class WorkerClientTests
|
||||
|
||||
private static WorkerClient CreateClient(
|
||||
PipePair pipePair,
|
||||
WorkerClientOptions? options = null)
|
||||
WorkerClientOptions? options = null,
|
||||
GatewayMetrics? metrics = null)
|
||||
{
|
||||
WorkerFrameProtocolOptions frameOptions = new(SessionId);
|
||||
WorkerClientConnection connection = new(
|
||||
@@ -202,7 +225,7 @@ public sealed class WorkerClientTests
|
||||
pipePair.GatewayStream,
|
||||
frameOptions);
|
||||
|
||||
return new WorkerClient(connection, options);
|
||||
return new WorkerClient(connection, options, metrics);
|
||||
}
|
||||
|
||||
private static async Task CompleteHandshakeAsync(
|
||||
|
||||
@@ -43,7 +43,7 @@ public sealed class WorkerProcessLauncherTests
|
||||
Assert.DoesNotContain(Nonce, handle.CommandLine.ToString(), StringComparison.Ordinal);
|
||||
Assert.DoesNotContain(Nonce, string.Join(" ", handle.CommandLine.Arguments), StringComparison.Ordinal);
|
||||
Assert.False(pipeReservation.DisposeCalled);
|
||||
Assert.Equal(1, metrics.GetSnapshot().WorkersRunning);
|
||||
Assert.Equal(0, metrics.GetSnapshot().WorkersRunning);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
|
||||
@@ -17,7 +17,8 @@ public sealed class GatewayMetricsTests
|
||||
metrics.CommandFailed("WriteSecured", "AuthorizationFailed", TimeSpan.FromMilliseconds(12));
|
||||
metrics.EventReceived("session-1", "OnDataChange");
|
||||
metrics.EventReceived("session-1", "OnDataChange");
|
||||
metrics.SetEventQueueDepth(7);
|
||||
metrics.SetWorkerEventQueueDepth(7);
|
||||
metrics.SetGrpcEventStreamQueueDepth(3);
|
||||
metrics.QueueOverflow("session-events");
|
||||
metrics.Fault("CommandTimeout");
|
||||
metrics.WorkerKilled("CommandTimeout");
|
||||
@@ -30,7 +31,8 @@ public sealed class GatewayMetricsTests
|
||||
|
||||
Assert.Equal(0, snapshot.OpenSessions);
|
||||
Assert.Equal(0, snapshot.WorkersRunning);
|
||||
Assert.Equal(7, snapshot.EventQueueDepth);
|
||||
Assert.Equal(7, snapshot.WorkerEventQueueDepth);
|
||||
Assert.Equal(3, snapshot.GrpcEventStreamQueueDepth);
|
||||
Assert.Equal(1, snapshot.SessionsOpened);
|
||||
Assert.Equal(1, snapshot.SessionsClosed);
|
||||
Assert.Equal(2, snapshot.CommandsStarted);
|
||||
@@ -45,6 +47,7 @@ public sealed class GatewayMetricsTests
|
||||
Assert.Equal(1, snapshot.StreamDisconnects);
|
||||
Assert.Equal(1, snapshot.CommandFailuresByMethod["WriteSecured"]);
|
||||
Assert.Equal(2, snapshot.EventsByFamily["OnDataChange"]);
|
||||
Assert.Equal(2, snapshot.EventsBySession["session-1"]);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@@ -53,7 +56,7 @@ public sealed class GatewayMetricsTests
|
||||
using GatewayMetrics metrics = new();
|
||||
|
||||
ArgumentOutOfRangeException exception = Assert.Throws<ArgumentOutOfRangeException>(
|
||||
() => metrics.SetEventQueueDepth(-1));
|
||||
() => metrics.SetWorkerEventQueueDepth(-1));
|
||||
|
||||
Assert.Equal("depth", exception.ParamName);
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.IO.Pipes;
|
||||
using System.Threading;
|
||||
@@ -228,6 +229,21 @@ public sealed class WorkerPipeClientTests
|
||||
currentCommandCorrelationId: string.Empty);
|
||||
}
|
||||
|
||||
public IReadOnlyList<WorkerEvent> DrainEvents(uint maxEvents)
|
||||
{
|
||||
return Array.Empty<WorkerEvent>();
|
||||
}
|
||||
|
||||
public WorkerFault? DrainFault()
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
public bool CancelCommand(string correlationId)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
public void RequestShutdown()
|
||||
{
|
||||
}
|
||||
|
||||
@@ -238,6 +238,37 @@ public sealed class WorkerPipeSessionTests
|
||||
await SendShutdownAndWaitAsync(pipePair, runTask, cancellation.Token);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task RunAsync_WhenRuntimeHasEvents_WritesWorkerEventEnvelope()
|
||||
{
|
||||
using CancellationTokenSource cancellation = new(TimeSpan.FromSeconds(5));
|
||||
using PipePair pipePair = await PipePair.CreateAsync(cancellation.Token);
|
||||
FakeRuntimeSession runtime = new();
|
||||
WorkerPipeSession session = CreatePipeSession(
|
||||
pipePair.WorkerStream,
|
||||
runtime,
|
||||
new WorkerPipeSessionOptions
|
||||
{
|
||||
HeartbeatInterval = TimeSpan.FromMilliseconds(100),
|
||||
HeartbeatGrace = TimeSpan.FromSeconds(5),
|
||||
});
|
||||
Task runTask = session.RunAsync(cancellation.Token);
|
||||
await CompleteGatewayHandshakeAsync(pipePair, cancellation.Token);
|
||||
|
||||
runtime.EnqueueEvent(CreateWorkerEvent(sequence: 7));
|
||||
|
||||
WorkerEnvelope workerEvent = await ReadUntilAsync(
|
||||
pipePair.GatewayReader,
|
||||
WorkerEnvelope.BodyOneofCase.WorkerEvent,
|
||||
cancellation.Token);
|
||||
|
||||
Assert.Equal(MxEventFamily.OnDataChange, workerEvent.WorkerEvent.Event.Family);
|
||||
Assert.Equal(7UL, workerEvent.WorkerEvent.Event.WorkerSequence);
|
||||
|
||||
await SendShutdownAndWaitAsync(pipePair, runTask, cancellation.Token);
|
||||
}
|
||||
|
||||
|
||||
[Fact]
|
||||
public async Task RunAsync_WhenStaActivityIsStale_WritesWatchdogFault()
|
||||
{
|
||||
@@ -364,6 +395,20 @@ public sealed class WorkerPipeSessionTests
|
||||
};
|
||||
}
|
||||
|
||||
private static WorkerEvent CreateWorkerEvent(ulong sequence)
|
||||
{
|
||||
return new WorkerEvent
|
||||
{
|
||||
Event = new MxEvent
|
||||
{
|
||||
SessionId = SessionId,
|
||||
Family = MxEventFamily.OnDataChange,
|
||||
WorkerSequence = sequence,
|
||||
OnDataChange = new OnDataChangeEvent(),
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
private static async Task CompleteGatewayHandshakeAsync(
|
||||
PipePair pipePair,
|
||||
CancellationToken cancellationToken)
|
||||
@@ -478,6 +523,7 @@ public sealed class WorkerPipeSessionTests
|
||||
{
|
||||
private readonly ManualResetEventSlim releaseDispatch = new(false);
|
||||
private readonly object gate = new();
|
||||
private readonly Queue<WorkerEvent> events = new();
|
||||
private WorkerRuntimeHeartbeatSnapshot snapshot = new(
|
||||
DateTimeOffset.UtcNow,
|
||||
pendingCommandCount: 0,
|
||||
@@ -550,6 +596,33 @@ public sealed class WorkerPipeSessionTests
|
||||
}
|
||||
}
|
||||
|
||||
public IReadOnlyList<WorkerEvent> DrainEvents(uint maxEvents)
|
||||
{
|
||||
lock (gate)
|
||||
{
|
||||
int drainCount = maxEvents == 0
|
||||
? events.Count
|
||||
: Math.Min(events.Count, checked((int)Math.Min(maxEvents, int.MaxValue)));
|
||||
List<WorkerEvent> drained = new(drainCount);
|
||||
for (int index = 0; index < drainCount; index++)
|
||||
{
|
||||
drained.Add(events.Dequeue());
|
||||
}
|
||||
|
||||
return drained;
|
||||
}
|
||||
}
|
||||
|
||||
public WorkerFault? DrainFault()
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
public bool CancelCommand(string correlationId)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
public void RequestShutdown()
|
||||
{
|
||||
releaseDispatch.Set();
|
||||
@@ -576,6 +649,14 @@ public sealed class WorkerPipeSessionTests
|
||||
}
|
||||
}
|
||||
|
||||
public void EnqueueEvent(WorkerEvent workerEvent)
|
||||
{
|
||||
lock (gate)
|
||||
{
|
||||
events.Enqueue(workerEvent);
|
||||
}
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
releaseDispatch.Set();
|
||||
|
||||
@@ -148,10 +148,22 @@ public sealed class WorkerPipeClient : IWorkerPipeClient
|
||||
})
|
||||
.Build();
|
||||
|
||||
return await pipeline.ExecuteAsync(
|
||||
async token => await ConnectSingleAttemptAsync(pipeName, token).ConfigureAwait(false),
|
||||
cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
using CancellationTokenSource connectDeadline =
|
||||
CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
|
||||
connectDeadline.CancelAfter(_connectTimeoutMilliseconds);
|
||||
|
||||
try
|
||||
{
|
||||
return await pipeline.ExecuteAsync(
|
||||
async token => await ConnectSingleAttemptAsync(pipeName, token).ConfigureAwait(false),
|
||||
connectDeadline.Token)
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException) when (!cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
throw new TimeoutException(
|
||||
$"Worker pipe {pipeName} did not connect within {_connectTimeoutMilliseconds}ms.");
|
||||
}
|
||||
}
|
||||
|
||||
private async Task<NamedPipeClientStream> ConnectSingleAttemptAsync(
|
||||
|
||||
@@ -14,6 +14,9 @@ namespace MxGateway.Worker.Ipc;
|
||||
|
||||
public sealed class WorkerPipeSession
|
||||
{
|
||||
private static readonly TimeSpan EventDrainInterval = TimeSpan.FromMilliseconds(25);
|
||||
private const uint EventDrainBatchSize = 128;
|
||||
|
||||
private readonly WorkerFrameProtocolOptions _options;
|
||||
private readonly Func<int> _processIdProvider;
|
||||
private readonly Func<IWorkerRuntimeSession> _runtimeSessionFactory;
|
||||
@@ -206,17 +209,22 @@ public sealed class WorkerPipeSession
|
||||
using CancellationTokenSource heartbeatCancellation = CancellationTokenSource
|
||||
.CreateLinkedTokenSource(cancellationToken);
|
||||
Task heartbeatTask = RunHeartbeatLoopAsync(heartbeatCancellation.Token);
|
||||
Task eventDrainTask = RunEventDrainLoopAsync(heartbeatCancellation.Token);
|
||||
|
||||
try
|
||||
{
|
||||
while (!cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
Task<WorkerEnvelope> readTask = _reader.ReadAsync(cancellationToken);
|
||||
Task completedTask = await Task.WhenAny(readTask, heartbeatTask).ConfigureAwait(false);
|
||||
Task completedTask = await Task.WhenAny(readTask, heartbeatTask, eventDrainTask).ConfigureAwait(false);
|
||||
if (completedTask == heartbeatTask)
|
||||
{
|
||||
await heartbeatTask.ConfigureAwait(false);
|
||||
}
|
||||
else if (completedTask == eventDrainTask)
|
||||
{
|
||||
await eventDrainTask.ConfigureAwait(false);
|
||||
}
|
||||
|
||||
WorkerEnvelope envelope = await readTask.ConfigureAwait(false);
|
||||
bool keepReading = await DispatchGatewayEnvelopeAsync(envelope, cancellationToken).ConfigureAwait(false);
|
||||
@@ -236,6 +244,52 @@ public sealed class WorkerPipeSession
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
await eventDrainTask.ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private async Task RunEventDrainLoopAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
while (!cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
IWorkerRuntimeSession? runtimeSession = _runtimeSession;
|
||||
if (runtimeSession is null)
|
||||
{
|
||||
await Task.Delay(EventDrainInterval, cancellationToken).ConfigureAwait(false);
|
||||
continue;
|
||||
}
|
||||
|
||||
WorkerFault? fault = runtimeSession.DrainFault();
|
||||
if (fault is not null)
|
||||
{
|
||||
_state = WorkerState.Faulted;
|
||||
await TryWriteFaultAsync(fault, cancellationToken).ConfigureAwait(false);
|
||||
throw new InvalidOperationException(
|
||||
string.IsNullOrWhiteSpace(fault.DiagnosticMessage)
|
||||
? $"MXAccess event queue faulted with category {fault.Category}."
|
||||
: fault.DiagnosticMessage);
|
||||
}
|
||||
|
||||
IReadOnlyList<WorkerEvent> events = runtimeSession.DrainEvents(EventDrainBatchSize);
|
||||
if (events.Count == 0)
|
||||
{
|
||||
await Task.Delay(EventDrainInterval, cancellationToken).ConfigureAwait(false);
|
||||
continue;
|
||||
}
|
||||
|
||||
foreach (WorkerEvent workerEvent in events)
|
||||
{
|
||||
await _writer
|
||||
.WriteAsync(CreateEnvelope(workerEvent), cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -252,6 +306,7 @@ public sealed class WorkerPipeSession
|
||||
await ShutdownAsync(envelope.WorkerShutdown, cancellationToken).ConfigureAwait(false);
|
||||
return false;
|
||||
case WorkerEnvelope.BodyOneofCase.WorkerCancel:
|
||||
_runtimeSession?.CancelCommand(envelope.CorrelationId);
|
||||
return true;
|
||||
default:
|
||||
throw new WorkerFrameProtocolException(
|
||||
@@ -461,6 +516,11 @@ public sealed class WorkerPipeSession
|
||||
return CreateBaseEnvelope(reply);
|
||||
}
|
||||
|
||||
private WorkerEnvelope CreateEnvelope(WorkerEvent workerEvent)
|
||||
{
|
||||
return CreateBaseEnvelope(workerEvent);
|
||||
}
|
||||
|
||||
private WorkerEnvelope CreateEnvelope(WorkerShutdownAck shutdownAck)
|
||||
{
|
||||
return CreateBaseEnvelope(shutdownAck);
|
||||
@@ -500,6 +560,13 @@ public sealed class WorkerPipeSession
|
||||
return envelope;
|
||||
}
|
||||
|
||||
private WorkerEnvelope CreateBaseEnvelope(WorkerEvent body)
|
||||
{
|
||||
WorkerEnvelope envelope = CreateBaseEnvelope();
|
||||
envelope.WorkerEvent = body;
|
||||
return envelope;
|
||||
}
|
||||
|
||||
private WorkerEnvelope CreateBaseEnvelope(WorkerShutdownAck body)
|
||||
{
|
||||
WorkerEnvelope envelope = CreateBaseEnvelope();
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using MxGateway.Contracts.Proto;
|
||||
@@ -17,6 +18,12 @@ public interface IWorkerRuntimeSession : IDisposable
|
||||
|
||||
WorkerRuntimeHeartbeatSnapshot CaptureHeartbeat();
|
||||
|
||||
IReadOnlyList<WorkerEvent> DrainEvents(uint maxEvents);
|
||||
|
||||
WorkerFault? DrainFault();
|
||||
|
||||
bool CancelCommand(string correlationId);
|
||||
|
||||
void RequestShutdown();
|
||||
|
||||
Task<MxAccessShutdownResult> ShutdownGracefullyAsync(
|
||||
|
||||
@@ -14,6 +14,7 @@ public sealed class MxAccessEventQueue
|
||||
private readonly object syncRoot = new();
|
||||
private ulong lastEventSequence;
|
||||
private WorkerFault? fault;
|
||||
private bool faultDrained;
|
||||
|
||||
public MxAccessEventQueue()
|
||||
: this(DefaultCapacity)
|
||||
@@ -163,6 +164,20 @@ public sealed class MxAccessEventQueue
|
||||
}
|
||||
}
|
||||
|
||||
public WorkerFault? DrainFault()
|
||||
{
|
||||
lock (syncRoot)
|
||||
{
|
||||
if (fault is null || faultDrained)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
faultDrained = true;
|
||||
return fault.Clone();
|
||||
}
|
||||
}
|
||||
|
||||
private WorkerFault CreateOverflowFault()
|
||||
{
|
||||
string message = $"MXAccess outbound event queue reached capacity {capacity}.";
|
||||
|
||||
@@ -79,7 +79,14 @@ public sealed class MxAccessSession : IDisposable
|
||||
}
|
||||
catch (Exception exception)
|
||||
{
|
||||
eventSink.Detach();
|
||||
try
|
||||
{
|
||||
eventSink.Detach();
|
||||
}
|
||||
catch
|
||||
{
|
||||
// Preserve the creation failure while still releasing the COM object below.
|
||||
}
|
||||
|
||||
if (mxAccessComObject is not null && Marshal.IsComObject(mxAccessComObject))
|
||||
{
|
||||
@@ -535,13 +542,15 @@ public sealed class MxAccessSession : IDisposable
|
||||
|
||||
private void DisposeCore(ICollection<MxAccessShutdownFailure>? failures)
|
||||
{
|
||||
Exception? detachException = null;
|
||||
try
|
||||
{
|
||||
eventSink.Detach();
|
||||
}
|
||||
catch (Exception exception) when (failures is not null)
|
||||
catch (Exception exception)
|
||||
{
|
||||
failures.Add(new MxAccessShutdownFailure(
|
||||
detachException = exception;
|
||||
failures?.Add(new MxAccessShutdownFailure(
|
||||
"DetachEvents",
|
||||
serverHandle: null,
|
||||
itemHandle: null,
|
||||
@@ -565,6 +574,10 @@ public sealed class MxAccessSession : IDisposable
|
||||
}
|
||||
|
||||
disposed = true;
|
||||
if (detachException is not null && failures is null)
|
||||
{
|
||||
throw detachException;
|
||||
}
|
||||
}
|
||||
|
||||
private void ThrowIfDisposed()
|
||||
|
||||
@@ -127,6 +127,16 @@ public sealed class MxAccessStaSession : IWorkerRuntimeSession
|
||||
return eventQueue.Drain(maxEvents);
|
||||
}
|
||||
|
||||
public WorkerFault? DrainFault()
|
||||
{
|
||||
return eventQueue.DrainFault();
|
||||
}
|
||||
|
||||
public bool CancelCommand(string correlationId)
|
||||
{
|
||||
return commandDispatcher?.CancelQueuedCommand(correlationId) ?? false;
|
||||
}
|
||||
|
||||
public Task<IReadOnlyList<RegisteredServerHandle>> GetRegisteredServerHandlesAsync(
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
@@ -207,7 +217,14 @@ public sealed class MxAccessStaSession : IWorkerRuntimeSession
|
||||
throw new TimeoutException($"MXAccess graceful shutdown exceeded {timeout}.");
|
||||
}
|
||||
|
||||
result = await cleanupTask.ConfigureAwait(false);
|
||||
try
|
||||
{
|
||||
result = await cleanupTask.ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException) when (!cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
throw new TimeoutException($"MXAccess graceful shutdown exceeded {timeout}.");
|
||||
}
|
||||
}
|
||||
|
||||
TimeSpan remaining = timeout - stopwatch.Elapsed;
|
||||
@@ -232,7 +249,17 @@ public sealed class MxAccessStaSession : IWorkerRuntimeSession
|
||||
|
||||
if (session is not null)
|
||||
{
|
||||
staRuntime.InvokeAsync(() => session.Dispose()).GetAwaiter().GetResult();
|
||||
try
|
||||
{
|
||||
staRuntime.InvokeAsync(() => session.Dispose())
|
||||
.Wait(TimeSpan.FromSeconds(2));
|
||||
}
|
||||
catch (AggregateException)
|
||||
{
|
||||
}
|
||||
catch (ObjectDisposedException)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
staRuntime.Dispose();
|
||||
|
||||
@@ -8,10 +8,13 @@ namespace MxGateway.Worker.Sta;
|
||||
|
||||
public sealed class StaCommandDispatcher
|
||||
{
|
||||
public const int DefaultMaxPendingCommands = 128;
|
||||
|
||||
private readonly HResultConverter hresultConverter;
|
||||
private readonly IStaCommandExecutor commandExecutor;
|
||||
private readonly Queue<QueuedStaCommand> commandQueue = new();
|
||||
private readonly StaRuntime staRuntime;
|
||||
private readonly int maxPendingCommands;
|
||||
private readonly object gate = new();
|
||||
private bool drainActive;
|
||||
private bool shutdownRequested;
|
||||
@@ -28,10 +31,27 @@ public sealed class StaCommandDispatcher
|
||||
StaRuntime staRuntime,
|
||||
IStaCommandExecutor commandExecutor,
|
||||
HResultConverter hresultConverter)
|
||||
: this(staRuntime, commandExecutor, hresultConverter, DefaultMaxPendingCommands)
|
||||
{
|
||||
}
|
||||
|
||||
public StaCommandDispatcher(
|
||||
StaRuntime staRuntime,
|
||||
IStaCommandExecutor commandExecutor,
|
||||
HResultConverter hresultConverter,
|
||||
int maxPendingCommands)
|
||||
{
|
||||
if (maxPendingCommands <= 0)
|
||||
{
|
||||
throw new ArgumentOutOfRangeException(
|
||||
nameof(maxPendingCommands),
|
||||
"Max pending STA commands must be greater than zero.");
|
||||
}
|
||||
|
||||
this.staRuntime = staRuntime ?? throw new ArgumentNullException(nameof(staRuntime));
|
||||
this.commandExecutor = commandExecutor ?? throw new ArgumentNullException(nameof(commandExecutor));
|
||||
this.hresultConverter = hresultConverter ?? throw new ArgumentNullException(nameof(hresultConverter));
|
||||
this.maxPendingCommands = maxPendingCommands;
|
||||
}
|
||||
|
||||
public int PendingCommandCount
|
||||
@@ -73,6 +93,14 @@ public sealed class StaCommandDispatcher
|
||||
"The STA command dispatcher is shutting down."));
|
||||
}
|
||||
|
||||
if (commandQueue.Count >= maxPendingCommands)
|
||||
{
|
||||
return Task.FromResult(CreateRejectedReply(
|
||||
command,
|
||||
ProtocolStatusCode.WorkerUnavailable,
|
||||
$"The STA command dispatcher already has {maxPendingCommands} pending command(s)."));
|
||||
}
|
||||
|
||||
QueuedStaCommand queuedCommand = new(command);
|
||||
commandQueue.Enqueue(queuedCommand);
|
||||
|
||||
@@ -86,6 +114,51 @@ public sealed class StaCommandDispatcher
|
||||
}
|
||||
}
|
||||
|
||||
public bool CancelQueuedCommand(string correlationId)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(correlationId))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
lock (gate)
|
||||
{
|
||||
if (commandQueue.Count == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool canceled = false;
|
||||
Queue<QueuedStaCommand> retainedCommands = new(commandQueue.Count);
|
||||
while (commandQueue.Count > 0)
|
||||
{
|
||||
QueuedStaCommand queuedCommand = commandQueue.Dequeue();
|
||||
if (!canceled
|
||||
&& string.Equals(
|
||||
queuedCommand.Command.CorrelationId,
|
||||
correlationId,
|
||||
StringComparison.Ordinal))
|
||||
{
|
||||
queuedCommand.Complete(CreateRejectedReply(
|
||||
queuedCommand.Command,
|
||||
ProtocolStatusCode.Canceled,
|
||||
"The STA command was canceled before execution."));
|
||||
canceled = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
retainedCommands.Enqueue(queuedCommand);
|
||||
}
|
||||
|
||||
while (retainedCommands.Count > 0)
|
||||
{
|
||||
commandQueue.Enqueue(retainedCommands.Dequeue());
|
||||
}
|
||||
|
||||
return canceled;
|
||||
}
|
||||
}
|
||||
|
||||
public void RequestShutdown()
|
||||
{
|
||||
lock (gate)
|
||||
|
||||
Reference in New Issue
Block a user