Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| bd3096533d | |||
| 6eb9ea9105 | |||
| 555fe4c0ba | |||
| 89043cb2b6 | |||
| 1764eff1cf | |||
| fe9044115b |
@@ -32,7 +32,7 @@ dotnet test src/MxGateway.Worker.Tests/MxGateway.Worker.Tests.csproj -p:Platform
|
||||
dotnet run --project src/MxGateway.Server/MxGateway.Server.csproj
|
||||
|
||||
# API-key admin CLI (same exe, "apikey" subcommand)
|
||||
dotnet run --project src/MxGateway.Server/MxGateway.Server.csproj -- apikey create --display-name "dev" --scopes session,invoke,event,metadata,admin
|
||||
dotnet run --project src/MxGateway.Server/MxGateway.Server.csproj -- apikey create --display-name "dev" --scopes session:open,session:close,invoke:read,invoke:write,invoke:secure,events:read,metadata:read,admin
|
||||
```
|
||||
|
||||
Single test by name (xUnit `--filter`):
|
||||
@@ -114,7 +114,7 @@ External analysis sources referenced by design docs:
|
||||
|
||||
## Authentication
|
||||
|
||||
Gateway gRPC clients authenticate with an API key in metadata: `authorization: Bearer mxgw_<key-id>_<secret>`. Keys are stored hashed (with a peppered SHA) in a gateway-owned SQLite DB (default `C:\ProgramData\MxGateway\gateway-auth.db`). Scopes (`session`, `invoke`, `event`, `metadata`, `admin`) gate specific RPCs; missing → `Unauthenticated`, insufficient → `PermissionDenied`. The `apikey` subcommand on the server exe manages keys; see `src/MxGateway.Server/Security/Authentication/`.
|
||||
Gateway gRPC clients authenticate with an API key in metadata: `authorization: Bearer mxgw_<key-id>_<secret>`. Keys are stored hashed (with a peppered SHA) in a gateway-owned SQLite DB (default `C:\ProgramData\MxGateway\gateway-auth.db`). Scopes (`session:open`, `session:close`, `invoke:read`, `invoke:write`, `invoke:secure`, `events:read`, `metadata:read`, `admin`) gate specific RPCs; missing → `Unauthenticated`, insufficient → `PermissionDenied`. The `apikey` subcommand on the server exe manages keys; see `src/MxGateway.Server/Security/Authentication/`.
|
||||
|
||||
Dashboard auth uses the same verifier but exchanges the API key for an HTTP-only secure cookie at `/dashboard/login`. `Dashboard:AllowAnonymousLocalhost` bypasses cookie auth on loopback when explicitly enabled.
|
||||
|
||||
|
||||
@@ -122,7 +122,10 @@ public static class MxGatewayClientCli
|
||||
}
|
||||
catch (Exception exception) when (exception is not OperationCanceledException)
|
||||
{
|
||||
string? apiKey = arguments.GetOptional("api-key");
|
||||
// Redact the effective API key — whether it came from --api-key or from
|
||||
// the (documented default) --api-key-env environment variable — so a
|
||||
// transport error message that echoes the bearer token is never printed.
|
||||
string? apiKey = TryResolveApiKey(arguments);
|
||||
string message = MxGatewayCliSecretRedactor.Redact(exception.Message, apiKey);
|
||||
|
||||
if (arguments.HasFlag("json"))
|
||||
@@ -167,6 +170,27 @@ public static class MxGatewayClientCli
|
||||
}
|
||||
|
||||
private static string ResolveApiKey(CliArguments arguments)
|
||||
{
|
||||
string? apiKey = TryResolveApiKey(arguments);
|
||||
if (!string.IsNullOrWhiteSpace(apiKey))
|
||||
{
|
||||
return apiKey;
|
||||
}
|
||||
|
||||
string apiKeyEnvironmentName = arguments.GetOptional("api-key-env")
|
||||
?? "MXGATEWAY_API_KEY";
|
||||
|
||||
throw new ArgumentException(
|
||||
$"Gateway API key is required. Pass --api-key or set {apiKeyEnvironmentName}.");
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Resolves the effective API key from <c>--api-key</c> or, failing that, the
|
||||
/// environment variable named by <c>--api-key-env</c> (default
|
||||
/// <c>MXGATEWAY_API_KEY</c>). Returns <see langword="null"/> when no key is
|
||||
/// configured; used for redaction where a missing key must not throw.
|
||||
/// </summary>
|
||||
private static string? TryResolveApiKey(CliArguments arguments)
|
||||
{
|
||||
string? apiKey = arguments.GetOptional("api-key");
|
||||
if (!string.IsNullOrWhiteSpace(apiKey))
|
||||
@@ -177,14 +201,7 @@ public static class MxGatewayClientCli
|
||||
string apiKeyEnvironmentName = arguments.GetOptional("api-key-env")
|
||||
?? "MXGATEWAY_API_KEY";
|
||||
|
||||
apiKey = Environment.GetEnvironmentVariable(apiKeyEnvironmentName);
|
||||
if (!string.IsNullOrWhiteSpace(apiKey))
|
||||
{
|
||||
return apiKey;
|
||||
}
|
||||
|
||||
throw new ArgumentException(
|
||||
$"Gateway API key is required. Pass --api-key or set {apiKeyEnvironmentName}.");
|
||||
return Environment.GetEnvironmentVariable(apiKeyEnvironmentName);
|
||||
}
|
||||
|
||||
private static CancellationTokenSource CreateCancellation(CliArguments arguments, string command)
|
||||
|
||||
@@ -106,6 +106,43 @@ public sealed class MxGatewayClientCliTests
|
||||
Assert.Contains("[redacted]", error.ToString());
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verifies that error output redacts the API key even when it was sourced from
|
||||
/// the <c>--api-key-env</c> environment variable rather than passed via
|
||||
/// <c>--api-key</c> — the documented default credential path.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task RunAsync_ErrorOutput_RedactsApiKey_WhenSourcedFromEnvironmentVariable()
|
||||
{
|
||||
const string environmentVariableName = "MXGATEWAY_TEST_API_KEY_REDACT";
|
||||
using var output = new StringWriter();
|
||||
using var error = new StringWriter();
|
||||
|
||||
Environment.SetEnvironmentVariable(environmentVariableName, "env-secret-api-key");
|
||||
try
|
||||
{
|
||||
int exitCode = await MxGatewayClientCli.RunAsync(
|
||||
[
|
||||
"open-session",
|
||||
"--endpoint",
|
||||
"http://localhost:5000",
|
||||
"--api-key-env",
|
||||
environmentVariableName,
|
||||
],
|
||||
output,
|
||||
error,
|
||||
_ => throw new InvalidOperationException("boom env-secret-api-key"));
|
||||
|
||||
Assert.Equal(1, exitCode);
|
||||
Assert.DoesNotContain("env-secret-api-key", error.ToString());
|
||||
Assert.Contains("[redacted]", error.ToString());
|
||||
}
|
||||
finally
|
||||
{
|
||||
Environment.SetEnvironmentVariable(environmentVariableName, null);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>Verifies that stream-events with max-events limit stops output in non-JSON format.</summary>
|
||||
[Fact]
|
||||
public async Task RunAsync_StreamEvents_WithMaxEventsStopsNonJsonOutput()
|
||||
|
||||
@@ -378,6 +378,84 @@ public sealed class MxGatewayClientSessionTests
|
||||
Assert.Equal(cancellation.Token, Assert.Single(transport.InvokeCalls).CallOptions.CancellationToken);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verifies that a client-imposed <see cref="StatusCode.DeadlineExceeded"/> is not
|
||||
/// retried. The deadline budget is shared across the whole safe-unary operation, so
|
||||
/// an immediate retry would only fail again — the call must surface the failure.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task InvokeAsync_DoesNotRetrySafeDiagnosticCommand_OnDeadlineExceeded()
|
||||
{
|
||||
FakeGatewayTransport transport = CreateTransport();
|
||||
transport.InvokeExceptions.Enqueue(
|
||||
new RpcException(new Status(StatusCode.DeadlineExceeded, "deadline exceeded")));
|
||||
transport.AddInvokeReply(new MxCommandReply
|
||||
{
|
||||
SessionId = "session-fixture",
|
||||
Kind = MxCommandKind.Ping,
|
||||
ProtocolStatus = new ProtocolStatus { Code = ProtocolStatusCode.Ok },
|
||||
});
|
||||
await using MxGatewayClient client = CreateClient(transport);
|
||||
MxGatewaySession session = await client.OpenSessionAsync();
|
||||
|
||||
await Assert.ThrowsAsync<RpcException>(async () => await session.InvokeAsync(
|
||||
new MxCommandRequest
|
||||
{
|
||||
SessionId = session.SessionId,
|
||||
Command = new MxCommand { Kind = MxCommandKind.Ping, Ping = new PingCommand() },
|
||||
}));
|
||||
|
||||
Assert.Single(transport.InvokeCalls);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verifies that a successful register reply missing the typed <c>register</c>
|
||||
/// payload throws a descriptive <see cref="MxGatewayException"/> rather than
|
||||
/// silently returning a zero server handle.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task RegisterAsync_Throws_WhenSuccessfulReplyMissingPayload()
|
||||
{
|
||||
FakeGatewayTransport transport = CreateTransport();
|
||||
transport.AddInvokeReply(new MxCommandReply
|
||||
{
|
||||
SessionId = "session-fixture",
|
||||
Kind = MxCommandKind.Register,
|
||||
ProtocolStatus = new ProtocolStatus { Code = ProtocolStatusCode.Ok },
|
||||
});
|
||||
await using MxGatewayClient client = CreateClient(transport);
|
||||
MxGatewaySession session = await client.OpenSessionAsync();
|
||||
|
||||
MxGatewayException exception = await Assert.ThrowsAsync<MxGatewayException>(
|
||||
async () => await session.RegisterAsync("client-name"));
|
||||
|
||||
Assert.Contains("register", exception.Message, StringComparison.Ordinal);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verifies that a successful add-item reply missing the typed <c>add_item</c>
|
||||
/// payload throws a descriptive <see cref="MxGatewayException"/> rather than
|
||||
/// silently returning a zero item handle.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task AddItemAsync_Throws_WhenSuccessfulReplyMissingPayload()
|
||||
{
|
||||
FakeGatewayTransport transport = CreateTransport();
|
||||
transport.AddInvokeReply(new MxCommandReply
|
||||
{
|
||||
SessionId = "session-fixture",
|
||||
Kind = MxCommandKind.AddItem,
|
||||
ProtocolStatus = new ProtocolStatus { Code = ProtocolStatusCode.Ok },
|
||||
});
|
||||
await using MxGatewayClient client = CreateClient(transport);
|
||||
MxGatewaySession session = await client.OpenSessionAsync();
|
||||
|
||||
MxGatewayException exception = await Assert.ThrowsAsync<MxGatewayException>(
|
||||
async () => await session.AddItemAsync(1, "Area.Pump.Speed"));
|
||||
|
||||
Assert.Contains("add_item", exception.Message, StringComparison.Ordinal);
|
||||
}
|
||||
|
||||
private static MxGatewayClient CreateClient(FakeGatewayTransport transport)
|
||||
{
|
||||
return new MxGatewayClient(transport.Options, transport);
|
||||
|
||||
@@ -184,9 +184,10 @@ public sealed class MxGatewayClient : IAsyncDisposable
|
||||
|
||||
/// <summary>
|
||||
/// Acknowledges an active MXAccess alarm condition through the gateway. The
|
||||
/// gateway authenticates the request against the API key's <c>invoke:alarm-ack</c>
|
||||
/// scope and forwards the acknowledge to the worker's MXAccess session;
|
||||
/// the resulting <see cref="MxStatusProxy"/> is returned in the reply.
|
||||
/// gateway authorizes <see cref="AcknowledgeAlarmRequest"/> against the API
|
||||
/// key's <c>admin</c> scope (there is no finer-grained alarm-ack sub-scope)
|
||||
/// and forwards the acknowledge to the worker's MXAccess session; the
|
||||
/// resulting <see cref="MxStatusProxy"/> is returned in the reply.
|
||||
/// </summary>
|
||||
/// <param name="request">The acknowledge request — alarm reference, comment, operator user.</param>
|
||||
/// <param name="cancellationToken">Cancellation token for the operation.</param>
|
||||
|
||||
@@ -7,9 +7,19 @@ namespace MxGateway.Client;
|
||||
/// </summary>
|
||||
public static class MxGatewayClientContractInfo
|
||||
{
|
||||
/// <summary>
|
||||
/// Gets the gateway gRPC protocol version compiled into this client package.
|
||||
/// A client and gateway are wire-compatible only when this value matches the
|
||||
/// gateway's advertised gateway protocol version.
|
||||
/// </summary>
|
||||
public const uint GatewayProtocolVersion =
|
||||
GatewayContractInfo.GatewayProtocolVersion;
|
||||
|
||||
/// <summary>
|
||||
/// Gets the worker frame protocol version compiled into this client package.
|
||||
/// Exposed for diagnostics so callers can report the worker protocol the
|
||||
/// shared contracts were generated against.
|
||||
/// </summary>
|
||||
public const uint WorkerProtocolVersion =
|
||||
GatewayContractInfo.WorkerProtocolVersion;
|
||||
}
|
||||
|
||||
@@ -38,7 +38,12 @@ public sealed class MxGatewayClientOptions
|
||||
public TimeSpan ConnectTimeout { get; init; } = TimeSpan.FromSeconds(10);
|
||||
|
||||
/// <summary>
|
||||
/// Gets the default timeout for unary gRPC calls.
|
||||
/// Gets the timeout budget for a unary gRPC operation. This is both the gRPC
|
||||
/// deadline stamped on each individual attempt and the overall budget for the
|
||||
/// whole safe-unary operation: for retryable calls the initial attempt, every
|
||||
/// retry, and the backoff delays between them all share this single budget.
|
||||
/// It is therefore an upper bound on the total wall-clock time a safe-unary
|
||||
/// call can take, not a fresh per-retry allowance.
|
||||
/// </summary>
|
||||
public TimeSpan DefaultCallTimeout { get; init; } = TimeSpan.FromSeconds(30);
|
||||
|
||||
@@ -47,6 +52,11 @@ public sealed class MxGatewayClientOptions
|
||||
/// </summary>
|
||||
public TimeSpan? StreamTimeout { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Gets the maximum size, in bytes, of a single gRPC message the client will
|
||||
/// send or receive. Applied to both the send and receive limits of the
|
||||
/// underlying channel. Defaults to 16 MiB.
|
||||
/// </summary>
|
||||
public int MaxGrpcMessageBytes { get; init; } = 16 * 1024 * 1024;
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -61,8 +61,13 @@ internal static class MxGatewayClientRetryPolicy
|
||||
|
||||
private static bool IsTransientStatus(StatusCode statusCode)
|
||||
{
|
||||
// DeadlineExceeded is intentionally NOT treated as transient. The deadline
|
||||
// on every unary call is client-imposed (CreateCallOptions stamps the
|
||||
// DefaultCallTimeout budget), and that same budget is shared across the
|
||||
// initial attempt plus all retries plus backoff. A DeadlineExceeded means
|
||||
// the shared budget is exhausted, so an immediate retry would only fail
|
||||
// again — burning the remaining budget on a call that cannot succeed.
|
||||
return statusCode is StatusCode.Unavailable
|
||||
or StatusCode.DeadlineExceeded
|
||||
or StatusCode.ResourceExhausted;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -101,7 +101,8 @@ public sealed class MxGatewaySession : IAsyncDisposable
|
||||
MxCommandReply reply = await RegisterRawAsync(clientName, cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
reply.EnsureProtocolSuccess().EnsureMxAccessSuccess();
|
||||
return reply.Register?.ServerHandle ?? reply.ReturnValue.Int32Value;
|
||||
return reply.Register?.ServerHandle
|
||||
?? throw CreateMissingPayloadException(reply, "register");
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -143,7 +144,8 @@ public sealed class MxGatewaySession : IAsyncDisposable
|
||||
cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
reply.EnsureProtocolSuccess().EnsureMxAccessSuccess();
|
||||
return reply.AddItem?.ItemHandle ?? reply.ReturnValue.Int32Value;
|
||||
return reply.AddItem?.ItemHandle
|
||||
?? throw CreateMissingPayloadException(reply, "add_item");
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -194,7 +196,8 @@ public sealed class MxGatewaySession : IAsyncDisposable
|
||||
cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
reply.EnsureProtocolSuccess().EnsureMxAccessSuccess();
|
||||
return reply.AddItem2?.ItemHandle ?? reply.ReturnValue.Int32Value;
|
||||
return reply.AddItem2?.ItemHandle
|
||||
?? throw CreateMissingPayloadException(reply, "add_item2");
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -723,4 +726,21 @@ public sealed class MxGatewaySession : IAsyncDisposable
|
||||
cancellationToken);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Builds the exception thrown when a command reply passed protocol and
|
||||
/// MXAccess success checks but is missing the typed handle-bearing payload
|
||||
/// the command contract requires. Surfacing this as a clear error avoids
|
||||
/// silently handing a zero handle to the caller (it would otherwise fall
|
||||
/// through to <see cref="MxCommandReply.ReturnValue"/>, which is 0 when the
|
||||
/// reply carries no return value).
|
||||
/// </summary>
|
||||
private static MxGatewayException CreateMissingPayloadException(
|
||||
MxCommandReply reply,
|
||||
string expectedPayload)
|
||||
{
|
||||
return new MxGatewayException(
|
||||
$"Gateway reply for command kind={reply.Kind} reported success but is missing "
|
||||
+ $"the required '{expectedPayload}' payload; cannot resolve a handle. "
|
||||
+ $"session={reply.SessionId}; correlation={reply.CorrelationId}");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -142,7 +142,8 @@ dotnet run --project clients/dotnet/MxGateway.Client.Cli -- smoke --endpoint htt
|
||||
`smoke` opens a session, registers a client, adds one item, advises it,
|
||||
optionally writes a value when `--type` and `--value` are supplied, reads a
|
||||
bounded event stream, and closes the session in a `finally` block. CLI error
|
||||
output redacts API keys supplied through `--api-key`.
|
||||
output redacts the effective API key, whether it was supplied through
|
||||
`--api-key` or resolved from the `--api-key-env` environment variable.
|
||||
|
||||
## Galaxy Repository Browse
|
||||
|
||||
|
||||
@@ -90,6 +90,19 @@ events may be lost. Raw protobuf messages remain available through the
|
||||
`errors.As` for `GatewayError`, `CommandError`, and `MxAccessError`; command
|
||||
errors preserve the raw reply.
|
||||
|
||||
`Dial` and `DialGalaxy` create the connection lazily (`grpc.NewClient`): a
|
||||
gateway that is briefly unavailable no longer turns into a hard error — the
|
||||
connection recovers once the gateway comes up. To keep fail-fast behavior,
|
||||
both run a readiness probe bounded by `DialTimeout` (default 10s, or the
|
||||
context deadline when sooner) and return a `*GatewayError` if the gateway
|
||||
cannot be reached in that window.
|
||||
|
||||
For retry, timeout, and auth handling, `GatewayError.Code()` exposes the
|
||||
wrapped gRPC `codes.Code`, and `mxgateway.IsTransient(err)` reports whether a
|
||||
failure (`Unavailable`, `DeadlineExceeded`, `ResourceExhausted`, `Aborted`)
|
||||
may succeed on retry — so callers do not have to unwrap the error and call
|
||||
`status.Code` themselves.
|
||||
|
||||
## Galaxy Repository browse
|
||||
|
||||
The `GalaxyRepository` service (proto package `galaxy_repository.v1`) is a
|
||||
|
||||
@@ -150,8 +150,8 @@ func TestQueryActiveAlarmsPassesFilterPrefix(t *testing.T) {
|
||||
defer cleanup()
|
||||
|
||||
stream, err := client.QueryActiveAlarms(context.Background(), &pb.QueryActiveAlarmsRequest{
|
||||
SessionId: "session-1",
|
||||
AlarmFilterPrefix: "Tank01.",
|
||||
SessionId: "session-1",
|
||||
AlarmFilterPrefix: "Tank01.",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("QueryActiveAlarms() error = %v", err)
|
||||
@@ -221,8 +221,10 @@ func newBufconnClientWithAlarms(t *testing.T, fake *fakeGatewayWithAlarms) (*Cli
|
||||
dialer := func(ctx context.Context, _ string) (net.Conn, error) {
|
||||
return listener.DialContext(ctx)
|
||||
}
|
||||
// grpc.NewClient defaults to the dns scheme; use passthrough so the
|
||||
// bufconn fake target reaches the context dialer unresolved.
|
||||
client, err := Dial(context.Background(), Options{
|
||||
Endpoint: "bufnet",
|
||||
Endpoint: "passthrough:///bufnet",
|
||||
APIKey: "test-api-key",
|
||||
Plaintext: true,
|
||||
DialOptions: []grpc.DialOption{grpc.WithContextDialer(dialer)},
|
||||
|
||||
@@ -19,6 +19,7 @@ import (
|
||||
|
||||
pb "gitea.dohertylan.com/dohertj2/mxaccessgw/clients/go/internal/generated"
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/connectivity"
|
||||
"google.golang.org/grpc/credentials"
|
||||
"google.golang.org/grpc/credentials/insecure"
|
||||
"google.golang.org/protobuf/types/known/durationpb"
|
||||
@@ -36,22 +37,36 @@ type Client struct {
|
||||
opts Options
|
||||
}
|
||||
|
||||
// Dial opens a gRPC connection to the gateway and configures auth metadata,
|
||||
// transport security, and blocking dial cancellation from ctx.
|
||||
// Dial opens a gRPC connection to the gateway and configures auth metadata
|
||||
// and transport security.
|
||||
//
|
||||
// The connection is created lazily with grpc.NewClient: the channel is not
|
||||
// established until the first RPC (or the readiness probe below) needs it, so
|
||||
// a gateway that is briefly unavailable at Dial time no longer turns into a
|
||||
// hard error — the connection recovers when the gateway comes up. To preserve
|
||||
// fail-fast behavior, Dial then runs an explicit readiness probe bounded by
|
||||
// DialTimeout (default 10s, or ctx's deadline when sooner): it triggers the
|
||||
// initial connect and waits for the channel to reach Ready, returning a
|
||||
// *GatewayError if the gateway cannot be reached in that window. Cancelling
|
||||
// ctx aborts the probe.
|
||||
func Dial(ctx context.Context, opts Options) (*Client, error) {
|
||||
conn, err := dial(ctx, opts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return NewClient(conn, opts), nil
|
||||
}
|
||||
|
||||
// dial builds the shared gRPC connection used by both Client and GalaxyClient:
|
||||
// it resolves transport credentials, assembles dial options, creates a lazy
|
||||
// connection with grpc.NewClient, and runs the DialTimeout-bounded readiness
|
||||
// probe so callers still fail fast when the gateway is unreachable.
|
||||
func dial(ctx context.Context, opts Options) (*grpc.ClientConn, error) {
|
||||
if opts.Endpoint == "" {
|
||||
return nil, errors.New("mxgateway: endpoint is required")
|
||||
}
|
||||
|
||||
dialCtx := ctx
|
||||
cancel := func() {}
|
||||
if opts.DialTimeout > 0 {
|
||||
dialCtx, cancel = context.WithTimeout(ctx, opts.DialTimeout)
|
||||
} else if _, ok := ctx.Deadline(); !ok {
|
||||
dialCtx, cancel = context.WithTimeout(ctx, defaultDialTimeout)
|
||||
}
|
||||
defer cancel()
|
||||
|
||||
transportCredentials, err := resolveTransportCredentials(opts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -61,16 +76,46 @@ func Dial(ctx context.Context, opts Options) (*Client, error) {
|
||||
grpc.WithTransportCredentials(transportCredentials),
|
||||
grpc.WithUnaryInterceptor(unaryAuthInterceptor(opts.APIKey)),
|
||||
grpc.WithStreamInterceptor(streamAuthInterceptor(opts.APIKey)),
|
||||
grpc.WithBlock(),
|
||||
}
|
||||
dialOptions = append(dialOptions, opts.DialOptions...)
|
||||
|
||||
conn, err := grpc.DialContext(dialCtx, opts.Endpoint, dialOptions...)
|
||||
conn, err := grpc.NewClient(opts.Endpoint, dialOptions...)
|
||||
if err != nil {
|
||||
return nil, &GatewayError{Op: "dial", Err: err}
|
||||
}
|
||||
|
||||
return NewClient(conn, opts), nil
|
||||
if err := waitForReady(ctx, conn, opts.DialTimeout); err != nil {
|
||||
_ = conn.Close()
|
||||
return nil, &GatewayError{Op: "dial", Err: err}
|
||||
}
|
||||
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
// waitForReady triggers the initial connect on conn and blocks until the
|
||||
// channel reaches connectivity.Ready, the timeout elapses, or ctx is
|
||||
// cancelled. The wait is bounded by dialTimeout when positive, otherwise by
|
||||
// ctx's existing deadline, otherwise by defaultDialTimeout.
|
||||
func waitForReady(ctx context.Context, conn *grpc.ClientConn, dialTimeout time.Duration) error {
|
||||
probeCtx := ctx
|
||||
cancel := func() {}
|
||||
if dialTimeout > 0 {
|
||||
probeCtx, cancel = context.WithTimeout(ctx, dialTimeout)
|
||||
} else if _, ok := ctx.Deadline(); !ok {
|
||||
probeCtx, cancel = context.WithTimeout(ctx, defaultDialTimeout)
|
||||
}
|
||||
defer cancel()
|
||||
|
||||
conn.Connect()
|
||||
for {
|
||||
state := conn.GetState()
|
||||
if state == connectivity.Ready {
|
||||
return nil
|
||||
}
|
||||
if !conn.WaitForStateChange(probeCtx, state) {
|
||||
return probeCtx.Err()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// NewClient wraps an existing gRPC connection. The caller owns closing conn
|
||||
@@ -188,7 +233,15 @@ func (c *Client) Close() error {
|
||||
}
|
||||
|
||||
func (c *Client) callContext(ctx context.Context) (context.Context, context.CancelFunc) {
|
||||
timeout := c.opts.CallTimeout
|
||||
return callContext(ctx, c.opts.CallTimeout)
|
||||
}
|
||||
|
||||
// callContext derives a per-RPC context from ctx, applying callTimeout: zero
|
||||
// uses defaultCallTimeout, a negative value disables the bound entirely, and a
|
||||
// caller-supplied deadline that is already sooner than the derived timeout is
|
||||
// kept as-is rather than being lengthened.
|
||||
func callContext(ctx context.Context, callTimeout time.Duration) (context.Context, context.CancelFunc) {
|
||||
timeout := callTimeout
|
||||
if timeout == 0 {
|
||||
timeout = defaultCallTimeout
|
||||
}
|
||||
|
||||
@@ -292,8 +292,11 @@ func newBufconnClient(t *testing.T, fake *fakeGatewayServer) (*Client, func()) {
|
||||
dialer := func(ctx context.Context, _ string) (net.Conn, error) {
|
||||
return listener.DialContext(ctx)
|
||||
}
|
||||
// grpc.NewClient defaults the target scheme to dns; the bufconn fake name
|
||||
// is not DNS-resolvable, so use the passthrough scheme to hand the target
|
||||
// straight to the context dialer.
|
||||
client, err := Dial(context.Background(), Options{
|
||||
Endpoint: "bufnet",
|
||||
Endpoint: "passthrough:///bufnet",
|
||||
APIKey: "test-api-key",
|
||||
Plaintext: true,
|
||||
DialOptions: []grpc.DialOption{
|
||||
|
||||
@@ -0,0 +1,401 @@
|
||||
package mxgateway
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"net"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
pb "gitea.dohertylan.com/dohertj2/mxaccessgw/clients/go/internal/generated"
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/credentials/insecure"
|
||||
"google.golang.org/grpc/status"
|
||||
"google.golang.org/protobuf/types/known/timestamppb"
|
||||
)
|
||||
|
||||
// --- Client.Go-008: resolveTransportCredentials precedence -----------------
|
||||
|
||||
// TestResolveTransportCredentialsPrecedence covers every branch of
|
||||
// resolveTransportCredentials, which previously only had the Plaintext path
|
||||
// exercised.
|
||||
func TestResolveTransportCredentialsPrecedence(t *testing.T) {
|
||||
custom := insecure.NewCredentials()
|
||||
|
||||
t.Run("TransportCredentialsWins", func(t *testing.T) {
|
||||
creds, err := resolveTransportCredentials(Options{
|
||||
TransportCredentials: custom,
|
||||
Plaintext: true, // must be ignored
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if creds != custom {
|
||||
t.Fatal("expected the explicit TransportCredentials to be returned as-is")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Plaintext", func(t *testing.T) {
|
||||
creds, err := resolveTransportCredentials(Options{Plaintext: true})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if got := creds.Info().SecurityProtocol; got != "insecure" {
|
||||
t.Fatalf("expected insecure credentials, got security protocol %q", got)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("CACertFileMissingErrors", func(t *testing.T) {
|
||||
_, err := resolveTransportCredentials(Options{CACertFile: "does-not-exist.pem"})
|
||||
if err == nil {
|
||||
t.Fatal("expected an error for a missing CA cert file")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("TLSConfigWithServerNameOverride", func(t *testing.T) {
|
||||
creds, err := resolveTransportCredentials(Options{
|
||||
TLSConfig: &tls.Config{MinVersion: tls.VersionTLS13},
|
||||
ServerNameOverride: "gateway.internal",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if got := creds.Info().ServerName; got != "gateway.internal" {
|
||||
t.Fatalf("expected ServerName override to be applied, got %q", got)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("DefaultTLSFloor", func(t *testing.T) {
|
||||
creds, err := resolveTransportCredentials(Options{ServerNameOverride: "host"})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if got := creds.Info().SecurityProtocol; got != "tls" {
|
||||
t.Fatalf("expected the default TLS credentials, got %q", got)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestResolveTransportCredentialsDoesNotMutateTLSConfig confirms the supplied
|
||||
// TLSConfig is cloned, not mutated, when ServerNameOverride is applied.
|
||||
func TestResolveTransportCredentialsDoesNotMutateTLSConfig(t *testing.T) {
|
||||
cfg := &tls.Config{MinVersion: tls.VersionTLS12}
|
||||
if _, err := resolveTransportCredentials(Options{
|
||||
TLSConfig: cfg,
|
||||
ServerNameOverride: "override",
|
||||
}); err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if cfg.ServerName != "" {
|
||||
t.Fatalf("resolveTransportCredentials mutated the caller's TLSConfig (ServerName=%q)", cfg.ServerName)
|
||||
}
|
||||
}
|
||||
|
||||
// --- Client.Go-008: callContext deadline arithmetic ------------------------
|
||||
|
||||
// TestCallContextDeadlineArithmetic covers the shared callContext deadline
|
||||
// logic, including the negative-timeout disable case and the
|
||||
// caller-deadline-is-sooner case.
|
||||
func TestCallContextDeadlineArithmetic(t *testing.T) {
|
||||
t.Run("ZeroUsesDefault", func(t *testing.T) {
|
||||
ctx, cancel := callContext(context.Background(), 0)
|
||||
defer cancel()
|
||||
deadline, ok := ctx.Deadline()
|
||||
if !ok {
|
||||
t.Fatal("expected a deadline for the default timeout")
|
||||
}
|
||||
remaining := time.Until(deadline)
|
||||
if remaining <= 0 || remaining > defaultCallTimeout+time.Second {
|
||||
t.Fatalf("default deadline out of range: %v", remaining)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("NegativeDisablesBound", func(t *testing.T) {
|
||||
base := context.Background()
|
||||
ctx, cancel := callContext(base, -1)
|
||||
defer cancel()
|
||||
if _, ok := ctx.Deadline(); ok {
|
||||
t.Fatal("a negative timeout must disable the deadline entirely")
|
||||
}
|
||||
if ctx != base {
|
||||
t.Fatal("a negative timeout must return the caller context unchanged")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("PositiveAppliesTimeout", func(t *testing.T) {
|
||||
ctx, cancel := callContext(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
deadline, ok := ctx.Deadline()
|
||||
if !ok {
|
||||
t.Fatal("expected a deadline")
|
||||
}
|
||||
remaining := time.Until(deadline)
|
||||
if remaining <= 0 || remaining > 5*time.Second+time.Second {
|
||||
t.Fatalf("deadline out of range: %v", remaining)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("CallerDeadlineSoonerIsKept", func(t *testing.T) {
|
||||
base, baseCancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
|
||||
defer baseCancel()
|
||||
ctx, cancel := callContext(base, 30*time.Second)
|
||||
defer cancel()
|
||||
if ctx != base {
|
||||
t.Fatal("a caller deadline sooner than the timeout must be kept as-is")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("CallerDeadlineLaterIsShortened", func(t *testing.T) {
|
||||
base, baseCancel := context.WithTimeout(context.Background(), time.Hour)
|
||||
defer baseCancel()
|
||||
ctx, cancel := callContext(base, time.Second)
|
||||
defer cancel()
|
||||
deadline, ok := ctx.Deadline()
|
||||
if !ok {
|
||||
t.Fatal("expected a deadline")
|
||||
}
|
||||
if remaining := time.Until(deadline); remaining > 2*time.Second {
|
||||
t.Fatalf("expected the shorter timeout to win, got %v remaining", remaining)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// --- Client.Go-008: NativeValue / NativeArray edge branches ----------------
|
||||
|
||||
// TestNativeValueEdgeKinds covers the array, raw-bytes, null, and
|
||||
// nil-input branches of NativeValue.
|
||||
func TestNativeValueEdgeKinds(t *testing.T) {
|
||||
t.Run("NilInput", func(t *testing.T) {
|
||||
got, err := NativeValue(nil)
|
||||
if err != nil || got != nil {
|
||||
t.Fatalf("NativeValue(nil) = (%v, %v), want (nil, nil)", got, err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("ExplicitNull", func(t *testing.T) {
|
||||
got, err := NativeValue(&pb.MxValue{IsNull: true})
|
||||
if err != nil || got != nil {
|
||||
t.Fatalf("NativeValue(null) = (%v, %v), want (nil, nil)", got, err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("RawBytes", func(t *testing.T) {
|
||||
raw := []byte{0x01, 0x02, 0x03}
|
||||
got, err := NativeValue(&pb.MxValue{Kind: &pb.MxValue_RawValue{RawValue: raw}})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
gotBytes, ok := got.([]byte)
|
||||
if !ok || !reflect.DeepEqual(gotBytes, raw) {
|
||||
t.Fatalf("NativeValue raw = %v, want %v", got, raw)
|
||||
}
|
||||
// The result must be a copy, not aliasing the protobuf field.
|
||||
gotBytes[0] = 0xFF
|
||||
if raw[0] != 0x01 {
|
||||
t.Fatal("NativeValue raw result aliases the protobuf backing array")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("ArrayValue", func(t *testing.T) {
|
||||
value := &pb.MxValue{Kind: &pb.MxValue_ArrayValue{
|
||||
ArrayValue: &pb.MxArray{Values: &pb.MxArray_Int32Values{
|
||||
Int32Values: &pb.Int32Array{Values: []int32{7, 8}},
|
||||
}},
|
||||
}}
|
||||
got, err := NativeValue(value)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if !reflect.DeepEqual(got, []int32{7, 8}) {
|
||||
t.Fatalf("NativeValue array = %v, want [7 8]", got)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestNativeArrayEdgeKinds covers the nil, raw-bytes, timestamp-with-nil, and
|
||||
// unsupported-kind branches of NativeArray.
|
||||
func TestNativeArrayEdgeKinds(t *testing.T) {
|
||||
t.Run("NilInput", func(t *testing.T) {
|
||||
got, err := NativeArray(nil)
|
||||
if err != nil || got != nil {
|
||||
t.Fatalf("NativeArray(nil) = (%v, %v), want (nil, nil)", got, err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("RawValues", func(t *testing.T) {
|
||||
got, err := NativeArray(&pb.MxArray{Values: &pb.MxArray_RawValues{
|
||||
RawValues: &pb.RawArray{Values: [][]byte{{0x0A}, {0x0B}}},
|
||||
}})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
want := [][]byte{{0x0A}, {0x0B}}
|
||||
if !reflect.DeepEqual(got, want) {
|
||||
t.Fatalf("NativeArray raw = %v, want %v", got, want)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("TimestampWithNilEntry", func(t *testing.T) {
|
||||
got, err := NativeArray(&pb.MxArray{Values: &pb.MxArray_TimestampValues{
|
||||
TimestampValues: &pb.TimestampArray{Values: []*timestamppb.Timestamp{nil}},
|
||||
}})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
times, ok := got.([]time.Time)
|
||||
if !ok || len(times) != 1 || !times[0].IsZero() {
|
||||
t.Fatalf("NativeArray timestamp-with-nil = %v, want [zero-time]", got)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("UnsupportedKind", func(t *testing.T) {
|
||||
// An MxArray with no oneof set hits the default branch.
|
||||
_, err := NativeArray(&pb.MxArray{})
|
||||
if err == nil {
|
||||
t.Fatal("expected an error for an MxArray with no values set")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "unsupported array value kind") {
|
||||
t.Fatalf("unexpected error text: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestNativeValueUnsupportedKind covers the default branch of NativeValue.
|
||||
func TestNativeValueUnsupportedKind(t *testing.T) {
|
||||
// An MxValue with no oneof Kind set and IsNull false hits the default.
|
||||
_, err := NativeValue(&pb.MxValue{})
|
||||
if err == nil {
|
||||
t.Fatal("expected an error for an MxValue with no kind set")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "unsupported value kind") {
|
||||
t.Fatalf("unexpected error text: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// --- Client.Go-005: dial migration -----------------------------------------
|
||||
|
||||
// TestDialFailsFastWhenGatewayUnreachable confirms that after the migration to
|
||||
// grpc.NewClient the DialTimeout-bounded readiness probe still fails fast (and
|
||||
// wraps the failure in *GatewayError) when the gateway cannot be reached.
|
||||
func TestDialFailsFastWhenGatewayUnreachable(t *testing.T) {
|
||||
dialer := func(ctx context.Context, _ string) (net.Conn, error) {
|
||||
return nil, errors.New("connection refused")
|
||||
}
|
||||
start := time.Now()
|
||||
client, err := Dial(context.Background(), Options{
|
||||
Endpoint: "passthrough:///unreachable",
|
||||
APIKey: "k",
|
||||
Plaintext: true,
|
||||
DialTimeout: 500 * time.Millisecond,
|
||||
DialOptions: []grpc.DialOption{grpc.WithContextDialer(dialer)},
|
||||
})
|
||||
elapsed := time.Since(start)
|
||||
if err == nil {
|
||||
client.Close()
|
||||
t.Fatal("expected Dial to fail for an unreachable gateway")
|
||||
}
|
||||
var gwErr *GatewayError
|
||||
if !errors.As(err, &gwErr) || gwErr.Op != "dial" {
|
||||
t.Fatalf("expected a *GatewayError with Op=dial, got %#v", err)
|
||||
}
|
||||
if elapsed > 5*time.Second {
|
||||
t.Fatalf("Dial did not honor DialTimeout: took %v", elapsed)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDialReadinessProbeReachesReady confirms the readiness probe succeeds
|
||||
// against a live (bufconn) gateway, i.e. the lazy grpc.NewClient connection is
|
||||
// driven to Ready before Dial returns.
|
||||
func TestDialReadinessProbeReachesReady(t *testing.T) {
|
||||
client, cleanup := newBufconnClient(t, &fakeGatewayServer{
|
||||
openReply: &pb.OpenSessionReply{},
|
||||
})
|
||||
defer cleanup()
|
||||
if client == nil {
|
||||
t.Fatal("expected a connected client")
|
||||
}
|
||||
}
|
||||
|
||||
// --- Client.Go-006: error taxonomy ----------------------------------------
|
||||
|
||||
// TestGatewayErrorCode confirms GatewayError.Code surfaces the wrapped gRPC
|
||||
// status code without the caller unwrapping it.
|
||||
func TestGatewayErrorCode(t *testing.T) {
|
||||
var nilErr *GatewayError
|
||||
if got := nilErr.Code(); got != codes.OK {
|
||||
t.Fatalf("nil GatewayError.Code() = %v, want OK", got)
|
||||
}
|
||||
|
||||
gwErr := &GatewayError{Op: "invoke", Err: status.Error(codes.Unavailable, "down")}
|
||||
if got := gwErr.Code(); got != codes.Unavailable {
|
||||
t.Fatalf("GatewayError.Code() = %v, want Unavailable", got)
|
||||
}
|
||||
|
||||
plain := &GatewayError{Op: "dial", Err: errors.New("boom")}
|
||||
if got := plain.Code(); got != codes.Unknown {
|
||||
t.Fatalf("GatewayError.Code() for a non-status error = %v, want Unknown", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestIsTransient verifies the transient/permanent classification including
|
||||
// the unwrap-through-GatewayError path.
|
||||
func TestIsTransient(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
err error
|
||||
want bool
|
||||
}{
|
||||
{name: "nil", err: nil, want: false},
|
||||
{name: "unavailable wrapped", err: &GatewayError{Op: "invoke", Err: status.Error(codes.Unavailable, "x")}, want: true},
|
||||
{name: "deadline wrapped", err: &GatewayError{Op: "invoke", Err: status.Error(codes.DeadlineExceeded, "x")}, want: true},
|
||||
{name: "resource exhausted", err: &GatewayError{Err: status.Error(codes.ResourceExhausted, "x")}, want: true},
|
||||
{name: "unauthenticated permanent", err: &GatewayError{Err: status.Error(codes.Unauthenticated, "x")}, want: false},
|
||||
{name: "invalid argument permanent", err: &GatewayError{Err: status.Error(codes.InvalidArgument, "x")}, want: false},
|
||||
{name: "bare status unavailable", err: status.Error(codes.Unavailable, "x"), want: true},
|
||||
{name: "plain error", err: errors.New("nope"), want: false},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := IsTransient(tt.err); got != tt.want {
|
||||
t.Fatalf("IsTransient(%v) = %v, want %v", tt.err, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// --- Client.Go-007: correlation id fallback --------------------------------
|
||||
|
||||
// TestNewCorrelationIDUsesRandEntropy confirms the happy path yields a
|
||||
// 32-hex-character id.
|
||||
func TestNewCorrelationIDUsesRandEntropy(t *testing.T) {
|
||||
id := newCorrelationID()
|
||||
if len(id) != 32 {
|
||||
t.Fatalf("expected a 32-char hex id, got %q (len %d)", id, len(id))
|
||||
}
|
||||
}
|
||||
|
||||
// TestNewCorrelationIDFallsBackOnRandFailure reproduces Client.Go-007: when
|
||||
// crypto/rand fails, newCorrelationID must not return an empty string but a
|
||||
// unique, non-empty fallback id so the command stays traceable.
|
||||
func TestNewCorrelationIDFallsBackOnRandFailure(t *testing.T) {
|
||||
original := randRead
|
||||
randRead = func([]byte) (int, error) { return 0, errors.New("entropy unavailable") }
|
||||
defer func() { randRead = original }()
|
||||
|
||||
first := newCorrelationID()
|
||||
second := newCorrelationID()
|
||||
|
||||
if first == "" || second == "" {
|
||||
t.Fatal("newCorrelationID returned an empty id on rand failure")
|
||||
}
|
||||
if !strings.HasPrefix(first, "fallback-") {
|
||||
t.Fatalf("expected a fallback- prefixed id, got %q", first)
|
||||
}
|
||||
if first == second {
|
||||
t.Fatalf("fallback correlation ids must be unique, got %q twice", first)
|
||||
}
|
||||
}
|
||||
@@ -5,6 +5,8 @@ import (
|
||||
"fmt"
|
||||
|
||||
pb "gitea.dohertylan.com/dohertj2/mxaccessgw/clients/go/internal/generated"
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/status"
|
||||
)
|
||||
|
||||
// ErrEventBufferOverflow is the terminal error delivered on the compatibility
|
||||
@@ -42,6 +44,45 @@ func (e *GatewayError) Unwrap() error {
|
||||
return e.Err
|
||||
}
|
||||
|
||||
// Code returns the gRPC status code of the wrapped transport error. It returns
|
||||
// codes.OK when the error is nil and codes.Unknown when the wrapped error does
|
||||
// not carry a gRPC status. Callers can use it to write retry, timeout, and
|
||||
// auth handling without manually unwrapping and re-parsing the error.
|
||||
func (e *GatewayError) Code() codes.Code {
|
||||
if e == nil || e.Err == nil {
|
||||
return codes.OK
|
||||
}
|
||||
return status.Code(e.Err)
|
||||
}
|
||||
|
||||
// IsTransient reports whether err is a transport failure that may succeed on
|
||||
// retry — for example a gateway that is briefly Unavailable or a call that
|
||||
// hit a DeadlineExceeded. Permanent failures (Unauthenticated, PermissionDenied,
|
||||
// InvalidArgument, NotFound, and similar) return false. It unwraps through
|
||||
// *GatewayError and any other error chain carrying a gRPC status, so callers
|
||||
// do not need to call status.Code themselves.
|
||||
func IsTransient(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
switch transientCode(err) {
|
||||
case codes.Unavailable, codes.DeadlineExceeded, codes.ResourceExhausted, codes.Aborted:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// transientCode extracts a gRPC status code from err, preferring a wrapped
|
||||
// *GatewayError's Code and otherwise falling back to status.Code on the chain.
|
||||
func transientCode(err error) codes.Code {
|
||||
var gatewayErr *GatewayError
|
||||
if errors.As(err, &gatewayErr) {
|
||||
return gatewayErr.Code()
|
||||
}
|
||||
return status.Code(err)
|
||||
}
|
||||
|
||||
// CommandError reports a non-OK gateway protocol status and keeps the raw
|
||||
// command reply when one exists.
|
||||
type CommandError struct {
|
||||
|
||||
@@ -2,7 +2,6 @@ package mxgateway
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"time"
|
||||
|
||||
@@ -56,39 +55,13 @@ type GalaxyClient struct {
|
||||
|
||||
// DialGalaxy opens a gRPC connection to the gateway for the Galaxy Repository
|
||||
// service. It applies the same authentication metadata, transport security,
|
||||
// and dial-timeout behavior as Dial.
|
||||
// lazy connection, and DialTimeout-bounded readiness probe as Dial.
|
||||
func DialGalaxy(ctx context.Context, opts Options) (*GalaxyClient, error) {
|
||||
if opts.Endpoint == "" {
|
||||
return nil, errors.New("mxgateway: endpoint is required")
|
||||
}
|
||||
|
||||
dialCtx := ctx
|
||||
cancel := func() {}
|
||||
if opts.DialTimeout > 0 {
|
||||
dialCtx, cancel = context.WithTimeout(ctx, opts.DialTimeout)
|
||||
} else if _, ok := ctx.Deadline(); !ok {
|
||||
dialCtx, cancel = context.WithTimeout(ctx, defaultDialTimeout)
|
||||
}
|
||||
defer cancel()
|
||||
|
||||
transportCredentials, err := resolveTransportCredentials(opts)
|
||||
conn, err := dial(ctx, opts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
dialOptions := []grpc.DialOption{
|
||||
grpc.WithTransportCredentials(transportCredentials),
|
||||
grpc.WithUnaryInterceptor(unaryAuthInterceptor(opts.APIKey)),
|
||||
grpc.WithStreamInterceptor(streamAuthInterceptor(opts.APIKey)),
|
||||
grpc.WithBlock(),
|
||||
}
|
||||
dialOptions = append(dialOptions, opts.DialOptions...)
|
||||
|
||||
conn, err := grpc.DialContext(dialCtx, opts.Endpoint, dialOptions...)
|
||||
if err != nil {
|
||||
return nil, &GatewayError{Op: "dial", Err: err}
|
||||
}
|
||||
|
||||
return NewGalaxyClient(conn, opts), nil
|
||||
}
|
||||
|
||||
@@ -239,18 +212,5 @@ func (c *GalaxyClient) Close() error {
|
||||
}
|
||||
|
||||
func (c *GalaxyClient) callContext(ctx context.Context) (context.Context, context.CancelFunc) {
|
||||
timeout := c.opts.CallTimeout
|
||||
if timeout == 0 {
|
||||
timeout = defaultCallTimeout
|
||||
}
|
||||
if timeout < 0 {
|
||||
return ctx, func() {}
|
||||
}
|
||||
if deadline, ok := ctx.Deadline(); ok {
|
||||
timeoutDeadline := time.Now().Add(timeout)
|
||||
if deadline.Before(timeoutDeadline) {
|
||||
return ctx, func() {}
|
||||
}
|
||||
}
|
||||
return context.WithTimeout(ctx, timeout)
|
||||
return callContext(ctx, c.opts.CallTimeout)
|
||||
}
|
||||
|
||||
@@ -55,8 +55,8 @@ func TestGalaxyGetLastDeployTimeReturnsTimestampWhenPresent(t *testing.T) {
|
||||
want := time.Date(2026, 4, 28, 12, 34, 56, 0, time.UTC)
|
||||
fake := &fakeGalaxyServer{
|
||||
deployReply: &pb.GetLastDeployTimeReply{
|
||||
Present: true,
|
||||
TimeOfLastDeploy: timestamppb.New(want),
|
||||
Present: true,
|
||||
TimeOfLastDeploy: timestamppb.New(want),
|
||||
},
|
||||
}
|
||||
client, cleanup := newGalaxyBufconnClient(t, fake)
|
||||
@@ -348,8 +348,10 @@ func newGalaxyBufconnClient(t *testing.T, fake *fakeGalaxyServer) (*GalaxyClient
|
||||
dialer := func(ctx context.Context, _ string) (net.Conn, error) {
|
||||
return listener.DialContext(ctx)
|
||||
}
|
||||
// grpc.NewClient defaults to the dns scheme; use passthrough so the
|
||||
// bufconn fake target reaches the context dialer unresolved.
|
||||
client, err := DialGalaxy(context.Background(), Options{
|
||||
Endpoint: "bufnet",
|
||||
Endpoint: "passthrough:///bufnet",
|
||||
APIKey: "test-api-key",
|
||||
Plaintext: true,
|
||||
DialOptions: []grpc.DialOption{
|
||||
|
||||
@@ -8,6 +8,8 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
pb "gitea.dohertylan.com/dohertj2/mxaccessgw/clients/go/internal/generated"
|
||||
"google.golang.org/grpc/codes"
|
||||
@@ -547,10 +549,25 @@ func (s *Session) invokeCommand(ctx context.Context, command *MxCommand) (*MxCom
|
||||
})
|
||||
}
|
||||
|
||||
// correlationIDCounter backs the deterministic fallback id used when
|
||||
// crypto/rand is unavailable, so every command still carries a unique,
|
||||
// traceable correlation id.
|
||||
var correlationIDCounter atomic.Uint64
|
||||
|
||||
// randRead is the entropy source for newCorrelationID. It is a package
|
||||
// variable solely so tests can simulate a crypto/rand failure.
|
||||
var randRead = rand.Read
|
||||
|
||||
// newCorrelationID returns a unique correlation id for an MxCommandRequest.
|
||||
// It prefers 16 bytes of crypto/rand entropy; if rand.Read fails (rare) it
|
||||
// falls back to a "fallback-" prefixed id built from the current time and a
|
||||
// process-wide monotonic counter rather than returning an empty string, which
|
||||
// would leave the command untraceable in gateway logs.
|
||||
func newCorrelationID() string {
|
||||
var buffer [16]byte
|
||||
if _, err := rand.Read(buffer[:]); err != nil {
|
||||
return ""
|
||||
if _, err := randRead(buffer[:]); err != nil {
|
||||
return fmt.Sprintf("fallback-%x-%x",
|
||||
time.Now().UnixNano(), correlationIDCounter.Add(1))
|
||||
}
|
||||
return hex.EncodeToString(buffer[:])
|
||||
}
|
||||
|
||||
+16
-1
@@ -74,10 +74,25 @@ that RPC so a close-time error never replaces the exception a try-with-resources
|
||||
body is already propagating. Call `closeRaw()` explicitly when you need to
|
||||
observe the close result or handle a close-time failure.
|
||||
|
||||
`MxGatewayClient` and `GalaxyRepositoryClient` implement `AutoCloseable`. For a
|
||||
client that owns its channel (built with `connect`), the try-with-resources
|
||||
`close()` shuts the channel down and waits up to the configured connect timeout
|
||||
for termination, forcibly shutting it down on timeout, so in-flight calls and
|
||||
Netty event-loop threads are not left running after the block exits. If the
|
||||
calling thread is interrupted while waiting, the channel is forcibly shut down
|
||||
and the interrupt flag is restored. `closeAndAwaitTermination()` does the same
|
||||
but throws `InterruptedException` for callers that want a checked,
|
||||
blocking-aware shutdown. `close()` is a no-op for a caller-managed channel.
|
||||
|
||||
`MxEventStream` implements `Iterator<MxEvent>` and `AutoCloseable`. Closing it
|
||||
cancels the underlying gRPC stream. Canceling or timing out a Java client call
|
||||
only stops the client from waiting; it does not abort an in-flight MXAccess COM
|
||||
call on the worker STA.
|
||||
call on the worker STA. The event stream uses gRPC's default auto-inbound flow
|
||||
control with a fixed 16-element buffer and no client-side flow control: this is
|
||||
the gateway's documented fail-fast event-backpressure model, so a consumer that
|
||||
stalls long enough to fill the buffer triggers an overflow that cancels the
|
||||
subscription and surfaces an `MxGatewayException` from the next `next()` call.
|
||||
Drain events promptly and be prepared to resubscribe with a resume cursor.
|
||||
|
||||
## Galaxy Repository Browse
|
||||
|
||||
|
||||
+38
-11
@@ -661,33 +661,60 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
@Option(names = "--timeout", defaultValue = "30s", description = "Per-call timeout.")
|
||||
String timeout;
|
||||
|
||||
private String resolvedApiKey = "";
|
||||
private Duration resolvedTimeout = Duration.ofSeconds(30);
|
||||
|
||||
/**
|
||||
* Returns this options object unchanged.
|
||||
*
|
||||
* <p>Retained as a no-op for call sites that read more naturally as
|
||||
* {@code common.resolved()}. Resolution of the API key and timeout is
|
||||
* computed lazily on demand by {@link #resolvedApiKey()} and
|
||||
* {@link #resolvedTimeout()}, so {@link #toClientOptions()} and
|
||||
* {@link #redactedJsonMap()} produce correct output regardless of
|
||||
* whether this method was ever called.
|
||||
*
|
||||
* @return this options object
|
||||
*/
|
||||
CommonOptions resolved() {
|
||||
resolvedApiKey = apiKey == null || apiKey.isBlank() ? System.getenv(apiKeyEnv) : apiKey;
|
||||
if (resolvedApiKey == null) {
|
||||
resolvedApiKey = "";
|
||||
}
|
||||
resolvedTimeout = parseDuration(timeout);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves the effective API key: the explicit {@code --api-key} value
|
||||
* when non-blank, otherwise the value of the {@code --api-key-env}
|
||||
* environment variable, otherwise an empty string. Computed on each
|
||||
* call so there is no stale cached state.
|
||||
*
|
||||
* @return the resolved API key, never {@code null}
|
||||
*/
|
||||
String resolvedApiKey() {
|
||||
String resolved = apiKey == null || apiKey.isBlank() ? System.getenv(apiKeyEnv) : apiKey;
|
||||
return resolved == null ? "" : resolved;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves the effective per-call timeout from the {@code --timeout}
|
||||
* option. Computed on each call so there is no stale cached state.
|
||||
*
|
||||
* @return the resolved call timeout
|
||||
*/
|
||||
Duration resolvedTimeout() {
|
||||
return parseDuration(timeout);
|
||||
}
|
||||
|
||||
MxGatewayClientOptions toClientOptions() {
|
||||
return MxGatewayClientOptions.builder()
|
||||
.endpoint(endpoint)
|
||||
.apiKey(resolvedApiKey)
|
||||
.apiKey(resolvedApiKey())
|
||||
.plaintext(plaintext)
|
||||
.caCertificatePath(caFile)
|
||||
.serverNameOverride(serverNameOverride)
|
||||
.callTimeout(resolvedTimeout)
|
||||
.callTimeout(resolvedTimeout())
|
||||
.build();
|
||||
}
|
||||
|
||||
Map<String, Object> redactedJsonMap() {
|
||||
Map<String, Object> values = new LinkedHashMap<>();
|
||||
values.put("endpoint", endpoint);
|
||||
values.put("apiKey", MxGatewaySecrets.redactApiKey(resolvedApiKey));
|
||||
values.put("apiKey", MxGatewaySecrets.redactApiKey(resolvedApiKey()));
|
||||
values.put("apiKeyEnv", apiKeyEnv);
|
||||
values.put("plaintext", plaintext);
|
||||
values.put("caFile", caFile == null ? "" : caFile.toString());
|
||||
|
||||
+51
-96
@@ -1,8 +1,5 @@
|
||||
package com.dohertylan.mxgateway.client;
|
||||
|
||||
import com.google.common.util.concurrent.FutureCallback;
|
||||
import com.google.common.util.concurrent.Futures;
|
||||
import com.google.common.util.concurrent.MoreExecutors;
|
||||
import galaxy_repository.v1.GalaxyRepositoryGrpc;
|
||||
import galaxy_repository.v1.GalaxyRepositoryOuterClass.DeployEvent;
|
||||
import galaxy_repository.v1.GalaxyRepositoryOuterClass.DiscoverHierarchyReply;
|
||||
@@ -17,8 +14,6 @@ import com.google.protobuf.Timestamp;
|
||||
import io.grpc.Channel;
|
||||
import io.grpc.ClientInterceptors;
|
||||
import io.grpc.ManagedChannel;
|
||||
import io.grpc.netty.shaded.io.grpc.netty.GrpcSslContexts;
|
||||
import io.grpc.netty.shaded.io.grpc.netty.NettyChannelBuilder;
|
||||
import io.grpc.stub.StreamObserver;
|
||||
import java.time.Instant;
|
||||
import java.util.Iterator;
|
||||
@@ -27,7 +22,6 @@ import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import javax.net.ssl.SSLException;
|
||||
|
||||
/**
|
||||
* Thin wrapper around the generated {@link GalaxyRepositoryGrpc} stubs that
|
||||
@@ -78,7 +72,8 @@ public final class GalaxyRepositoryClient implements AutoCloseable {
|
||||
* @return a connected client
|
||||
*/
|
||||
public static GalaxyRepositoryClient connect(MxGatewayClientOptions options) {
|
||||
return new GalaxyRepositoryClient(createChannel(options), options);
|
||||
return new GalaxyRepositoryClient(
|
||||
MxGatewayChannels.createChannel(options, "failed to configure galaxy repository TLS"), options);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -87,7 +82,7 @@ public final class GalaxyRepositoryClient implements AutoCloseable {
|
||||
* @return the blocking stub
|
||||
*/
|
||||
public GalaxyRepositoryGrpc.GalaxyRepositoryBlockingStub rawBlockingStub() {
|
||||
return withDeadline(blockingStub);
|
||||
return MxGatewayChannels.withDeadline(blockingStub, options);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -96,7 +91,7 @@ public final class GalaxyRepositoryClient implements AutoCloseable {
|
||||
* @return the future stub
|
||||
*/
|
||||
public GalaxyRepositoryGrpc.GalaxyRepositoryFutureStub rawFutureStub() {
|
||||
return withDeadline(futureStub);
|
||||
return MxGatewayChannels.withDeadline(futureStub, options);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -133,7 +128,9 @@ public final class GalaxyRepositoryClient implements AutoCloseable {
|
||||
* exceptionally with {@link MxGatewayException} on failure
|
||||
*/
|
||||
public CompletableFuture<Boolean> testConnectionAsync() {
|
||||
return toCompletable(rawFutureStub().testConnection(TestConnectionRequest.getDefaultInstance()))
|
||||
return MxGatewayChannels.toCompletable(
|
||||
rawFutureStub().testConnection(TestConnectionRequest.getDefaultInstance()),
|
||||
"galaxy test connection")
|
||||
.thenApply(TestConnectionReply::getOk);
|
||||
}
|
||||
|
||||
@@ -165,8 +162,11 @@ public final class GalaxyRepositoryClient implements AutoCloseable {
|
||||
* completed exceptionally with {@link MxGatewayException} on failure
|
||||
*/
|
||||
public CompletableFuture<Optional<Instant>> getLastDeployTimeAsync() {
|
||||
return toCompletable(rawFutureStub().getLastDeployTime(GetLastDeployTimeRequest.getDefaultInstance()))
|
||||
.thenApply(GalaxyRepositoryClient::mapDeployTime);
|
||||
return MxGatewayChannels.toCompletable(
|
||||
rawFutureStub().getLastDeployTime(GetLastDeployTimeRequest.getDefaultInstance()),
|
||||
"galaxy get last deploy time")
|
||||
.thenApply(MxGatewayChannels.normalisingValidator(
|
||||
"galaxy get last deploy time", GalaxyRepositoryClient::mapDeployTime));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -224,7 +224,8 @@ public final class GalaxyRepositoryClient implements AutoCloseable {
|
||||
*/
|
||||
public DeployEventStream watchDeployEvents(Instant lastSeenDeployTime) {
|
||||
DeployEventStream stream = new DeployEventStream(16);
|
||||
withStreamDeadline(rawAsyncStub()).watchDeployEvents(buildWatchRequest(lastSeenDeployTime), stream.observer());
|
||||
MxGatewayChannels.withStreamDeadline(rawAsyncStub(), options)
|
||||
.watchDeployEvents(buildWatchRequest(lastSeenDeployTime), stream.observer());
|
||||
return stream;
|
||||
}
|
||||
|
||||
@@ -253,7 +254,7 @@ public final class GalaxyRepositoryClient implements AutoCloseable {
|
||||
Instant lastSeenDeployTime, StreamObserver<DeployEvent> observer) {
|
||||
Objects.requireNonNull(observer, "observer");
|
||||
DeployEventSubscription subscription = new DeployEventSubscription();
|
||||
withStreamDeadline(rawAsyncStub())
|
||||
MxGatewayChannels.withStreamDeadline(rawAsyncStub(), options)
|
||||
.watchDeployEvents(buildWatchRequest(lastSeenDeployTime), subscription.wrap(observer));
|
||||
return subscription;
|
||||
}
|
||||
@@ -269,17 +270,31 @@ public final class GalaxyRepositoryClient implements AutoCloseable {
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
private <T extends io.grpc.stub.AbstractStub<T>> T withStreamDeadline(T stub) {
|
||||
if (options.streamTimeout() == null || options.streamTimeout().isNegative()) {
|
||||
return stub;
|
||||
}
|
||||
return stub.withDeadlineAfter(options.streamTimeout().toNanos(), TimeUnit.NANOSECONDS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Shuts the owned channel down and awaits termination so try-with-resources
|
||||
* callers do not leave in-flight calls or Netty event-loop threads running
|
||||
* after the block exits.
|
||||
*
|
||||
* <p>Waits up to the configured connect timeout for graceful termination
|
||||
* and forcibly shuts the channel down on timeout. If the calling thread is
|
||||
* interrupted while waiting, the channel is forcibly shut down and the
|
||||
* thread's interrupt flag is restored. No-op for clients that do not own
|
||||
* their channel. For an explicitly checked, blocking-aware shutdown call
|
||||
* {@link #closeAndAwaitTermination()}.
|
||||
*/
|
||||
@Override
|
||||
public void close() {
|
||||
if (ownedChannel != null) {
|
||||
ownedChannel.shutdown();
|
||||
if (ownedChannel == null) {
|
||||
return;
|
||||
}
|
||||
ownedChannel.shutdown();
|
||||
try {
|
||||
if (!ownedChannel.awaitTermination(options.connectTimeout().toMillis(), TimeUnit.MILLISECONDS)) {
|
||||
ownedChannel.shutdownNow();
|
||||
}
|
||||
} catch (InterruptedException error) {
|
||||
ownedChannel.shutdownNow();
|
||||
Thread.currentThread().interrupt();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -307,86 +322,26 @@ public final class GalaxyRepositoryClient implements AutoCloseable {
|
||||
return Optional.of(Instant.ofEpochSecond(ts.getSeconds(), ts.getNanos()));
|
||||
}
|
||||
|
||||
private static ManagedChannel createChannel(MxGatewayClientOptions options) {
|
||||
NettyChannelBuilder builder = NettyChannelBuilder.forTarget(options.endpoint())
|
||||
.maxInboundMessageSize(options.maxGrpcMessageBytes());
|
||||
if (!options.connectTimeout().isNegative()) {
|
||||
builder.withOption(
|
||||
io.grpc.netty.shaded.io.netty.channel.ChannelOption.CONNECT_TIMEOUT_MILLIS,
|
||||
Math.toIntExact(options.connectTimeout().toMillis()));
|
||||
}
|
||||
if (options.plaintext()) {
|
||||
builder.usePlaintext();
|
||||
} else if (options.caCertificatePath() != null) {
|
||||
try {
|
||||
builder.sslContext(GrpcSslContexts.forClient()
|
||||
.trustManager(options.caCertificatePath().toFile())
|
||||
.build());
|
||||
} catch (SSLException error) {
|
||||
throw new MxGatewayException("failed to configure galaxy repository TLS", error);
|
||||
}
|
||||
} else {
|
||||
builder.useTransportSecurity();
|
||||
}
|
||||
if (!options.serverNameOverride().isBlank()) {
|
||||
builder.overrideAuthority(options.serverNameOverride());
|
||||
}
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
private <T extends io.grpc.stub.AbstractStub<T>> T withDeadline(T stub) {
|
||||
if (options.callTimeout().isNegative()) {
|
||||
return stub;
|
||||
}
|
||||
return stub.withDeadlineAfter(options.callTimeout().toNanos(), TimeUnit.NANOSECONDS);
|
||||
}
|
||||
|
||||
private CompletableFuture<List<GalaxyObject>> discoverHierarchyPageAsync(
|
||||
String pageToken, java.util.ArrayList<GalaxyObject> objects, java.util.HashSet<String> seenPageTokens) {
|
||||
DiscoverHierarchyRequest request = DiscoverHierarchyRequest.newBuilder()
|
||||
.setPageSize(DISCOVER_HIERARCHY_PAGE_SIZE)
|
||||
.setPageToken(pageToken)
|
||||
.build();
|
||||
return toCompletable(rawFutureStub().discoverHierarchy(request)).thenCompose(reply -> {
|
||||
objects.addAll(reply.getObjectsList());
|
||||
if (reply.getNextPageToken().isBlank()) {
|
||||
return CompletableFuture.completedFuture(objects);
|
||||
}
|
||||
if (!seenPageTokens.add(reply.getNextPageToken())) {
|
||||
CompletableFuture<List<GalaxyObject>> failed = new CompletableFuture<>();
|
||||
failed.completeExceptionally(new MxGatewayException(
|
||||
"galaxy discover hierarchy returned repeated page token: " + reply.getNextPageToken()));
|
||||
return failed;
|
||||
}
|
||||
return discoverHierarchyPageAsync(reply.getNextPageToken(), objects, seenPageTokens);
|
||||
});
|
||||
}
|
||||
|
||||
private static <T> CompletableFuture<T> toCompletable(com.google.common.util.concurrent.ListenableFuture<T> source) {
|
||||
CompletableFuture<T> target = new CompletableFuture<>();
|
||||
Futures.addCallback(
|
||||
source,
|
||||
new FutureCallback<>() {
|
||||
@Override
|
||||
public void onSuccess(T result) {
|
||||
target.complete(result);
|
||||
return MxGatewayChannels.toCompletable(rawFutureStub().discoverHierarchy(request), "galaxy discover hierarchy")
|
||||
.thenCompose(reply -> {
|
||||
objects.addAll(reply.getObjectsList());
|
||||
if (reply.getNextPageToken().isBlank()) {
|
||||
return CompletableFuture.completedFuture(objects);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFailure(Throwable error) {
|
||||
if (error instanceof RuntimeException runtimeException) {
|
||||
target.completeExceptionally(MxGatewayErrors.fromGrpc("galaxy async call", runtimeException));
|
||||
return;
|
||||
}
|
||||
target.completeExceptionally(error);
|
||||
if (!seenPageTokens.add(reply.getNextPageToken())) {
|
||||
CompletableFuture<List<GalaxyObject>> failed = new CompletableFuture<>();
|
||||
failed.completeExceptionally(new MxGatewayException(
|
||||
"galaxy discover hierarchy returned repeated page token: "
|
||||
+ reply.getNextPageToken()));
|
||||
return failed;
|
||||
}
|
||||
},
|
||||
MoreExecutors.directExecutor());
|
||||
target.whenComplete((ignoredResult, ignoredError) -> {
|
||||
if (target.isCancelled()) {
|
||||
source.cancel(true);
|
||||
}
|
||||
});
|
||||
return target;
|
||||
return discoverHierarchyPageAsync(reply.getNextPageToken(), objects, seenPageTokens);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
+12
@@ -22,6 +22,18 @@ import mxaccess_gateway.v1.MxaccessGateway.StreamEventsRequest;
|
||||
* cancelled and a follow-up call to {@link #next()} throws
|
||||
* {@link MxGatewayException}.
|
||||
*
|
||||
* <p><strong>Backpressure (fail-fast):</strong> this adaptor relies on gRPC's
|
||||
* default auto-inbound flow control — the async stub auto-requests messages, so
|
||||
* the gateway can push events faster than the consumer drains the bounded
|
||||
* 16-element buffer. There is intentionally <em>no</em> real client flow
|
||||
* control: a consumer that stalls long enough to let the buffer fill triggers
|
||||
* an immediate overflow that cancels the subscription and surfaces an
|
||||
* {@link MxGatewayException} on the next {@link #next()} call. This matches the
|
||||
* gateway's documented fail-fast event-backpressure design — a slow consumer
|
||||
* loses its subscription rather than silently dropping events. Consumers that
|
||||
* cannot keep up must drain {@link #next()} promptly (e.g. hand events to their
|
||||
* own larger queue) and be prepared to resubscribe with a resume cursor.
|
||||
*
|
||||
* <p><strong>Threading:</strong> the iterator methods ({@link #hasNext()} and
|
||||
* {@link #next()}) are <em>not</em> thread-safe and must be driven by a single
|
||||
* consumer thread. {@link #close()} may be called from any thread. Terminal
|
||||
|
||||
+164
@@ -0,0 +1,164 @@
|
||||
package com.dohertylan.mxgateway.client;
|
||||
|
||||
import com.google.common.util.concurrent.FutureCallback;
|
||||
import com.google.common.util.concurrent.Futures;
|
||||
import com.google.common.util.concurrent.ListenableFuture;
|
||||
import com.google.common.util.concurrent.MoreExecutors;
|
||||
import io.grpc.ManagedChannel;
|
||||
import io.grpc.netty.shaded.io.grpc.netty.GrpcSslContexts;
|
||||
import io.grpc.netty.shaded.io.grpc.netty.NettyChannelBuilder;
|
||||
import io.grpc.stub.AbstractStub;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.function.Function;
|
||||
import javax.net.ssl.SSLException;
|
||||
|
||||
/**
|
||||
* Shared channel-builder and future-adaptor helpers used by both
|
||||
* {@link MxGatewayClient} and {@link GalaxyRepositoryClient}.
|
||||
*
|
||||
* <p>Extracted so transport construction, per-call deadlines, and the
|
||||
* {@link ListenableFuture}-to-{@link CompletableFuture} bridge live in one
|
||||
* place instead of being duplicated verbatim across the two clients.
|
||||
*/
|
||||
final class MxGatewayChannels {
|
||||
private MxGatewayChannels() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a Netty managed channel from the supplied options, applying the
|
||||
* connect timeout, message-size limit, and the configured transport
|
||||
* security mode (plaintext, custom CA trust, or system trust).
|
||||
*
|
||||
* @param options the client options carrying endpoint and transport config
|
||||
* @param tlsErrorPrefix a human-readable prefix for the {@link MxGatewayException}
|
||||
* thrown when a custom CA certificate cannot be loaded
|
||||
* @return a new managed channel; the caller owns its lifecycle
|
||||
*/
|
||||
static ManagedChannel createChannel(MxGatewayClientOptions options, String tlsErrorPrefix) {
|
||||
NettyChannelBuilder builder = NettyChannelBuilder.forTarget(options.endpoint())
|
||||
.maxInboundMessageSize(options.maxGrpcMessageBytes());
|
||||
if (!options.connectTimeout().isNegative()) {
|
||||
builder.withOption(
|
||||
io.grpc.netty.shaded.io.netty.channel.ChannelOption.CONNECT_TIMEOUT_MILLIS,
|
||||
Math.toIntExact(options.connectTimeout().toMillis()));
|
||||
}
|
||||
if (options.plaintext()) {
|
||||
builder.usePlaintext();
|
||||
} else if (options.caCertificatePath() != null) {
|
||||
try {
|
||||
builder.sslContext(GrpcSslContexts.forClient()
|
||||
.trustManager(options.caCertificatePath().toFile())
|
||||
.build());
|
||||
} catch (SSLException | RuntimeException error) {
|
||||
// SSLException covers handshake-context failures; RuntimeException
|
||||
// (IllegalArgumentException wrapping CertificateException) covers a
|
||||
// missing or unreadable CA file. Either way callers see one typed
|
||||
// failure instead of a raw, unwrapped exception leaking out.
|
||||
throw new MxGatewayException(tlsErrorPrefix, error);
|
||||
}
|
||||
} else {
|
||||
builder.useTransportSecurity();
|
||||
}
|
||||
if (!options.serverNameOverride().isBlank()) {
|
||||
builder.overrideAuthority(options.serverNameOverride());
|
||||
}
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies the configured per-call deadline to a unary stub.
|
||||
*
|
||||
* @param stub the stub to decorate
|
||||
* @param options the client options carrying the call timeout
|
||||
* @param <T> the concrete stub type
|
||||
* @return the stub with the call deadline applied, or the stub unchanged
|
||||
* when the call timeout is negative (disabled)
|
||||
*/
|
||||
static <T extends AbstractStub<T>> T withDeadline(T stub, MxGatewayClientOptions options) {
|
||||
if (options.callTimeout().isNegative()) {
|
||||
return stub;
|
||||
}
|
||||
return stub.withDeadlineAfter(options.callTimeout().toNanos(), TimeUnit.NANOSECONDS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies the configured streaming deadline to a streaming stub.
|
||||
*
|
||||
* @param stub the stub to decorate
|
||||
* @param options the client options carrying the stream timeout
|
||||
* @param <T> the concrete stub type
|
||||
* @return the stub with the stream deadline applied, or the stub unchanged
|
||||
* when the stream timeout is unset or negative (disabled)
|
||||
*/
|
||||
static <T extends AbstractStub<T>> T withStreamDeadline(T stub, MxGatewayClientOptions options) {
|
||||
if (options.streamTimeout() == null || options.streamTimeout().isNegative()) {
|
||||
return stub;
|
||||
}
|
||||
return stub.withDeadlineAfter(options.streamTimeout().toNanos(), TimeUnit.NANOSECONDS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Bridges a Guava {@link ListenableFuture} to a {@link CompletableFuture},
|
||||
* normalising any failure through {@link MxGatewayErrors#fromGrpc} so the
|
||||
* async error surface matches the synchronous methods. Cancelling the
|
||||
* returned future cancels the source RPC.
|
||||
*
|
||||
* @param source the gRPC future-stub result
|
||||
* @param operation the operation name used in normalised error messages
|
||||
* @param <T> the reply type
|
||||
* @return a completable future mirroring the source
|
||||
*/
|
||||
static <T> CompletableFuture<T> toCompletable(ListenableFuture<T> source, String operation) {
|
||||
CompletableFuture<T> target = new CompletableFuture<>();
|
||||
Futures.addCallback(
|
||||
source,
|
||||
new FutureCallback<>() {
|
||||
@Override
|
||||
public void onSuccess(T result) {
|
||||
target.complete(result);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFailure(Throwable error) {
|
||||
if (error instanceof RuntimeException runtimeException) {
|
||||
target.completeExceptionally(MxGatewayErrors.fromGrpc(operation, runtimeException));
|
||||
return;
|
||||
}
|
||||
target.completeExceptionally(error);
|
||||
}
|
||||
},
|
||||
MoreExecutors.directExecutor());
|
||||
target.whenComplete((ignoredResult, ignoredError) -> {
|
||||
if (target.isCancelled()) {
|
||||
source.cancel(true);
|
||||
}
|
||||
});
|
||||
return target;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adapts a reply-validating function for use inside {@code thenApply} so
|
||||
* any non-{@link MxGatewayException} {@link RuntimeException} it raises is
|
||||
* routed through {@link MxGatewayErrors#fromGrpc}. This keeps the async
|
||||
* error surface consistent with the synchronous methods, which normalise
|
||||
* failures with a {@code try/catch}.
|
||||
*
|
||||
* @param operation the operation name used in normalised error messages
|
||||
* @param validator the validating/transforming function applied to the reply
|
||||
* @param <T> the reply type
|
||||
* @param <R> the result type
|
||||
* @return a function suitable for {@link CompletableFuture#thenApply}
|
||||
*/
|
||||
static <T, R> Function<T, R> normalisingValidator(String operation, Function<T, R> validator) {
|
||||
return reply -> {
|
||||
try {
|
||||
return validator.apply(reply);
|
||||
} catch (MxGatewayException error) {
|
||||
throw error;
|
||||
} catch (RuntimeException error) {
|
||||
throw MxGatewayErrors.fromGrpc(operation, error);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
+47
-94
@@ -1,19 +1,13 @@
|
||||
package com.dohertylan.mxgateway.client;
|
||||
|
||||
import com.google.common.util.concurrent.FutureCallback;
|
||||
import com.google.common.util.concurrent.Futures;
|
||||
import com.google.common.util.concurrent.MoreExecutors;
|
||||
import com.google.protobuf.Duration;
|
||||
import io.grpc.Channel;
|
||||
import io.grpc.ClientInterceptors;
|
||||
import io.grpc.ManagedChannel;
|
||||
import io.grpc.netty.shaded.io.grpc.netty.GrpcSslContexts;
|
||||
import io.grpc.netty.shaded.io.grpc.netty.NettyChannelBuilder;
|
||||
import io.grpc.stub.StreamObserver;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import javax.net.ssl.SSLException;
|
||||
import mxaccess_gateway.v1.MxAccessGatewayGrpc;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.AcknowledgeAlarmReply;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.AcknowledgeAlarmRequest;
|
||||
@@ -79,7 +73,8 @@ public final class MxGatewayClient implements AutoCloseable {
|
||||
* @return a connected client
|
||||
*/
|
||||
public static MxGatewayClient connect(MxGatewayClientOptions options) {
|
||||
return new MxGatewayClient(createChannel(options), options);
|
||||
return new MxGatewayClient(
|
||||
MxGatewayChannels.createChannel(options, "failed to configure gateway TLS"), options);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -88,7 +83,7 @@ public final class MxGatewayClient implements AutoCloseable {
|
||||
* @return the blocking stub
|
||||
*/
|
||||
public MxAccessGatewayGrpc.MxAccessGatewayBlockingStub rawBlockingStub() {
|
||||
return withDeadline(blockingStub);
|
||||
return MxGatewayChannels.withDeadline(blockingStub, options);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -97,7 +92,7 @@ public final class MxGatewayClient implements AutoCloseable {
|
||||
* @return the future stub
|
||||
*/
|
||||
public MxAccessGatewayGrpc.MxAccessGatewayFutureStub rawFutureStub() {
|
||||
return withDeadline(futureStub);
|
||||
return MxGatewayChannels.withDeadline(futureStub, options);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -186,12 +181,13 @@ public final class MxGatewayClient implements AutoCloseable {
|
||||
* with {@link MxGatewayException} on failure
|
||||
*/
|
||||
public CompletableFuture<OpenSessionReply> openSessionAsync(OpenSessionRequest request) {
|
||||
CompletableFuture<OpenSessionReply> future = toCompletable(rawFutureStub().openSession(request));
|
||||
return future.thenApply(reply -> {
|
||||
CompletableFuture<OpenSessionReply> future =
|
||||
MxGatewayChannels.toCompletable(rawFutureStub().openSession(request), "open session");
|
||||
return future.thenApply(MxGatewayChannels.normalisingValidator("open session", reply -> {
|
||||
MxGatewayErrors.ensureProtocolSuccess("open session", reply.getProtocolStatus(), null);
|
||||
ensureGatewayProtocolCompatible(reply);
|
||||
return reply;
|
||||
});
|
||||
}));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -226,12 +222,13 @@ public final class MxGatewayClient implements AutoCloseable {
|
||||
* on failure
|
||||
*/
|
||||
public CompletableFuture<MxCommandReply> invokeAsync(MxCommandRequest request) {
|
||||
CompletableFuture<MxCommandReply> future = toCompletable(rawFutureStub().invoke(request));
|
||||
return future.thenApply(reply -> {
|
||||
CompletableFuture<MxCommandReply> future =
|
||||
MxGatewayChannels.toCompletable(rawFutureStub().invoke(request), "invoke");
|
||||
return future.thenApply(MxGatewayChannels.normalisingValidator("invoke", reply -> {
|
||||
MxGatewayErrors.ensureProtocolSuccess("invoke", reply.getProtocolStatus(), reply);
|
||||
MxGatewayErrors.ensureMxAccessSuccess("invoke", reply);
|
||||
return reply;
|
||||
});
|
||||
}));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -264,7 +261,7 @@ public final class MxGatewayClient implements AutoCloseable {
|
||||
*/
|
||||
public MxEventStream streamEvents(StreamEventsRequest request) {
|
||||
MxEventStream stream = new MxEventStream(16);
|
||||
withStreamDeadline(rawAsyncStub()).streamEvents(request, stream.observer());
|
||||
MxGatewayChannels.withStreamDeadline(rawAsyncStub(), options).streamEvents(request, stream.observer());
|
||||
return stream;
|
||||
}
|
||||
|
||||
@@ -279,15 +276,17 @@ public final class MxGatewayClient implements AutoCloseable {
|
||||
public MxGatewayEventSubscription streamEventsAsync(
|
||||
StreamEventsRequest request, StreamObserver<MxEvent> observer) {
|
||||
MxGatewayEventSubscription subscription = new MxGatewayEventSubscription();
|
||||
withStreamDeadline(rawAsyncStub()).streamEvents(request, subscription.wrap(observer));
|
||||
MxGatewayChannels.withStreamDeadline(rawAsyncStub(), options)
|
||||
.streamEvents(request, subscription.wrap(observer));
|
||||
return subscription;
|
||||
}
|
||||
|
||||
/**
|
||||
* Acknowledges an active MXAccess alarm condition through the gateway.
|
||||
*
|
||||
* <p>The gateway authenticates the request against the API key's
|
||||
* {@code invoke:alarm-ack} scope and forwards the acknowledge to the
|
||||
* <p>The gateway authorizes this request against the API key's
|
||||
* {@code admin} scope (the gateway scope resolver maps alarm RPCs to the
|
||||
* default {@code admin} scope) and forwards the acknowledge to the
|
||||
* worker's MXAccess session; the resulting native MxStatus is returned
|
||||
* in the reply. Acks are idempotent at the MxAccess layer.
|
||||
*
|
||||
@@ -316,11 +315,12 @@ public final class MxGatewayClient implements AutoCloseable {
|
||||
* with {@link MxGatewayException} on failure
|
||||
*/
|
||||
public CompletableFuture<AcknowledgeAlarmReply> acknowledgeAlarmAsync(AcknowledgeAlarmRequest request) {
|
||||
CompletableFuture<AcknowledgeAlarmReply> future = toCompletable(rawFutureStub().acknowledgeAlarm(request));
|
||||
return future.thenApply(reply -> {
|
||||
CompletableFuture<AcknowledgeAlarmReply> future =
|
||||
MxGatewayChannels.toCompletable(rawFutureStub().acknowledgeAlarm(request), "acknowledge alarm");
|
||||
return future.thenApply(MxGatewayChannels.normalisingValidator("acknowledge alarm", reply -> {
|
||||
MxGatewayErrors.ensureProtocolSuccess("acknowledge alarm", reply.getProtocolStatus(), null);
|
||||
return reply;
|
||||
});
|
||||
}));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -336,14 +336,36 @@ public final class MxGatewayClient implements AutoCloseable {
|
||||
public MxGatewayActiveAlarmsSubscription queryActiveAlarms(
|
||||
QueryActiveAlarmsRequest request, StreamObserver<ActiveAlarmSnapshot> observer) {
|
||||
MxGatewayActiveAlarmsSubscription subscription = new MxGatewayActiveAlarmsSubscription();
|
||||
withStreamDeadline(rawAsyncStub()).queryActiveAlarms(request, subscription.wrap(observer));
|
||||
MxGatewayChannels.withStreamDeadline(rawAsyncStub(), options)
|
||||
.queryActiveAlarms(request, subscription.wrap(observer));
|
||||
return subscription;
|
||||
}
|
||||
|
||||
/**
|
||||
* Shuts the owned channel down and awaits termination so try-with-resources
|
||||
* callers do not leave in-flight calls or Netty event-loop threads running
|
||||
* after the block exits.
|
||||
*
|
||||
* <p>Waits up to the configured connect timeout for graceful termination
|
||||
* and forcibly shuts the channel down on timeout. If the calling thread is
|
||||
* interrupted while waiting, the channel is forcibly shut down and the
|
||||
* thread's interrupt flag is restored. No-op for clients that do not own
|
||||
* their channel. For an explicitly checked, blocking-aware shutdown call
|
||||
* {@link #closeAndAwaitTermination()}.
|
||||
*/
|
||||
@Override
|
||||
public void close() {
|
||||
if (ownedChannel != null) {
|
||||
ownedChannel.shutdown();
|
||||
if (ownedChannel == null) {
|
||||
return;
|
||||
}
|
||||
ownedChannel.shutdown();
|
||||
try {
|
||||
if (!ownedChannel.awaitTermination(options.connectTimeout().toMillis(), TimeUnit.MILLISECONDS)) {
|
||||
ownedChannel.shutdownNow();
|
||||
}
|
||||
} catch (InterruptedException error) {
|
||||
ownedChannel.shutdownNow();
|
||||
Thread.currentThread().interrupt();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -363,75 +385,6 @@ public final class MxGatewayClient implements AutoCloseable {
|
||||
}
|
||||
}
|
||||
|
||||
private static ManagedChannel createChannel(MxGatewayClientOptions options) {
|
||||
NettyChannelBuilder builder = NettyChannelBuilder.forTarget(options.endpoint())
|
||||
.maxInboundMessageSize(options.maxGrpcMessageBytes());
|
||||
if (!options.connectTimeout().isNegative()) {
|
||||
builder.withOption(
|
||||
io.grpc.netty.shaded.io.netty.channel.ChannelOption.CONNECT_TIMEOUT_MILLIS,
|
||||
Math.toIntExact(options.connectTimeout().toMillis()));
|
||||
}
|
||||
if (options.plaintext()) {
|
||||
builder.usePlaintext();
|
||||
} else if (options.caCertificatePath() != null) {
|
||||
try {
|
||||
builder.sslContext(GrpcSslContexts.forClient()
|
||||
.trustManager(options.caCertificatePath().toFile())
|
||||
.build());
|
||||
} catch (SSLException error) {
|
||||
throw new MxGatewayException("failed to configure gateway TLS", error);
|
||||
}
|
||||
} else {
|
||||
builder.useTransportSecurity();
|
||||
}
|
||||
if (!options.serverNameOverride().isBlank()) {
|
||||
builder.overrideAuthority(options.serverNameOverride());
|
||||
}
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
private <T extends io.grpc.stub.AbstractStub<T>> T withDeadline(T stub) {
|
||||
if (options.callTimeout().isNegative()) {
|
||||
return stub;
|
||||
}
|
||||
return stub.withDeadlineAfter(options.callTimeout().toNanos(), TimeUnit.NANOSECONDS);
|
||||
}
|
||||
|
||||
private <T extends io.grpc.stub.AbstractStub<T>> T withStreamDeadline(T stub) {
|
||||
if (options.streamTimeout() == null || options.streamTimeout().isNegative()) {
|
||||
return stub;
|
||||
}
|
||||
return stub.withDeadlineAfter(options.streamTimeout().toNanos(), TimeUnit.NANOSECONDS);
|
||||
}
|
||||
|
||||
private static <T> CompletableFuture<T> toCompletable(com.google.common.util.concurrent.ListenableFuture<T> source) {
|
||||
CompletableFuture<T> target = new CompletableFuture<>();
|
||||
Futures.addCallback(
|
||||
source,
|
||||
new FutureCallback<>() {
|
||||
@Override
|
||||
public void onSuccess(T result) {
|
||||
target.complete(result);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFailure(Throwable error) {
|
||||
if (error instanceof RuntimeException runtimeException) {
|
||||
target.completeExceptionally(MxGatewayErrors.fromGrpc("async call", runtimeException));
|
||||
return;
|
||||
}
|
||||
target.completeExceptionally(error);
|
||||
}
|
||||
},
|
||||
MoreExecutors.directExecutor());
|
||||
target.whenComplete((ignoredResult, ignoredError) -> {
|
||||
if (target.isCancelled()) {
|
||||
source.cancel(true);
|
||||
}
|
||||
});
|
||||
return target;
|
||||
}
|
||||
|
||||
static ProtocolStatusCode okStatusCode() {
|
||||
return ProtocolStatusCode.PROTOCOL_STATUS_CODE_OK;
|
||||
}
|
||||
|
||||
+503
@@ -0,0 +1,503 @@
|
||||
package com.dohertylan.mxgateway.client;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import io.grpc.ManagedChannel;
|
||||
import io.grpc.Server;
|
||||
import io.grpc.Status;
|
||||
import io.grpc.inprocess.InProcessChannelBuilder;
|
||||
import io.grpc.inprocess.InProcessServerBuilder;
|
||||
import io.grpc.stub.ClientCallStreamObserver;
|
||||
import io.grpc.stub.ClientResponseObserver;
|
||||
import io.grpc.stub.StreamObserver;
|
||||
import java.nio.file.Path;
|
||||
import java.time.Duration;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.CompletionException;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import mxaccess_gateway.v1.MxAccessGatewayGrpc;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.AcknowledgeAlarmReply;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.AcknowledgeAlarmRequest;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.ActiveAlarmSnapshot;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.AlarmConditionState;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.MxEvent;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.ProtocolStatus;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.ProtocolStatusCode;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.QueryActiveAlarmsRequest;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.StreamEventsRequest;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
/**
|
||||
* Regression tests for the Low-severity Client.Java code-review findings
|
||||
* (Client.Java-006 through Client.Java-012). Covers the alarm RPC surface,
|
||||
* async streaming/subscription cancellation, queue overflow, and TLS-config
|
||||
* construction that Client.Java-007 reports as untested.
|
||||
*/
|
||||
final class MxGatewayLowFindingsTests {
|
||||
|
||||
// --- Client.Java-007: AcknowledgeAlarm RPC coverage ---
|
||||
|
||||
@Test
|
||||
void acknowledgeAlarmReturnsReplyAndSendsAuthMetadata() throws Exception {
|
||||
AtomicReference<String> authorization = new AtomicReference<>();
|
||||
AtomicReference<AcknowledgeAlarmRequest> seen = new AtomicReference<>();
|
||||
TestService service = new TestService() {
|
||||
@Override
|
||||
public void acknowledgeAlarm(
|
||||
AcknowledgeAlarmRequest request, StreamObserver<AcknowledgeAlarmReply> responseObserver) {
|
||||
seen.set(request);
|
||||
responseObserver.onNext(AcknowledgeAlarmReply.newBuilder()
|
||||
.setSessionId(request.getSessionId())
|
||||
.setProtocolStatus(ok())
|
||||
.setDiagnosticMessage("acked")
|
||||
.build());
|
||||
responseObserver.onCompleted();
|
||||
}
|
||||
};
|
||||
|
||||
try (Harness harness = Harness.start(service, "mxgw_keyid_secret", authorization)) {
|
||||
AcknowledgeAlarmReply reply = harness.client().acknowledgeAlarm(AcknowledgeAlarmRequest.newBuilder()
|
||||
.setSessionId("s-1")
|
||||
.setAlarmFullReference("Area1.Pump.PV.HiHi")
|
||||
.setComment("operator note")
|
||||
.build());
|
||||
assertEquals("acked", reply.getDiagnosticMessage());
|
||||
assertEquals("Area1.Pump.PV.HiHi", seen.get().getAlarmFullReference());
|
||||
assertEquals("Bearer mxgw_keyid_secret", authorization.get());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void acknowledgeAlarmThrowsTypedExceptionOnProtocolFailure() throws Exception {
|
||||
TestService service = new TestService() {
|
||||
@Override
|
||||
public void acknowledgeAlarm(
|
||||
AcknowledgeAlarmRequest request, StreamObserver<AcknowledgeAlarmReply> responseObserver) {
|
||||
responseObserver.onNext(AcknowledgeAlarmReply.newBuilder()
|
||||
.setSessionId(request.getSessionId())
|
||||
.setProtocolStatus(ProtocolStatus.newBuilder()
|
||||
.setCode(ProtocolStatusCode.PROTOCOL_STATUS_CODE_SESSION_NOT_FOUND))
|
||||
.build());
|
||||
responseObserver.onCompleted();
|
||||
}
|
||||
};
|
||||
|
||||
try (Harness harness = Harness.start(service)) {
|
||||
assertThrows(
|
||||
MxGatewayException.class,
|
||||
() -> harness.client().acknowledgeAlarm(AcknowledgeAlarmRequest.newBuilder()
|
||||
.setSessionId("missing")
|
||||
.build()));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void acknowledgeAlarmAsyncCompletesWithReply() throws Exception {
|
||||
TestService service = new TestService() {
|
||||
@Override
|
||||
public void acknowledgeAlarm(
|
||||
AcknowledgeAlarmRequest request, StreamObserver<AcknowledgeAlarmReply> responseObserver) {
|
||||
responseObserver.onNext(AcknowledgeAlarmReply.newBuilder()
|
||||
.setSessionId(request.getSessionId())
|
||||
.setProtocolStatus(ok())
|
||||
.setDiagnosticMessage("async-acked")
|
||||
.build());
|
||||
responseObserver.onCompleted();
|
||||
}
|
||||
};
|
||||
|
||||
try (Harness harness = Harness.start(service)) {
|
||||
CompletableFuture<AcknowledgeAlarmReply> future = harness.client()
|
||||
.acknowledgeAlarmAsync(AcknowledgeAlarmRequest.newBuilder().setSessionId("s-2").build());
|
||||
assertEquals("async-acked", future.get(5, TimeUnit.SECONDS).getDiagnosticMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void acknowledgeAlarmAsyncFailsExceptionallyWithTypedException() throws Exception {
|
||||
TestService service = new TestService() {
|
||||
@Override
|
||||
public void acknowledgeAlarm(
|
||||
AcknowledgeAlarmRequest request, StreamObserver<AcknowledgeAlarmReply> responseObserver) {
|
||||
responseObserver.onError(Status.UNAVAILABLE.withDescription("worker down").asRuntimeException());
|
||||
}
|
||||
};
|
||||
|
||||
try (Harness harness = Harness.start(service)) {
|
||||
CompletableFuture<AcknowledgeAlarmReply> future = harness.client()
|
||||
.acknowledgeAlarmAsync(AcknowledgeAlarmRequest.newBuilder().setSessionId("s-3").build());
|
||||
ExecutionException error = assertThrows(
|
||||
ExecutionException.class, () -> future.get(5, TimeUnit.SECONDS));
|
||||
assertTrue(error.getCause() instanceof MxGatewayException, () -> String.valueOf(error.getCause()));
|
||||
}
|
||||
}
|
||||
|
||||
// --- Client.Java-007: QueryActiveAlarms RPC + subscription coverage ---
|
||||
|
||||
@Test
|
||||
void queryActiveAlarmsDeliversSnapshotsToObserver() throws Exception {
|
||||
ActiveAlarmSnapshot snapshot = ActiveAlarmSnapshot.newBuilder()
|
||||
.setAlarmFullReference("Area1.Tank.Level.Hi")
|
||||
.setSeverity(800)
|
||||
.setCurrentState(AlarmConditionState.ALARM_CONDITION_STATE_ACTIVE)
|
||||
.build();
|
||||
TestService service = new TestService() {
|
||||
@Override
|
||||
public void queryActiveAlarms(
|
||||
QueryActiveAlarmsRequest request, StreamObserver<ActiveAlarmSnapshot> responseObserver) {
|
||||
responseObserver.onNext(snapshot);
|
||||
responseObserver.onCompleted();
|
||||
}
|
||||
};
|
||||
|
||||
try (Harness harness = Harness.start(service)) {
|
||||
List<ActiveAlarmSnapshot> received = new ArrayList<>();
|
||||
CountDownLatch done = new CountDownLatch(1);
|
||||
harness.client().queryActiveAlarms(
|
||||
QueryActiveAlarmsRequest.newBuilder().setSessionId("s-4").build(),
|
||||
new StreamObserver<>() {
|
||||
@Override
|
||||
public void onNext(ActiveAlarmSnapshot value) {
|
||||
received.add(value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onError(Throwable t) {
|
||||
done.countDown();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onCompleted() {
|
||||
done.countDown();
|
||||
}
|
||||
});
|
||||
assertTrue(done.await(5, TimeUnit.SECONDS), "stream should complete");
|
||||
assertEquals(1, received.size());
|
||||
assertEquals("Area1.Tank.Level.Hi", received.get(0).getAlarmFullReference());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void activeAlarmsSubscriptionCancelBeforeBeforeStartCancelsStream() {
|
||||
MxGatewayActiveAlarmsSubscription subscription = new MxGatewayActiveAlarmsSubscription();
|
||||
ClientResponseObserver<QueryActiveAlarmsRequest, ActiveAlarmSnapshot> observer =
|
||||
subscription.wrap(new StreamObserver<>() {
|
||||
@Override
|
||||
public void onNext(ActiveAlarmSnapshot value) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onError(Throwable t) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onCompleted() {
|
||||
}
|
||||
});
|
||||
RecordingActiveAlarmsRequestStream requestStream = new RecordingActiveAlarmsRequestStream();
|
||||
|
||||
subscription.cancel();
|
||||
observer.beforeStart(requestStream);
|
||||
|
||||
assertTrue(requestStream.cancelled);
|
||||
assertEquals("client cancelled active-alarms query", requestStream.cancelMessage);
|
||||
}
|
||||
|
||||
// --- Client.Java-007: async streamEvents + subscription cancellation ---
|
||||
|
||||
@Test
|
||||
void streamEventsAsyncDeliversEventsToObserver() throws Exception {
|
||||
MxEvent event = MxEvent.newBuilder().setWorkerSequence(7).build();
|
||||
TestService service = new TestService() {
|
||||
@Override
|
||||
public void streamEvents(StreamEventsRequest request, StreamObserver<MxEvent> responseObserver) {
|
||||
responseObserver.onNext(event);
|
||||
responseObserver.onCompleted();
|
||||
}
|
||||
};
|
||||
|
||||
try (Harness harness = Harness.start(service)) {
|
||||
List<MxEvent> received = new ArrayList<>();
|
||||
CountDownLatch done = new CountDownLatch(1);
|
||||
harness.client().streamEventsAsync(
|
||||
StreamEventsRequest.newBuilder().setSessionId("s-5").build(),
|
||||
new StreamObserver<>() {
|
||||
@Override
|
||||
public void onNext(MxEvent value) {
|
||||
received.add(value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onError(Throwable t) {
|
||||
done.countDown();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onCompleted() {
|
||||
done.countDown();
|
||||
}
|
||||
});
|
||||
assertTrue(done.await(5, TimeUnit.SECONDS), "stream should complete");
|
||||
assertEquals(1, received.size());
|
||||
assertEquals(7, received.get(0).getWorkerSequence());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void eventSubscriptionCancelBeforeBeforeStartCancelsStream() {
|
||||
MxGatewayEventSubscription subscription = new MxGatewayEventSubscription();
|
||||
ClientResponseObserver<StreamEventsRequest, MxEvent> observer =
|
||||
subscription.wrap(new StreamObserver<>() {
|
||||
@Override
|
||||
public void onNext(MxEvent value) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onError(Throwable t) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onCompleted() {
|
||||
}
|
||||
});
|
||||
RecordingEventsRequestStream requestStream = new RecordingEventsRequestStream();
|
||||
|
||||
subscription.cancel();
|
||||
observer.beforeStart(requestStream);
|
||||
|
||||
assertTrue(requestStream.cancelled);
|
||||
assertEquals("client cancelled event stream", requestStream.cancelMessage);
|
||||
}
|
||||
|
||||
// --- Client.Java-007 / Client.Java-011: MxEventStream queue overflow ---
|
||||
|
||||
@Test
|
||||
void eventStreamQueueOverflowSurfacesExceptionFromNext() {
|
||||
MxEventStream stream = new MxEventStream(2);
|
||||
ClientResponseObserver<StreamEventsRequest, MxEvent> observer = stream.observer();
|
||||
RecordingEventsRequestStream requestStream = new RecordingEventsRequestStream();
|
||||
observer.beforeStart(requestStream);
|
||||
|
||||
// Push far more events than the capacity-2 buffer can hold without draining.
|
||||
for (int i = 0; i < 16; i++) {
|
||||
observer.onNext(MxEvent.newBuilder().setWorkerSequence(i).build());
|
||||
}
|
||||
|
||||
// Overflow must cancel the gRPC call and surface as MxGatewayException.
|
||||
assertTrue(requestStream.cancelled, "overflow should cancel the underlying call");
|
||||
MxGatewayException error = assertThrows(MxGatewayException.class, () -> {
|
||||
while (stream.hasNext()) {
|
||||
stream.next();
|
||||
}
|
||||
});
|
||||
assertTrue(error.getMessage().contains("overflow"), error::getMessage);
|
||||
}
|
||||
|
||||
// --- Client.Java-007: TLS channel construction ---
|
||||
|
||||
@Test
|
||||
void connectWithMissingCaCertificateThrowsTypedTlsException() {
|
||||
MxGatewayClientOptions options = MxGatewayClientOptions.builder()
|
||||
.endpoint("localhost:5001")
|
||||
.apiKey("mxgw_id_secret")
|
||||
.plaintext(false)
|
||||
.caCertificatePath(Path.of("does-not-exist-" + UUID.randomUUID() + ".pem"))
|
||||
.build();
|
||||
|
||||
MxGatewayException error = assertThrows(MxGatewayException.class, () -> MxGatewayClient.connect(options));
|
||||
assertTrue(error.getMessage().contains("TLS"), error::getMessage);
|
||||
|
||||
MxGatewayException galaxyError =
|
||||
assertThrows(MxGatewayException.class, () -> GalaxyRepositoryClient.connect(options));
|
||||
assertTrue(galaxyError.getMessage().contains("TLS"), galaxyError::getMessage);
|
||||
}
|
||||
|
||||
@Test
|
||||
void connectWithSystemTrustBuildsTlsChannelWithoutError() {
|
||||
// No CA path and plaintext=false exercises the useTransportSecurity() branch.
|
||||
MxGatewayClientOptions options = MxGatewayClientOptions.builder()
|
||||
.endpoint("localhost:5001")
|
||||
.apiKey("mxgw_id_secret")
|
||||
.plaintext(false)
|
||||
.build();
|
||||
|
||||
try (MxGatewayClient client = MxGatewayClient.connect(options)) {
|
||||
assertNotNull(client);
|
||||
}
|
||||
try (GalaxyRepositoryClient galaxy = GalaxyRepositoryClient.connect(options)) {
|
||||
assertNotNull(galaxy);
|
||||
}
|
||||
}
|
||||
|
||||
// --- Client.Java-008: async error surface is normalised ---
|
||||
|
||||
@Test
|
||||
void openSessionAsyncNormalisesNonGatewayRuntimeExceptionFromValidator() {
|
||||
// ensureGatewayProtocolCompatible already throws MxGatewayException; this verifies
|
||||
// the normalisingValidator wrapper routes a stray RuntimeException through fromGrpc.
|
||||
CompletableFuture<String> source = new CompletableFuture<>();
|
||||
CompletableFuture<String> wrapped =
|
||||
source.thenApply(MxGatewayChannels.normalisingValidator("open session", reply -> {
|
||||
throw new IllegalStateException("malformed reply");
|
||||
}));
|
||||
source.complete("payload");
|
||||
|
||||
CompletionException error = assertThrows(CompletionException.class, wrapped::join);
|
||||
assertTrue(error.getCause() instanceof MxGatewayException, () -> String.valueOf(error.getCause()));
|
||||
}
|
||||
|
||||
private static ProtocolStatus ok() {
|
||||
return ProtocolStatus.newBuilder()
|
||||
.setCode(ProtocolStatusCode.PROTOCOL_STATUS_CODE_OK)
|
||||
.build();
|
||||
}
|
||||
|
||||
private static class TestService extends MxAccessGatewayGrpc.MxAccessGatewayImplBase {
|
||||
}
|
||||
|
||||
private record Harness(Server server, ManagedChannel channel, MxGatewayClient client) implements AutoCloseable {
|
||||
static Harness start(MxAccessGatewayGrpc.MxAccessGatewayImplBase service) throws Exception {
|
||||
return start(service, "", new AtomicReference<>());
|
||||
}
|
||||
|
||||
static Harness start(
|
||||
MxAccessGatewayGrpc.MxAccessGatewayImplBase service,
|
||||
String apiKey,
|
||||
AtomicReference<String> authorization)
|
||||
throws Exception {
|
||||
String name = "mxgw-low-" + UUID.randomUUID();
|
||||
io.grpc.ServerInterceptor interceptor = new io.grpc.ServerInterceptor() {
|
||||
@Override
|
||||
public <ReqT, RespT> io.grpc.ServerCall.Listener<ReqT> interceptCall(
|
||||
io.grpc.ServerCall<ReqT, RespT> call,
|
||||
io.grpc.Metadata headers,
|
||||
io.grpc.ServerCallHandler<ReqT, RespT> next) {
|
||||
authorization.set(headers.get(MxGatewayAuthInterceptor.AUTHORIZATION_HEADER));
|
||||
return next.startCall(call, headers);
|
||||
}
|
||||
};
|
||||
Server server = InProcessServerBuilder.forName(name)
|
||||
.directExecutor()
|
||||
.addService(io.grpc.ServerInterceptors.intercept(service, interceptor))
|
||||
.build()
|
||||
.start();
|
||||
ManagedChannel channel = InProcessChannelBuilder.forName(name).directExecutor().build();
|
||||
MxGatewayClient client = new MxGatewayClient(
|
||||
channel,
|
||||
MxGatewayClientOptions.builder()
|
||||
.endpoint("in-process")
|
||||
.apiKey(apiKey)
|
||||
.plaintext(true)
|
||||
.callTimeout(Duration.ofSeconds(5))
|
||||
.streamTimeout(Duration.ofSeconds(5))
|
||||
.build());
|
||||
return new Harness(server, channel, client);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
channel.shutdownNow();
|
||||
server.shutdownNow();
|
||||
}
|
||||
}
|
||||
|
||||
private static final class RecordingEventsRequestStream
|
||||
extends ClientCallStreamObserver<StreamEventsRequest> {
|
||||
private boolean cancelled;
|
||||
private String cancelMessage;
|
||||
|
||||
@Override
|
||||
public void cancel(String message, Throwable cause) {
|
||||
cancelled = true;
|
||||
cancelMessage = message;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isReady() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setOnReadyHandler(Runnable onReadyHandler) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void request(int count) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setMessageCompression(boolean enable) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void disableAutoInboundFlowControl() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onNext(StreamEventsRequest value) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onError(Throwable t) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onCompleted() {
|
||||
}
|
||||
}
|
||||
|
||||
private static final class RecordingActiveAlarmsRequestStream
|
||||
extends ClientCallStreamObserver<QueryActiveAlarmsRequest> {
|
||||
private boolean cancelled;
|
||||
private String cancelMessage;
|
||||
|
||||
@Override
|
||||
public void cancel(String message, Throwable cause) {
|
||||
cancelled = true;
|
||||
cancelMessage = message;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isReady() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setOnReadyHandler(Runnable onReadyHandler) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void request(int count) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setMessageCompression(boolean enable) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void disableAutoInboundFlowControl() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onNext(QueryActiveAlarmsRequest value) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onError(Throwable t) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onCompleted() {
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -7,7 +7,7 @@
|
||||
| Review date | 2026-05-18 |
|
||||
| Commit reviewed | `3cc53a8` |
|
||||
| Status | Reviewed |
|
||||
| Open findings | 5 |
|
||||
| Open findings | 0 |
|
||||
|
||||
## Checklist coverage
|
||||
|
||||
@@ -78,13 +78,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Error handling & resilience |
|
||||
| Location | `clients/dotnet/MxGateway.Client/MxGatewayClient.cs:283-294`, `clients/dotnet/MxGateway.Client/GalaxyRepositoryClient.cs:392-403` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `ExecuteSafeUnaryAsync` wraps the whole Polly retry pipeline in a single linked CTS cancelled after `Options.DefaultCallTimeout`, while `CreateCallOptions` also stamps each individual call with a `DefaultCallTimeout` gRPC deadline. The retry pipeline therefore shares one `DefaultCallTimeout` budget across the initial attempt plus all retries plus backoff delays. The README/XML docs describe `DefaultCallTimeout` as a per-call timeout, which misrepresents this. `DeadlineExceeded` is also classified as transient, so an attempt that exhausts the shared budget is retried only to immediately fail again.
|
||||
|
||||
**Recommendation:** Decide whether `DefaultCallTimeout` is per-attempt or per-operation and make code and docs consistent — e.g. a separate per-attempt deadline and a distinct overall-operation timeout. Reconsider retrying on `DeadlineExceeded` when the deadline was client-imposed.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** (2026-05-18) Confirmed against source: the shared linked-CTS budget plus per-call deadline both use `DefaultCallTimeout`, and `IsTransientStatus` listed `DeadlineExceeded`. Resolved as a per-operation budget (the simpler, non-breaking choice): the `DefaultCallTimeout` XML doc in `MxGatewayClientOptions.cs` now states it is both the per-attempt gRPC deadline and the overall budget shared across the initial attempt, every retry, and the backoff delays — an upper bound on total wall-clock time, not a fresh per-retry allowance. Removed `DeadlineExceeded` from `MxGatewayClientRetryPolicy.IsTransientStatus`: every unary deadline is client-imposed (`CreateCallOptions` stamps the shared budget), so a `DeadlineExceeded` means the budget is exhausted and an immediate retry can only fail again. Regression test `MxGatewayClientSessionTests.InvokeAsync_DoesNotRetrySafeDiagnosticCommand_OnDeadlineExceeded` asserts the safe diagnostic command (`Ping`) is attempted exactly once and the failure surfaces; verified red against the original transient set (the call retried and succeeded).
|
||||
|
||||
### Client.Dotnet-005
|
||||
|
||||
@@ -93,13 +93,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `clients/dotnet/MxGateway.Client/MxGatewaySession.cs:82,124,175` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `RegisterAsync`/`AddItemAsync`/`AddItem2Async` return `reply.<Typed>?.ServerHandle ?? reply.ReturnValue.Int32Value`. After `EnsureMxAccessSuccess()` passes, a missing typed payload silently falls back to `ReturnValue.Int32Value`, which for a reply carrying no return value is `0`. A caller then uses `0` as a `ServerHandle`/`ItemHandle`, producing a confusing downstream invalid-handle failure rather than a clear "gateway reply missing payload" error.
|
||||
|
||||
**Recommendation:** If the typed sub-message is the contract for these commands, treat its absence on an otherwise-successful reply as an error (throw a descriptive `MxGatewayException`) rather than falling through to `ReturnValue.Int32Value`.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** (2026-05-18) Confirmed against source and `mxaccess_gateway.proto`: `register`/`add_item`/`add_item2` are members of the `MxCommandReply.payload` oneof, so the typed accessor is `null` whenever the worker did not set that case — and the fallback returned `ReturnValue.Int32Value` (0 for a reply with no return value). The typed sub-message is the contract for these handle-returning commands, so its absence on an otherwise-successful reply is now an error: `RegisterAsync`/`AddItemAsync`/`AddItem2Async` throw via a new private `MxGatewaySession.CreateMissingPayloadException` helper that builds a descriptive `MxGatewayException` naming the missing payload, kind, session, and correlation id. Regression tests `MxGatewayClientSessionTests.RegisterAsync_Throws_WhenSuccessfulReplyMissingPayload` and `AddItemAsync_Throws_WhenSuccessfulReplyMissingPayload` enqueue an `Ok` reply with no typed payload and assert the descriptive throw; verified red against the original fallback (returned `0` instead of throwing).
|
||||
|
||||
### Client.Dotnet-006
|
||||
|
||||
@@ -108,13 +108,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Location | `clients/dotnet/MxGateway.Client/MxGatewayClientOptions.cs:50`, `clients/dotnet/MxGateway.Client/MxGatewayClientContractInfo.cs:10-14` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `MxGatewayClientOptions.MaxGrpcMessageBytes` and the two `const`s in `MxGatewayClientContractInfo` are public members with no XML doc comments, inconsistent with every other public member in the assembly and with the repo's documented C# style emphasis on a documented public surface.
|
||||
|
||||
**Recommendation:** Add `<summary>` doc comments to `MaxGrpcMessageBytes`, `GatewayProtocolVersion`, and `WorkerProtocolVersion`.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** (2026-05-18) Confirmed: all three public members lacked XML docs while every other public member in the assembly is documented. Added `<summary>` comments to `MxGatewayClientOptions.MaxGrpcMessageBytes` (describing the 16 MiB default applied to both send and receive limits), and to `MxGatewayClientContractInfo.GatewayProtocolVersion` and `WorkerProtocolVersion` (describing their wire-compatibility / diagnostics purpose). Pure documentation change — no test needed; build remains warning-clean.
|
||||
|
||||
### Client.Dotnet-007
|
||||
|
||||
@@ -123,13 +123,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `clients/dotnet/MxGateway.Client/MxGatewayClient.cs:185-192` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The `AcknowledgeAlarmAsync` XML comment states the gateway authenticates against an `invoke:alarm-ack` scope, but `CLAUDE.md` documents the scope set without any `invoke:alarm-ack` sub-scope. The comment may describe an intended finer-grained scope that does not exist, misleading integrators about what API key they need.
|
||||
|
||||
**Recommendation:** Reconcile the comment with the actual server-side scope check, or update the scope documentation if sub-scopes were genuinely added; keep client doc and gateway auth model in sync.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** (2026-05-18) Confirmed against the server-side authorization model: `GatewayGrpcScopeResolver.ResolveRequiredScope` has no arm for `AcknowledgeAlarmRequest`, so it falls to the `_ => GatewayScopes.Admin` default — the RPC actually requires the `admin` scope. No `invoke:alarm-ack` sub-scope exists anywhere in `GatewayScopes`. The client XML comment on `AcknowledgeAlarmAsync` was wrong, not the docs. Corrected the comment to state the gateway authorizes `AcknowledgeAlarmRequest` against the API key's `admin` scope and that there is no finer-grained alarm-ack sub-scope. Pure documentation change — no test needed.
|
||||
|
||||
### Client.Dotnet-008
|
||||
|
||||
@@ -138,10 +138,10 @@
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `clients/dotnet/MxGateway.Client.Cli/MxGatewayCliSecretRedactor.cs:9-17` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The CLI redactor only removes the API key string when it was supplied via `--api-key`; `RunCoreAsync` passes `arguments.GetOptional("api-key")` to `Redact`. When the key comes from an environment variable (`--api-key-env`, the documented default path), `apiKey` is `null` and no redaction occurs. If a gRPC/transport error message ever echoes the bearer token, it would be printed unredacted.
|
||||
|
||||
**Recommendation:** Resolve the effective API key (same logic as `ResolveApiKey`) before redacting, so the env-var-sourced key is also stripped from error output.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** (2026-05-18) Confirmed against source: `MxGatewayClientCli.RunCoreAsync`'s catch block redacted only `arguments.GetOptional("api-key")`, so an env-var-sourced key (`--api-key-env`, default `MXGATEWAY_API_KEY`) was never stripped. Note `MxGatewayCliSecretRedactor` itself is correct — the defect was the caller passing the wrong value. Extracted a non-throwing `TryResolveApiKey` helper (used by both the existing `ResolveApiKey` and the catch block) that resolves `--api-key` then the `--api-key-env` environment variable; the catch block now redacts that effective key. Updated `clients/dotnet/README.md` (`smoke` paragraph) to state the CLI redacts the effective key whether from `--api-key` or `--api-key-env`. Regression test `MxGatewayClientCliTests.RunAsync_ErrorOutput_RedactsApiKey_WhenSourcedFromEnvironmentVariable` sets a test env var, forces a transport error echoing the key, and asserts the key is absent and `[redacted]` is present; verified red against the original `GetOptional("api-key")`-only redaction (key printed unredacted).
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
| Review date | 2026-05-18 |
|
||||
| Commit reviewed | `3cc53a8` |
|
||||
| Status | Reviewed |
|
||||
| Open findings | 7 |
|
||||
| Open findings | 0 |
|
||||
|
||||
## Checklist coverage
|
||||
|
||||
@@ -78,13 +78,13 @@
|
||||
| Severity | Low |
|
||||
| Category | mxaccessgw conventions |
|
||||
| Location | `clients/go/mxgateway/alarms_test.go:153-154`, `clients/go/mxgateway/galaxy_test.go:58-59` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `gofmt -l` flags `alarms_test.go` and `galaxy_test.go` for misaligned struct-literal field padding. The Go client README lists `gofmt` as part of the workflow and the repo enforces style; unformatted committed code breaks `gofmt`-gated checks and CI.
|
||||
|
||||
**Recommendation:** Run `gofmt -w mxgateway/alarms_test.go mxgateway/galaxy_test.go`.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-18: confirmed `gofmt -l .` flagged both files for misaligned struct-literal padding. Ran `gofmt -w` on `mxgateway/alarms_test.go` and `mxgateway/galaxy_test.go`; `gofmt -l .` is now clean for the whole module.
|
||||
|
||||
### Client.Go-005
|
||||
|
||||
@@ -93,13 +93,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Design-document adherence |
|
||||
| Location | `clients/go/mxgateway/client.go:64,68`, `clients/go/mxgateway/galaxy.go:83,87` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The client uses `grpc.DialContext` with `grpc.WithBlock()`. In current grpc-go both are deprecated in favour of `grpc.NewClient` (lazy connection). `WithBlock` also changes failure semantics: a transient gateway-unavailable at dial time becomes a hard `Dial` error rather than a connection that recovers when the gateway comes up, working against the design doc's resilience intent.
|
||||
|
||||
**Recommendation:** Migrate to `grpc.NewClient`; if a fail-fast connect probe is still wanted, do an explicit readiness wait bounded by `DialTimeout`, and update the doc comment.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-18: confirmed `Dial`/`DialGalaxy` used the deprecated `grpc.DialContext` + `grpc.WithBlock` pair. Migrated both to the shared `dial(ctx, opts)` helper, which now builds a lazy connection with `grpc.NewClient` and runs an explicit `waitForReady` readiness probe (`Connect` + `WaitForStateChange` until `connectivity.Ready`) bounded by `DialTimeout` — preserving fail-fast behavior while letting an otherwise lazy connection recover when the gateway is briefly down. Note: `grpc.NewClient` defaults the target scheme to `dns`, so the bufconn test harnesses (`client_session_test.go`, `alarms_test.go`, `galaxy_test.go`) were updated to use `passthrough:///bufnet` so the fake target reaches the context dialer. New tests `TestDialFailsFastWhenGatewayUnreachable` and `TestDialReadinessProbeReachesReady` cover the probe; `go vet` reports no deprecation. `clients/go/README.md` documents the lazy-connect + readiness-probe semantics.
|
||||
|
||||
### Client.Go-006
|
||||
|
||||
@@ -108,13 +108,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Error handling & resilience |
|
||||
| Location | `clients/go/mxgateway/errors.go:9-130` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `docs/ClientLibrariesDesign.md` recommends a high-level error taxonomy (`TransportError`, `AuthenticationError`, `TimeoutError`, etc.). The Go client collapses all transport/gRPC failures into a single `GatewayError` with no way to classify transient (`Unavailable`, `DeadlineExceeded`) vs permanent (`Unauthenticated`, `InvalidArgument`) without manually unwrapping and calling `status.Code`.
|
||||
|
||||
**Recommendation:** Add a helper (e.g. `IsTransient(err) bool`) or expose the gRPC `codes.Code` on `GatewayError`, so retry/timeout/auth handling can be written without re-parsing the wrapped error.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-18: implemented the recommended classification surface in `errors.go` rather than a full parallel type hierarchy (the existing `GatewayError`/`CommandError`/`MxAccessError` chain already separates transport from protocol from MXAccess failures). Added `GatewayError.Code()` (returns the wrapped gRPC `codes.Code`, `OK` for nil, `Unknown` for a non-status error) and the free function `IsTransient(err error) bool`, which unwraps through `*GatewayError` and any gRPC-status chain and reports `true` for `Unavailable`, `DeadlineExceeded`, `ResourceExhausted`, and `Aborted`. Tests `TestGatewayErrorCode` and `TestIsTransient` cover the matrix; `clients/go/README.md` documents both for retry/timeout/auth handling.
|
||||
|
||||
### Client.Go-007
|
||||
|
||||
@@ -123,13 +123,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `clients/go/mxgateway/session.go:526-532` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `newCorrelationID` returns an empty string when `crypto/rand.Read` fails, silently producing an `MxCommandRequest` with no correlation id. `rand.Read` failure is rare, but the failure mode (untraceable command, no error surfaced) is worse than failing loud, and the empty-id path is untested.
|
||||
|
||||
**Recommendation:** Either propagate the error up through `invokeCommand`, or fall back to a time/counter-based id rather than an empty string.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-18: confirmed `newCorrelationID` returned `""` on a `rand.Read` failure. It now falls back to a non-empty `"fallback-<unixnano>-<counter>"` id built from `time.Now().UnixNano()` and a process-wide `atomic.Uint64` monotonic counter, so every command stays traceable even without entropy. The `crypto/rand` call was routed through a `randRead` package variable so the failure path is testable; `TestNewCorrelationIDFallsBackOnRandFailure` simulates a `rand.Read` failure and asserts the fallback id is non-empty, `fallback-` prefixed, and unique, and `TestNewCorrelationIDUsesRandEntropy` pins the happy path.
|
||||
|
||||
### Client.Go-008
|
||||
|
||||
@@ -138,13 +138,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `clients/go/mxgateway/` (test files) |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Several critical paths are untested: TLS credential resolution in `resolveTransportCredentials` (only the `Plaintext` path is exercised); the `callContext` deadline-shortening logic (`client.go:198-204`) including the negative-timeout disable case; and `NativeValue`/`NativeArray` for the array, raw-bytes, null, and unsupported-kind branches.
|
||||
|
||||
**Recommendation:** Add unit tests for `resolveTransportCredentials` precedence, `callContext` deadline arithmetic, and `NativeValue`/`NativeArray` round-trips for every kind.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-18: added `clients/go/mxgateway/coverage_test.go`. `TestResolveTransportCredentialsPrecedence` exercises every branch (explicit `TransportCredentials`, `Plaintext`, missing `CACertFile` error, `TLSConfig` + `ServerNameOverride`, default TLS floor) and `TestResolveTransportCredentialsDoesNotMutateTLSConfig` confirms the supplied `*tls.Config` is cloned. `TestCallContextDeadlineArithmetic` covers zero/default, negative-disable, positive timeout, caller-deadline-sooner-kept, and caller-deadline-later-shortened. `TestNativeValueEdgeKinds`, `TestNativeArrayEdgeKinds`, and `TestNativeValueUnsupportedKind` cover the null, raw-bytes (including the no-alias copy), array, timestamp-with-nil, and unsupported-kind branches.
|
||||
|
||||
### Client.Go-009
|
||||
|
||||
@@ -153,13 +153,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Location | `clients/go/mxgateway/galaxy.go:60-93,241-256`, `clients/go/mxgateway/client.go:41-74,190-205` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `DialGalaxy`/`Dial` and `GalaxyClient.callContext`/`Client.callContext` are near-identical duplicates (dial-context setup, credential resolution, dial-option assembly, deadline arithmetic). A fix to one (e.g. the Client.Go-005 dial migration) must be applied twice and can drift.
|
||||
|
||||
**Recommendation:** Extract a shared unexported `dial(ctx, opts)` and a free `callContext(opts, ctx)` function, and have both client constructors call them.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-18: extracted the shared unexported `dial(ctx, opts) (*grpc.ClientConn, error)` (credential resolution, dial-option assembly, `grpc.NewClient`, readiness probe) and the free `callContext(ctx, callTimeout) (context.Context, context.CancelFunc)` into `client.go`. `Dial`/`DialGalaxy` and both `(*Client).callContext`/`(*GalaxyClient).callContext` methods now delegate to them; the duplicated dial and deadline code in `galaxy.go` was removed (its now-unused `errors` import dropped). This was done together with the Client.Go-005 migration so the `grpc.NewClient` change lives in exactly one place.
|
||||
|
||||
### Client.Go-010
|
||||
|
||||
@@ -168,10 +168,10 @@
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `clients/go/mxgateway/client.go:39-40` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The `Dial` doc comment states it configures "blocking dial cancellation from ctx." This describes the deprecated `WithBlock` behaviour; once Client.Go-005 is addressed the comment is misleading about how connection establishment and cancellation work.
|
||||
|
||||
**Recommendation:** Reword to describe the actual connect/timeout semantics after resolving Client.Go-005, and clarify that `DialTimeout` bounds the initial connect attempt.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-18: alongside the Client.Go-005 migration, the `Dial` doc comment was rewritten to describe the lazy `grpc.NewClient` connection, the `DialTimeout`-bounded (default 10s, or ctx deadline when sooner) readiness probe, that a briefly-unavailable gateway recovers instead of producing a hard error, and that cancelling `ctx` aborts the probe. `DialGalaxy` and the new `dial`/`waitForReady`/`callContext` helpers carry matching doc comments.
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
| Review date | 2026-05-18 |
|
||||
| Commit reviewed | `3cc53a8` |
|
||||
| Status | Reviewed |
|
||||
| Open findings | 7 |
|
||||
| Open findings | 0 |
|
||||
|
||||
## Checklist coverage
|
||||
|
||||
@@ -108,13 +108,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Performance & resource management |
|
||||
| Location | `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/MxGatewayClient.java:323-328`, `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/GalaxyRepositoryClient.java:279-284` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `close()` (the `AutoCloseable` method invoked by try-with-resources) calls only `ownedChannel.shutdown()` and returns immediately without awaiting termination. In-flight calls and Netty event-loop threads may still be running when the caller assumes the resource is released. `closeAndAwaitTermination()` does it correctly but is not the method try-with-resources uses, and the README examples all rely on try-with-resources.
|
||||
|
||||
**Recommendation:** Have `close()` await termination for a bounded time and `shutdownNow()` on timeout (the logic already in `closeAndAwaitTermination()`), or document that try-with-resources callers should call `closeAndAwaitTermination()`.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** (2026-05-18) Confirmed against source: both `MxGatewayClient.close()` and `GalaxyRepositoryClient.close()` called only `ownedChannel.shutdown()`. `close()` in both clients now performs the bounded-wait logic previously only in `closeAndAwaitTermination()`: it shuts the channel down, waits up to the configured connect timeout for graceful termination, and calls `shutdownNow()` on timeout. Because `close()` cannot throw a checked exception, an `InterruptedException` while awaiting is handled by forcibly shutting the channel down and restoring the thread interrupt flag. `closeAndAwaitTermination()` is retained unchanged for callers who want the checked, blocking-aware variant. `clients/java/README.md` documents the new try-with-resources `close()` semantics.
|
||||
|
||||
### Client.Java-007
|
||||
|
||||
@@ -123,13 +123,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `clients/java/mxgateway-client/src/test/java/com/dohertylan/mxgateway/client/` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The alarm surface — `acknowledgeAlarm`/`acknowledgeAlarmAsync`/`queryActiveAlarms` and `MxGatewayActiveAlarmsSubscription` — has zero test coverage. TLS channel construction, the async `streamEventsAsync` path, `MxGatewayEventSubscription` pre-start cancellation, and `MxEventStream` queue overflow are likewise untested. `JavaClientDesign.md` explicitly lists async stream-observer cancellation and status/error mapping as required tests.
|
||||
|
||||
**Recommendation:** Add in-process gRPC tests for the alarm RPCs, the async streaming/subscription cancellation paths, and at least one TLS-config construction test.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** (2026-05-18) Confirmed against source: no test referenced `acknowledgeAlarm`, `queryActiveAlarms`, `streamEventsAsync`, TLS construction, or `MxEventStream` overflow. Added `MxGatewayLowFindingsTests` (12 tests) covering: `acknowledgeAlarm`/`acknowledgeAlarmAsync` (success, typed protocol-failure, async transport-failure normalisation), `queryActiveAlarms` observer delivery, `MxGatewayActiveAlarmsSubscription` and `MxGatewayEventSubscription` pre-start cancellation, `streamEventsAsync` observer delivery, `MxEventStream` queue overflow surfacing `MxGatewayException`, TLS channel construction (missing CA file rejected with a typed exception, system-trust path builds cleanly), and the Client.Java-008 async-validator normalisation. While writing the TLS test a latent bug was found: a missing/unreadable CA file makes `GrpcSslContexts` throw `IllegalArgumentException` (not `SSLException`), which the old `catch (SSLException)` let escape unwrapped — the catch in the shared channel builder was broadened to also wrap `RuntimeException` so callers always see one typed `MxGatewayException`.
|
||||
|
||||
### Client.Java-008
|
||||
|
||||
@@ -138,13 +138,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Error handling & resilience |
|
||||
| Location | `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/MxGatewayClient.java:298-304` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `acknowledgeAlarmAsync` and `openSessionAsync` apply `ensureProtocolSuccess` inside `thenApply`. If that validator throws a non-`MxGatewayException` `RuntimeException` it is wrapped by `CompletionException` with no `fromGrpc` normalisation, unlike the synchronous paths which normalise via `try/catch`. The async and sync error surfaces are therefore inconsistent.
|
||||
|
||||
**Recommendation:** Wrap the `thenApply` body so any non-`MxGatewayException` is routed through `MxGatewayErrors.fromGrpc`, matching the synchronous methods.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** (2026-05-18) Confirmed against source: the `thenApply` validators in `openSessionAsync`, `invokeAsync`, and `acknowledgeAlarmAsync` were not normalised — in practice the gateway's own validators (`ensureProtocolSuccess`, `ensureMxAccessSuccess`, `ensureGatewayProtocolCompatible`) only ever throw `MxGatewayException`, but a stray non-`MxGatewayException` `RuntimeException` (e.g. an NPE from a malformed reply) would surface raw inside `CompletionException`. Added `MxGatewayChannels.normalisingValidator(operation, fn)`: it rethrows `MxGatewayException` unchanged and routes any other `RuntimeException` through `MxGatewayErrors.fromGrpc`, matching the synchronous `try/catch` paths. All three async `thenApply` sites now use it. Regression test: `MxGatewayLowFindingsTests.openSessionAsyncNormalisesNonGatewayRuntimeExceptionFromValidator`.
|
||||
|
||||
### Client.Java-009
|
||||
|
||||
@@ -153,13 +153,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Location | `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/GalaxyRepositoryClient.java:310-391`, `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/MxGatewayClient.java:346-413` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `createChannel`, `withDeadline`, `withStreamDeadline`, and `toCompletable` are duplicated nearly verbatim across `MxGatewayClient` and `GalaxyRepositoryClient` (~80 lines). A fix to one will not propagate to the other.
|
||||
|
||||
**Recommendation:** Extract the channel-builder and future-adaptor helpers into a shared package-private utility class.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** (2026-05-18) Confirmed against source: the four helpers were duplicated near-verbatim. Added a package-private `MxGatewayChannels` utility class holding `createChannel(options, tlsErrorPrefix)`, `withDeadline(stub, options)`, `withStreamDeadline(stub, options)`, `toCompletable(future, operation)`, and the new `normalisingValidator` helper (Client.Java-008). Both `MxGatewayClient` and `GalaxyRepositoryClient` now delegate to it and their private copies were deleted, so a future fix lives in one place. Behavior is unchanged except the operation-name carried into `MxGatewayErrors.fromGrpc` is now the specific RPC name instead of the generic `"async call"`/`"galaxy async call"`. Verified by the full existing async test suite plus the new `MxGatewayLowFindingsTests`.
|
||||
|
||||
### Client.Java-010
|
||||
|
||||
@@ -168,13 +168,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/MxGatewayClient.java:269-272`, `clients/java/README.md:76` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The `acknowledgeAlarm` Javadoc states the gateway authenticates against an `invoke:alarm-ack` scope, and the README states the Galaxy Repository requires a `metadata:read` scope. CLAUDE.md's documented scope set names neither — the Javadoc/README assert a scope contract the project's own auth documentation does not corroborate.
|
||||
|
||||
**Recommendation:** Reconcile the scope names with `src/MxGateway.Server/Security/` and CLAUDE.md; correct the Javadoc/README to the actual scope strings, or fix CLAUDE.md if sub-scopes were genuinely added.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** (2026-05-18) Partially re-triaged. Verified against `src/MxGateway.Server/Security/Authorization/GatewayScopes.cs` and `GatewayGrpcScopeResolver.cs`: the canonical scope catalog is `session:open`, `session:close`, `invoke:read`, `invoke:write`, `invoke:secure`, `events:read`, `metadata:read`, `admin`. (a) The README's `metadata:read` for the Galaxy Repository is **correct** — `TestConnectionRequest`/`GetLastDeployTimeRequest`/`DiscoverHierarchyRequest`/`WatchDeployEventsRequest` all resolve to `GatewayScopes.MetadataRead`; no change needed. CLAUDE.md's prose lists only coarse scope groups, but the canonical resolver does define `metadata:read`. (b) The `acknowledgeAlarm` Javadoc's `invoke:alarm-ack` is **wrong** — no such scope exists. `AcknowledgeAlarmRequest` and `QueryActiveAlarmsRequest` are not special-cased in `GatewayGrpcScopeResolver`, so they fall through the `_ => GatewayScopes.Admin` default and require the `admin` scope. The Javadoc was corrected to state the `admin` scope; `queryActiveAlarms` did not assert a scope and was left unchanged. The README does not mention alarms, so no README change was required.
|
||||
|
||||
### Client.Java-011
|
||||
|
||||
@@ -183,13 +183,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Performance & resource management |
|
||||
| Location | `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/MxEventStream.java:37-63` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The event stream relies on default gRPC auto-inbound flow control: the async stub auto-requests messages, so the server can push faster than the 16-element bounded queue drains. A momentarily slow consumer triggers queue overflow and an immediate stream-fault cancel. This is consistent with the documented fail-fast event-backpressure design, but the client never applies real flow control, so even brief consumer stalls kill the subscription.
|
||||
|
||||
**Recommendation:** Confirm fail-fast is intended (it appears to be); if so, document it on `MxEventStream` so callers know a slow consumer terminates the stream. Optionally expose the queue capacity or opt-in flow control.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** (2026-05-18) Confirmed fail-fast is intended — CLAUDE.md ("fail-fast event backpressure") and `docs/DesignDecisions.md` make a slow consumer losing its subscription a deliberate v1 design choice, so this is documentation-only, not a behavior bug. Added an explicit "Backpressure (fail-fast)" section to the `MxEventStream` class Javadoc explaining that the adaptor uses gRPC auto-inbound flow control with a fixed 16-element buffer and no client flow control, that a consumer stall long enough to fill the buffer triggers an overflow that cancels the subscription and surfaces an `MxGatewayException`, and that consumers must drain promptly and be ready to resubscribe with a resume cursor. `clients/java/README.md` carries the same caveat. The queue capacity was intentionally left non-configurable to keep the v1 surface aligned with the gateway design; overflow behavior is covered by `MxGatewayLowFindingsTests.eventStreamQueueOverflowSurfacesExceptionFromNext`.
|
||||
|
||||
### Client.Java-012
|
||||
|
||||
@@ -198,10 +198,10 @@
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `clients/java/mxgateway-cli/src/main/java/com/dohertylan/mxgateway/cli/MxGatewayCli.java:667-674` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `CommonOptions.resolved()` mutates `this` (`resolvedApiKey`, `resolvedTimeout`) and returns `this`, but `toClientOptions()` and `redactedJsonMap()` read those mutated fields. If `redactedJsonMap()` is ever called before `resolved()`, it silently emits empty-string defaults. The "return this after mutating" pattern is fragile and surprising.
|
||||
|
||||
**Recommendation:** Make `resolved()` return an immutable resolved value object, or compute `resolvedApiKey`/`resolvedTimeout` lazily in their getters so call ordering cannot produce stale output.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** (2026-05-18) Confirmed against source: `resolved()` populated the `resolvedApiKey`/`resolvedTimeout` mutable fields and `toClientOptions()`/`redactedJsonMap()` read them, so calling either before `resolved()` emitted stale empty/30s defaults. The two mutable fields were removed and replaced with side-effect-free accessor methods `resolvedApiKey()` and `resolvedTimeout()` that compute their value on each call (API key from `--api-key` or the `--api-key-env` variable; timeout via `parseDuration`). `toClientOptions()` and `redactedJsonMap()` now call those accessors directly, so call ordering can no longer produce stale output. `resolved()` is retained as a no-op returning `this` purely for call-site readability (`common.resolved()`), with its Javadoc updated to state resolution is now lazy. Pure-refactor with no runtime-behavior change for the existing call order, so no new test was added; covered by the existing `MxGatewayCliTests` JSON-redaction and option-parsing tests.
|
||||
|
||||
+39
-39
@@ -10,16 +10,16 @@ Each module's `findings.md` is the source of truth; this file is generated from
|
||||
|
||||
| Module | Reviewer | Date | Commit | Status | Open | Total |
|
||||
|---|---|---|---|---|---|---|
|
||||
| [Client.Dotnet](Client.Dotnet/findings.md) | Claude Code | 2026-05-18 | `3cc53a8` | Reviewed | 5 | 8 |
|
||||
| [Client.Go](Client.Go/findings.md) | Claude Code | 2026-05-18 | `3cc53a8` | Reviewed | 7 | 10 |
|
||||
| [Client.Java](Client.Java/findings.md) | Claude Code | 2026-05-18 | `3cc53a8` | Reviewed | 7 | 12 |
|
||||
| [Client.Dotnet](Client.Dotnet/findings.md) | Claude Code | 2026-05-18 | `3cc53a8` | Reviewed | 0 | 8 |
|
||||
| [Client.Go](Client.Go/findings.md) | Claude Code | 2026-05-18 | `3cc53a8` | Reviewed | 0 | 10 |
|
||||
| [Client.Java](Client.Java/findings.md) | Claude Code | 2026-05-18 | `3cc53a8` | Reviewed | 0 | 12 |
|
||||
| [Client.Python](Client.Python/findings.md) | Claude Code | 2026-05-18 | `3cc53a8` | Reviewed | 9 | 12 |
|
||||
| [Client.Rust](Client.Rust/findings.md) | Claude Code | 2026-05-18 | `3cc53a8` | Reviewed | 0 | 12 |
|
||||
| [Contracts](Contracts/findings.md) | Claude Code | 2026-05-18 | `6c64030` | Reviewed | 7 | 8 |
|
||||
| [IntegrationTests](IntegrationTests/findings.md) | Claude Code | 2026-05-18 | `6c64030` | Reviewed | 4 | 10 |
|
||||
| [Server](Server/findings.md) | Claude Code | 2026-05-18 | `6c64030` | Reviewed | 8 | 14 |
|
||||
| [Server](Server/findings.md) | Claude Code | 2026-05-18 | `6c64030` | Reviewed | 0 | 14 |
|
||||
| [Tests](Tests/findings.md) | Claude Code | 2026-05-18 | `6c64030` | Reviewed | 6 | 12 |
|
||||
| [Worker](Worker/findings.md) | Claude Code | 2026-05-18 | `6c64030` | Reviewed | 7 | 15 |
|
||||
| [Worker](Worker/findings.md) | Claude Code | 2026-05-18 | `6c64030` | Reviewed | 0 | 15 |
|
||||
| [Worker.Tests](Worker.Tests/findings.md) | Claude Code | 2026-05-18 | `6c64030` | Reviewed | 8 | 15 |
|
||||
|
||||
## Pending findings
|
||||
@@ -28,25 +28,6 @@ Findings with status `Open` or `In Progress`, ordered by severity.
|
||||
|
||||
| ID | Severity | Category | Location | Description |
|
||||
|---|---|---|---|---|
|
||||
| Client.Dotnet-004 | Low | Error handling & resilience | `clients/dotnet/MxGateway.Client/MxGatewayClient.cs:283-294`, `clients/dotnet/MxGateway.Client/GalaxyRepositoryClient.cs:392-403` | `ExecuteSafeUnaryAsync` wraps the whole Polly retry pipeline in a single linked CTS cancelled after `Options.DefaultCallTimeout`, while `CreateCallOptions` also stamps each individual call with a `DefaultCallTimeout` gRPC deadline. The ret… |
|
||||
| Client.Dotnet-005 | Low | Correctness & logic bugs | `clients/dotnet/MxGateway.Client/MxGatewaySession.cs:82,124,175` | `RegisterAsync`/`AddItemAsync`/`AddItem2Async` return `reply.<Typed>?.ServerHandle ?? reply.ReturnValue.Int32Value`. After `EnsureMxAccessSuccess()` passes, a missing typed payload silently falls back to `ReturnValue.Int32Value`, which for… |
|
||||
| Client.Dotnet-006 | Low | Code organization & conventions | `clients/dotnet/MxGateway.Client/MxGatewayClientOptions.cs:50`, `clients/dotnet/MxGateway.Client/MxGatewayClientContractInfo.cs:10-14` | `MxGatewayClientOptions.MaxGrpcMessageBytes` and the two `const`s in `MxGatewayClientContractInfo` are public members with no XML doc comments, inconsistent with every other public member in the assembly and with the repo's documented C# s… |
|
||||
| Client.Dotnet-007 | Low | Documentation & comments | `clients/dotnet/MxGateway.Client/MxGatewayClient.cs:185-192` | The `AcknowledgeAlarmAsync` XML comment states the gateway authenticates against an `invoke:alarm-ack` scope, but `CLAUDE.md` documents the scope set without any `invoke:alarm-ack` sub-scope. The comment may describe an intended finer-grai… |
|
||||
| Client.Dotnet-008 | Low | Correctness & logic bugs | `clients/dotnet/MxGateway.Client.Cli/MxGatewayCliSecretRedactor.cs:9-17` | The CLI redactor only removes the API key string when it was supplied via `--api-key`; `RunCoreAsync` passes `arguments.GetOptional("api-key")` to `Redact`. When the key comes from an environment variable (`--api-key-env`, the documented d… |
|
||||
| Client.Go-004 | Low | mxaccessgw conventions | `clients/go/mxgateway/alarms_test.go:153-154`, `clients/go/mxgateway/galaxy_test.go:58-59` | `gofmt -l` flags `alarms_test.go` and `galaxy_test.go` for misaligned struct-literal field padding. The Go client README lists `gofmt` as part of the workflow and the repo enforces style; unformatted committed code breaks `gofmt`-gated che… |
|
||||
| Client.Go-005 | Low | Design-document adherence | `clients/go/mxgateway/client.go:64,68`, `clients/go/mxgateway/galaxy.go:83,87` | The client uses `grpc.DialContext` with `grpc.WithBlock()`. In current grpc-go both are deprecated in favour of `grpc.NewClient` (lazy connection). `WithBlock` also changes failure semantics: a transient gateway-unavailable at dial time be… |
|
||||
| Client.Go-006 | Low | Error handling & resilience | `clients/go/mxgateway/errors.go:9-130` | `docs/ClientLibrariesDesign.md` recommends a high-level error taxonomy (`TransportError`, `AuthenticationError`, `TimeoutError`, etc.). The Go client collapses all transport/gRPC failures into a single `GatewayError` with no way to classif… |
|
||||
| Client.Go-007 | Low | Correctness & logic bugs | `clients/go/mxgateway/session.go:526-532` | `newCorrelationID` returns an empty string when `crypto/rand.Read` fails, silently producing an `MxCommandRequest` with no correlation id. `rand.Read` failure is rare, but the failure mode (untraceable command, no error surfaced) is worse… |
|
||||
| Client.Go-008 | Low | Testing coverage | `clients/go/mxgateway/` (test files) | Several critical paths are untested: TLS credential resolution in `resolveTransportCredentials` (only the `Plaintext` path is exercised); the `callContext` deadline-shortening logic (`client.go:198-204`) including the negative-timeout disa… |
|
||||
| Client.Go-009 | Low | Code organization & conventions | `clients/go/mxgateway/galaxy.go:60-93,241-256`, `clients/go/mxgateway/client.go:41-74,190-205` | `DialGalaxy`/`Dial` and `GalaxyClient.callContext`/`Client.callContext` are near-identical duplicates (dial-context setup, credential resolution, dial-option assembly, deadline arithmetic). A fix to one (e.g. the Client.Go-005 dial migrati… |
|
||||
| Client.Go-010 | Low | Documentation & comments | `clients/go/mxgateway/client.go:39-40` | The `Dial` doc comment states it configures "blocking dial cancellation from ctx." This describes the deprecated `WithBlock` behaviour; once Client.Go-005 is addressed the comment is misleading about how connection establishment and cancel… |
|
||||
| Client.Java-006 | Low | Performance & resource management | `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/MxGatewayClient.java:323-328`, `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/GalaxyRepositoryClient.java:279-284` | `close()` (the `AutoCloseable` method invoked by try-with-resources) calls only `ownedChannel.shutdown()` and returns immediately without awaiting termination. In-flight calls and Netty event-loop threads may still be running when the call… |
|
||||
| Client.Java-007 | Low | Testing coverage | `clients/java/mxgateway-client/src/test/java/com/dohertylan/mxgateway/client/` | The alarm surface — `acknowledgeAlarm`/`acknowledgeAlarmAsync`/`queryActiveAlarms` and `MxGatewayActiveAlarmsSubscription` — has zero test coverage. TLS channel construction, the async `streamEventsAsync` path, `MxGatewayEventSubscription`… |
|
||||
| Client.Java-008 | Low | Error handling & resilience | `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/MxGatewayClient.java:298-304` | `acknowledgeAlarmAsync` and `openSessionAsync` apply `ensureProtocolSuccess` inside `thenApply`. If that validator throws a non-`MxGatewayException` `RuntimeException` it is wrapped by `CompletionException` with no `fromGrpc` normalisation… |
|
||||
| Client.Java-009 | Low | Code organization & conventions | `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/GalaxyRepositoryClient.java:310-391`, `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/MxGatewayClient.java:346-413` | `createChannel`, `withDeadline`, `withStreamDeadline`, and `toCompletable` are duplicated nearly verbatim across `MxGatewayClient` and `GalaxyRepositoryClient` (~80 lines). A fix to one will not propagate to the other. |
|
||||
| Client.Java-010 | Low | Documentation & comments | `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/MxGatewayClient.java:269-272`, `clients/java/README.md:76` | The `acknowledgeAlarm` Javadoc states the gateway authenticates against an `invoke:alarm-ack` scope, and the README states the Galaxy Repository requires a `metadata:read` scope. CLAUDE.md's documented scope set names neither — the Javadoc… |
|
||||
| Client.Java-011 | Low | Performance & resource management | `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/MxEventStream.java:37-63` | The event stream relies on default gRPC auto-inbound flow control: the async stub auto-requests messages, so the server can push faster than the 16-element bounded queue drains. A momentarily slow consumer triggers queue overflow and an im… |
|
||||
| Client.Java-012 | Low | Correctness & logic bugs | `clients/java/mxgateway-cli/src/main/java/com/dohertylan/mxgateway/cli/MxGatewayCli.java:667-674` | `CommonOptions.resolved()` mutates `this` (`resolvedApiKey`, `resolvedTimeout`) and returns `this`, but `toClientOptions()` and `redactedJsonMap()` read those mutated fields. If `redactedJsonMap()` is ever called before `resolved()`, it si… |
|
||||
| Client.Python-001 | Low | Documentation & comments | `clients/python/pyproject.toml:8,25`, `clients/python/src/mxgateway_cli/commands.py:25` | The package `description` in `pyproject.toml` still says "Async Python client *scaffold*" even though the client is fully implemented. Stale "scaffold" wording misrepresents maturity to anyone reading PyPI metadata. (The `mxgw-py` console-… |
|
||||
| Client.Python-002 | Low | Code organization & conventions | `clients/python/src/mxgateway/__init__.py:27` | `MxGatewayCommandError` is imported into `__init__.py` and is a documented public exception, but it is missing from `__all__`. It is the parent of `MxAccessError` and a meaningful catch target, so omitting it from the public surface is inc… |
|
||||
| Client.Python-004 | Low | Correctness & logic bugs | `clients/python/src/mxgateway_cli/commands.py:386,402-404` | In `_smoke`, the local variable `closed` is set to `False` and never reassigned; the `finally` block's `if not closed:` is therefore always true. This is dead/misleading code suggesting a removed early-close path. |
|
||||
@@ -67,27 +48,12 @@ Findings with status `Open` or `In Progress`, ordered by severity.
|
||||
| IntegrationTests-008 | Low | Code organization & conventions | `src/MxGateway.IntegrationTests/LiveLdapFactAttribute.cs`, `src/MxGateway.IntegrationTests/Galaxy/LiveGalaxyRepositoryFactAttribute.cs`, `src/MxGateway.IntegrationTests/LiveMxAccessFactAttribute.cs` | Three near-identical fact attributes each re-implement the same "compare env var to `1` with `StringComparison.Ordinal`, set `Skip` otherwise" pattern. `LiveMxAccessFactAttribute` delegates to `IntegrationTestEnvironment` while the other t… |
|
||||
| IntegrationTests-009 | Low | Documentation & comments | `src/MxGateway.IntegrationTests/WorkerLiveMxAccessSmokeTests.cs:372-375` | `TestServerCallContext` is XML-documented as a "Mock server call context," but it is a hand-written stub/fake with no mocking framework and no verification behavior. Per the style guides (accurate naming; explain why not what), calling it… |
|
||||
| IntegrationTests-010 | Low | Correctness & logic bugs | `src/MxGateway.IntegrationTests/WorkerLiveMxAccessSmokeTests.cs:366-369` | `WaitForFirstMessageAsync` accepts only a `timeout` and never observes a `CancellationToken`. There is no per-test cancellation propagation, so if the gateway/worker hangs without writing an event the test relies solely on the 15s `WaitAsy… |
|
||||
| Server-007 | Low | Performance & resource management | `src/MxGateway.Server/Galaxy/GalaxyHierarchyProjector.cs:55-70` | `Project` always iterates the full `entry.Index.ObjectViews` collection and re-applies all filters to skip `offset` matched items before collecting a page. Paging through a large Galaxy hierarchy is therefore O(total) per page and O(total²… |
|
||||
| Server-008 | Low | Performance & resource management | `src/MxGateway.Server/Grpc/GalaxyRepositoryGrpcService.cs:111-134,160-189` | `WatchDeployEvents` calls `ResolveBrowseSubtrees()` on every streamed event, and `MapDeployEvent` re-runs `GalaxyHierarchyProjector.Project` over the entire cached hierarchy (and `Sum`s attribute counts) for every event of every constraine… |
|
||||
| Server-009 | Low | Error handling & resilience | `src/MxGateway.Server/Security/Authentication/AuthSqliteConnectionFactory.cs:15-32` | Each auth-store operation opens a fresh `SqliteConnection` with no busy timeout, no WAL journal mode, and default journaling. `MarkKeyUsedAsync` runs on every authenticated request and `SqliteApiKeyAuditStore` appends on every denial; unde… |
|
||||
| Server-010 | Low | Security | `src/MxGateway.Server/Security/Authentication/SqliteApiKeyAdminStore.cs:91-114`, `src/MxGateway.Server/Dashboard/Components/Pages/ApiKeysPage.razor:168-172` | `RotateAsync` sets `revoked_utc = NULL`, so rotating a previously revoked key silently reactivates it. This is documented intentional behavior in `docs/Authentication.md:167`, but the dashboard renders the "Rotate" button unconditionally —… |
|
||||
| Server-011 | Low | Code organization & conventions | `src/MxGateway.Server/Sessions/WorkerAlarmRpcDispatcher.cs:1-46` | `WorkerAlarmRpcDispatcher` deviates from the module's conventions: it fully-qualifies `System.Guid`, `System.ArgumentNullException`, and `System.Threading` types inline instead of relying on `using` directives, and uses an explicit constru… |
|
||||
| Server-012 | Low | Documentation & comments | `CLAUDE.md` (Authentication section and `apikey create` example) | CLAUDE.md describes scopes as `session`, `invoke`, `event`, `metadata`, `admin` and shows `apikey create --scopes session,invoke,event,metadata,admin`. The actual canonical scope strings (used by `GatewayScopes`, `GatewayGrpcScopeResolver`… |
|
||||
| Server-013 | Low | Testing coverage | `src/MxGateway.Tests/Gateway/Dashboard/DashboardAuthorizationHandlerTests.cs`, `src/MxGateway.Tests/Gateway/GatewayApplicationTests.cs` | `DashboardAuthorizationHandler` is unit-tested in isolation, but no test exercises the dashboard routes end-to-end to confirm the policy is actually enforced — which is why Server-001 (policy registered but never wired) went uncaught. Ther… |
|
||||
| Server-014 | Low | Documentation & comments | `src/MxGateway.Server/Grpc/MxAccessGatewayService.cs:162-171,191-198,206-214,229-237` | The XML `<remarks>` and inline comments on `AcknowledgeAlarm` and `QueryActiveAlarms` describe the alarm path as not yet wired and say `NotWiredAlarmRpcDispatcher` is the default ("Clients calling this method today receive an OK reply with… |
|
||||
| Tests-007 | Low | Code organization & conventions | `src/MxGateway.Tests/Gateway/Grpc/MxAccessGatewayServiceTests.cs:682`, `src/MxGateway.Tests/Gateway/Grpc/GalaxyRepositoryGrpcServiceTests.cs:324`, `src/MxGateway.Tests/Gateway/GatewayEndToEndFakeWorkerSmokeTests.cs:460`, `src/MxGateway.Tests/Security/Authorization/GatewayGrpcAuthorizationInterceptorTests.cs:233` | A near-identical `TestServerCallContext` implementation is copy-pasted into at least four test files (and `AllowAllConstraintEnforcer` / `TestServerStreamWriter` / `RecordingStreamWriter` into several). Duplication risks the copies driftin… |
|
||||
| Tests-008 | Low | mxaccessgw conventions | `src/MxGateway.Tests/Gateway/Sessions/WorkerAlarmRpcDispatcherTests.cs:1-9`, `src/MxGateway.Tests/Gateway/Sessions/NotWiredAlarmRpcDispatcherTests.cs:1-3`, `src/MxGateway.Tests/Gateway/Sessions/SessionManagerAlarmAutoSubscribeTests.cs:1` | The alarm test files diverge from the project's C# style and the rest of the suite: snake_case test method names instead of the PascalCase `Method_Condition_Result` pattern; redundant explicit `using System;`/`System.Threading;` imports de… |
|
||||
| Tests-009 | Low | Documentation & comments | `src/MxGateway.Tests/Gateway/Sessions/SessionManagerTests.cs:36-37,99,365` | Several XML `<summary>` comments are copy-paste mismatches: the comment above `OpenSessionAsync_SetsInitialDefaultLease` describes correlation-ID generation; the comment above `GatewaySessionSubscribeBulkAsync_ForwardsOneBulkCommand…` desc… |
|
||||
| Tests-010 | Low | Security | `src/MxGateway.Tests/Gateway/Dashboard/DashboardAuthorizationHandlerTests.cs:26-36` | The anonymous-localhost bypass is tested only for the success case (`allowAnonymousLocalhost: true` + loopback succeeds) and the remote-unauthenticated denial. There is no test for the security-critical negatives: anonymous + loopback when… |
|
||||
| Tests-011 | Low | Correctness & logic bugs | `src/MxGateway.Tests/Gateway/GatewayEndToEndFakeWorkerSmokeTests.cs:233-301` | `GatewayEndToEndFakeWorkerSmokeTests` correctly stores and awaits `launcher.WorkerTask`, but `SessionWorkerClientFactoryFakeWorkerTests` uses `_ = RunWorkerAsync(...)` with no stored task (lines 152, 184, 220). An unhandled exception in th… |
|
||||
| Tests-012 | Low | Concurrency & thread safety | `src/MxGateway.Tests/Gateway/Workers/Fakes/FakeWorkerHarness.cs:62`, `src/MxGateway.Tests/Gateway/Workers/WorkerClientTests.cs:472` | Pipe names are uniquified per test with a GUID (good), but xUnit runs test classes in parallel by default and there is no `xunit.runner.json` or collection configuration. Tests that build a full `WebApplication` bind ephemeral ports (`--ur… |
|
||||
| Worker-009 | Low | Performance & resource management | `src/MxGateway.Worker/Ipc/WorkerFrameReader.cs:31,49`, `src/MxGateway.Worker/Ipc/WorkerFrameWriter.cs:57-58` | Every frame read allocates a fresh 4-byte length buffer and a payload `byte[]`; every write allocates `ToByteArray()` plus a 4-byte prefix. On the hot event-drain path (batches of up to 128 `WorkerEvent` frames every 25 ms) this produces s… |
|
||||
| Worker-010 | Low | Correctness & logic bugs | `src/MxGateway.Worker/Conversion/VariantConverter.cs:204-226` | `ConvertInt64Scalar` is reached for `TypeCode.UInt32` and `TypeCode.Int64`. For a `uint` with `expectedDataType == MxDataType.Time`, the value is treated as a Windows `FILETIME` via `DateTime.FromFileTimeUtc(longValue)`; a 32-bit FILETIME… |
|
||||
| Worker-011 | Low | Correctness & logic bugs | `src/MxGateway.Worker/Ipc/WorkerPipeClient.cs:169-171` | `retryAttempts` is computed as `(connectTimeout / min(connectTimeout, attemptTimeout)) - 1`. With defaults (30000 / 2000) this yields 14 retries, but each retry also incurs Polly exponential backoff. The overall `connectDeadline` (`CancelA… |
|
||||
| Worker-012 | Low | Documentation & comments | `src/MxGateway.Worker/MxAccess/MxAccessAlarmEventSink.cs:44-55`, `src/MxGateway.Worker/MxAccess/WnWrapAlarmConsumer.cs:38-43`, `src/MxGateway.Worker/MxAccess/MxAccessEventMapper.cs:106-112` | Multiple comments describe the alarm path as not-yet-wired future work ("PR A.2 — COM-side subscription scaffold … the worker advertises no alarm subscription", "the worker bootstrap will gain a thin 'run-on-STA' wrapper as part of A.3").… |
|
||||
| Worker-013 | Low | Testing coverage | `src/MxGateway.Worker/Sta/StaMessagePump.cs` | `StaMessagePump` — the heart of COM event delivery (`MsgWaitForMultipleObjectsEx` + `PeekMessage`/`DispatchMessage`) — has no direct unit tests. `StaRuntimeTests` exercises it indirectly for command wake-up but never verifies that a posted… |
|
||||
| Worker-014 | Low | Code organization & conventions | `src/MxGateway.Worker/MxAccess/AlarmCommandHandler.cs:33`, `:202` | The file declares two public types — the `AlarmCommandHandler` class and the `IAlarmCommandHandler` interface. The C# style guide and the rest of the module follow one-public-type-per-file (e.g. interfaces in their own `I*.cs` files like `… |
|
||||
| Worker-015 | Low | Correctness & logic bugs | `src/MxGateway.Worker/MxAccess/MxAccessEventQueue.cs:115-145` | On overflow, `Enqueue` records the overflow fault and throws `MxAccessEventQueueOverflowException`; `MxAccessBaseEventSink.EnqueueEvent` catches it and calls `RecordFault` again. `RecordFault` is a no-op when a fault already exists, so the… |
|
||||
| Worker.Tests-008 | Low | Documentation & comments | `src/MxGateway.Worker.Tests/Conversion/VariantConverterTests.cs:175-182` | `Redactor_WithCredentialBearingValueFields_RedactsBeforeLogging` lives in `VariantConverterTests` but asserts on `WorkerLogRedactor.RedactValue`, which has nothing to do with `VariantConverter`. It is also a near-duplicate of coverage in `… |
|
||||
| Worker.Tests-009 | Low | Code organization & conventions | `src/MxGateway.Worker.Tests/MxAccess/AlarmCommandHandlerTests.cs`, `AlarmDispatcherTests.cs`, `AlarmCommandExecutorTests.cs`, `AlarmRecordTransitionMapperTests.cs`, `WnWrapAlarmConsumerXmlTests.cs` | The alarm-related test files use `snake_case` method names while the rest of the project uses the `Method_State_Result` PascalCase convention. `docs/style-guides/CSharpStyleGuide.md` and the surrounding code establish PascalCase as the pro… |
|
||||
| Worker.Tests-010 | Low | Correctness & logic bugs | `src/MxGateway.Worker.Tests/MxAccess/MxAccessStaSessionTests.cs:230-258` | `StartAsync_WithoutAlarmCommandHandlerFactory_SubscribeAlarmsReturnsInvalidRequest` asserts `Assert.Contains("alarm", reply.DiagnosticMessage, StringComparison.OrdinalIgnoreCase)`. The XML doc claims it verifies the diagnostic says "alarm… |
|
||||
@@ -157,9 +123,43 @@ Findings with status `Resolved`, `Won't Fix`, or `Deferred`.
|
||||
| Worker.Tests-005 | Medium | Resolved | Performance & resource management | `src/MxGateway.Worker.Tests/Ipc/WorkerFrameProtocolTests.cs:20-31,103-105`, `src/MxGateway.Worker.Tests/Ipc/WorkerPipeSessionTests.cs:28-31` |
|
||||
| Worker.Tests-006 | Medium | Resolved | Performance & resource management | `src/MxGateway.Worker.Tests/MxAccess/MxAccessStaSessionTests.cs:282,305,315,323` |
|
||||
| Worker.Tests-007 | Medium | Resolved | Design-document adherence | `docs/WorkerFrameProtocol.md:38-49` |
|
||||
| Client.Dotnet-004 | Low | Resolved | Error handling & resilience | `clients/dotnet/MxGateway.Client/MxGatewayClient.cs:283-294`, `clients/dotnet/MxGateway.Client/GalaxyRepositoryClient.cs:392-403` |
|
||||
| Client.Dotnet-005 | Low | Resolved | Correctness & logic bugs | `clients/dotnet/MxGateway.Client/MxGatewaySession.cs:82,124,175` |
|
||||
| Client.Dotnet-006 | Low | Resolved | Code organization & conventions | `clients/dotnet/MxGateway.Client/MxGatewayClientOptions.cs:50`, `clients/dotnet/MxGateway.Client/MxGatewayClientContractInfo.cs:10-14` |
|
||||
| Client.Dotnet-007 | Low | Resolved | Documentation & comments | `clients/dotnet/MxGateway.Client/MxGatewayClient.cs:185-192` |
|
||||
| Client.Dotnet-008 | Low | Resolved | Correctness & logic bugs | `clients/dotnet/MxGateway.Client.Cli/MxGatewayCliSecretRedactor.cs:9-17` |
|
||||
| Client.Go-004 | Low | Resolved | mxaccessgw conventions | `clients/go/mxgateway/alarms_test.go:153-154`, `clients/go/mxgateway/galaxy_test.go:58-59` |
|
||||
| Client.Go-005 | Low | Resolved | Design-document adherence | `clients/go/mxgateway/client.go:64,68`, `clients/go/mxgateway/galaxy.go:83,87` |
|
||||
| Client.Go-006 | Low | Resolved | Error handling & resilience | `clients/go/mxgateway/errors.go:9-130` |
|
||||
| Client.Go-007 | Low | Resolved | Correctness & logic bugs | `clients/go/mxgateway/session.go:526-532` |
|
||||
| Client.Go-008 | Low | Resolved | Testing coverage | `clients/go/mxgateway/` (test files) |
|
||||
| Client.Go-009 | Low | Resolved | Code organization & conventions | `clients/go/mxgateway/galaxy.go:60-93,241-256`, `clients/go/mxgateway/client.go:41-74,190-205` |
|
||||
| Client.Go-010 | Low | Resolved | Documentation & comments | `clients/go/mxgateway/client.go:39-40` |
|
||||
| Client.Java-006 | Low | Resolved | Performance & resource management | `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/MxGatewayClient.java:323-328`, `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/GalaxyRepositoryClient.java:279-284` |
|
||||
| Client.Java-007 | Low | Resolved | Testing coverage | `clients/java/mxgateway-client/src/test/java/com/dohertylan/mxgateway/client/` |
|
||||
| Client.Java-008 | Low | Resolved | Error handling & resilience | `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/MxGatewayClient.java:298-304` |
|
||||
| Client.Java-009 | Low | Resolved | Code organization & conventions | `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/GalaxyRepositoryClient.java:310-391`, `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/MxGatewayClient.java:346-413` |
|
||||
| Client.Java-010 | Low | Resolved | Documentation & comments | `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/MxGatewayClient.java:269-272`, `clients/java/README.md:76` |
|
||||
| Client.Java-011 | Low | Resolved | Performance & resource management | `clients/java/mxgateway-client/src/main/java/com/dohertylan/mxgateway/client/MxEventStream.java:37-63` |
|
||||
| Client.Java-012 | Low | Resolved | Correctness & logic bugs | `clients/java/mxgateway-cli/src/main/java/com/dohertylan/mxgateway/cli/MxGatewayCli.java:667-674` |
|
||||
| Client.Rust-004 | Low | Resolved | Documentation & comments | `clients/rust/src/version.rs:7` |
|
||||
| Client.Rust-007 | Low | Resolved | Design-document adherence | `clients/rust/RustClientDesign.md:14-55` |
|
||||
| Client.Rust-008 | Low | Resolved | Performance & resource management | `clients/rust/src/value.rs:161-261` |
|
||||
| Client.Rust-009 | Low | Resolved | Testing coverage | `clients/rust/tests/client_behavior.rs`, `clients/rust/src/galaxy.rs` |
|
||||
| Client.Rust-010 | Low | Resolved | Error handling & resilience | `clients/rust/src/client.rs:255-268`, `clients/rust/src/galaxy.rs:204-216` |
|
||||
| Client.Rust-011 | Low | Resolved | mxaccessgw conventions | `clients/rust/src/session.rs:469` |
|
||||
| Server-007 | Low | Resolved | Performance & resource management | `src/MxGateway.Server/Galaxy/GalaxyHierarchyProjector.cs:55-70` |
|
||||
| Server-008 | Low | Resolved | Performance & resource management | `src/MxGateway.Server/Grpc/GalaxyRepositoryGrpcService.cs:111-134,160-189` |
|
||||
| Server-009 | Low | Resolved | Error handling & resilience | `src/MxGateway.Server/Security/Authentication/AuthSqliteConnectionFactory.cs:15-32` |
|
||||
| Server-010 | Low | Resolved | Security | `src/MxGateway.Server/Security/Authentication/SqliteApiKeyAdminStore.cs:91-114`, `src/MxGateway.Server/Dashboard/Components/Pages/ApiKeysPage.razor:168-172` |
|
||||
| Server-011 | Low | Resolved | Code organization & conventions | `src/MxGateway.Server/Sessions/WorkerAlarmRpcDispatcher.cs:1-46` |
|
||||
| Server-012 | Low | Resolved | Documentation & comments | `CLAUDE.md` (Authentication section and `apikey create` example) |
|
||||
| Server-013 | Low | Resolved | Testing coverage | `src/MxGateway.Tests/Gateway/Dashboard/DashboardAuthorizationHandlerTests.cs`, `src/MxGateway.Tests/Gateway/GatewayApplicationTests.cs` |
|
||||
| Server-014 | Low | Resolved | Documentation & comments | `src/MxGateway.Server/Grpc/MxAccessGatewayService.cs:162-171,191-198,206-214,229-237` |
|
||||
| Worker-009 | Low | Resolved | Performance & resource management | `src/MxGateway.Worker/Ipc/WorkerFrameReader.cs:31,49`, `src/MxGateway.Worker/Ipc/WorkerFrameWriter.cs:57-58` |
|
||||
| Worker-010 | Low | Resolved | Correctness & logic bugs | `src/MxGateway.Worker/Conversion/VariantConverter.cs:204-226` |
|
||||
| Worker-011 | Low | Resolved | Correctness & logic bugs | `src/MxGateway.Worker/Ipc/WorkerPipeClient.cs:169-171` |
|
||||
| Worker-012 | Low | Resolved | Documentation & comments | `src/MxGateway.Worker/MxAccess/MxAccessAlarmEventSink.cs:44-55`, `src/MxGateway.Worker/MxAccess/WnWrapAlarmConsumer.cs:38-43`, `src/MxGateway.Worker/MxAccess/MxAccessEventMapper.cs:106-112` |
|
||||
| Worker-013 | Low | Resolved | Testing coverage | `src/MxGateway.Worker/Sta/StaMessagePump.cs` |
|
||||
| Worker-014 | Low | Resolved | Code organization & conventions | `src/MxGateway.Worker/MxAccess/AlarmCommandHandler.cs:33`, `:202` |
|
||||
| Worker-015 | Low | Resolved | Correctness & logic bugs | `src/MxGateway.Worker/MxAccess/MxAccessEventQueue.cs:115-145` |
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
| Review date | 2026-05-18 |
|
||||
| Commit reviewed | `6c64030` |
|
||||
| Status | Reviewed |
|
||||
| Open findings | 8 |
|
||||
| Open findings | 0 |
|
||||
|
||||
## Checklist coverage
|
||||
|
||||
@@ -123,13 +123,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Performance & resource management |
|
||||
| Location | `src/MxGateway.Server/Galaxy/GalaxyHierarchyProjector.cs:55-70` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `Project` always iterates the full `entry.Index.ObjectViews` collection and re-applies all filters to skip `offset` matched items before collecting a page. Paging through a large Galaxy hierarchy is therefore O(total) per page and O(total²/pageSize) end-to-end. The cache is in-memory so impact is bounded, but for large galaxies repeated `DiscoverHierarchy` pagination wastes CPU.
|
||||
|
||||
**Recommendation:** Precompute and cache the filtered, ordered view list per `(filterSignature, sequence)` so subsequent pages are an O(pageSize) slice; the existing filter signature already keys page tokens.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-18. Confirmed against source: `Project` re-scanned and re-filtered the whole `ObjectViews` list on every page. Added a `ConditionalWeakTable<GalaxyHierarchyCacheEntry, ConcurrentDictionary<string, IReadOnlyList<GalaxyObjectView>>>` memo in `GalaxyHierarchyProjector`: the first projection of a given filter signature builds the filtered, ordered view list; subsequent pages take an O(pageSize) slice via index arithmetic. The memo is keyed on the immutable cache-entry instance, so when the cache publishes a new entry the stale memo becomes unreachable and is reclaimed with it — no explicit invalidation. `ResolveRoot` still runs before the memo lookup so a missing root surfaces `NotFound` consistently. Regression tests: `GalaxyHierarchyProjectorTests` (`Project_PagedAcrossEntireHierarchy_ReturnsEveryObjectExactlyOnce`, `Project_DistinctFiltersOnSameEntry_DoNotShareMemoizedViewList`, `Project_SameFilterRepeated_ReturnsIdenticalTotals`, `Project_DistinctCacheEntries_ProjectAgainstTheirOwnData`); existing `GalaxyRepositoryGrpcServiceTests` paging tests continue to pass unchanged.
|
||||
|
||||
### Server-008
|
||||
|
||||
@@ -138,13 +138,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Performance & resource management |
|
||||
| Location | `src/MxGateway.Server/Grpc/GalaxyRepositoryGrpcService.cs:111-134,160-189` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `WatchDeployEvents` calls `ResolveBrowseSubtrees()` on every streamed event, and `MapDeployEvent` re-runs `GalaxyHierarchyProjector.Project` over the entire cached hierarchy (and `Sum`s attribute counts) for every event of every constrained subscriber. `GalaxyGlobMatcher.IsMatch` also rebuilds the glob regex on each call. With many constrained subscribers and frequent deploys this is avoidable work.
|
||||
|
||||
**Recommendation:** Hoist `ResolveBrowseSubtrees()` out of the loop; compute scoped object/attribute counts once per deploy sequence and cache by `(sequence, browseSubtrees)`; cache compiled glob `Regex` instances in `GalaxyGlobMatcher`.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-18. Confirmed against source. Three changes: (1) `WatchDeployEvents` now resolves `ResolveBrowseSubtrees()` once before the streaming loop — the caller's identity and constraints are fixed for the stream lifetime, so per-event resolution was pure waste. (2) `GalaxyGlobMatcher` now caches compiled `Regex` instances in a `ConcurrentDictionary` keyed by glob pattern (with `RegexOptions.Compiled`), so the same handful of globs are translated once instead of on every `IsMatch` call. (3) The per-event `MapDeployEvent` re-projection is no longer a separate hot path: with finding Server-007 resolved, `GalaxyHierarchyProjector.Project` memoizes the filtered view list per `(cache entry, filter signature)`, so the scoped-count projection in `MapDeployEvent` for a constrained subscriber is O(matched-slice) after the first event of a given deploy sequence rather than a full re-scan — this subsumes the recommendation's `(sequence, browseSubtrees)` cache (the memo is keyed on the per-sequence cache-entry instance and the browse-subtree-bearing filter signature). Regression tests: `GalaxyFilterInputSafetyTests.GlobMatcher_RepeatedAndInterleavedPatterns_StayCorrect` (glob cache correctness); existing `WatchDeployEvents` and `GalaxyFilterInputSafetyTests` coverage continues to pass.
|
||||
|
||||
### Server-009
|
||||
|
||||
@@ -153,13 +153,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Error handling & resilience |
|
||||
| Location | `src/MxGateway.Server/Security/Authentication/AuthSqliteConnectionFactory.cs:15-32` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Each auth-store operation opens a fresh `SqliteConnection` with no busy timeout, no WAL journal mode, and default journaling. `MarkKeyUsedAsync` runs on every authenticated request and `SqliteApiKeyAuditStore` appends on every denial; under concurrent load these writers can collide and surface `SQLITE_BUSY` as a hard failure on the request path.
|
||||
|
||||
**Recommendation:** Set `Pooling`, a non-zero `DefaultTimeout`/`busy_timeout`, and enable WAL (`PRAGMA journal_mode=WAL`) once at startup so concurrent readers/writers degrade gracefully.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-18. Confirmed against source: the connection string set only `DataSource` and `Mode`. `AuthSqliteConnectionFactory.CreateConnection` now also sets `Pooling = true` and a non-zero `DefaultTimeout`. A new `OpenConnectionAsync(CancellationToken)` opens the connection and applies `PRAGMA journal_mode=WAL` and `PRAGMA busy_timeout` (5 s); WAL is a persistent database-level setting so re-applying it per connection is a cheap no-op, while `busy_timeout` is per-connection state. All nine auth-store call sites (`SqliteApiKeyAdminStore`, `SqliteApiKeyAuditStore`, `SqliteApiKeyStore`, `SqliteAuthStoreMigrator`) were switched from `CreateConnection()` + `OpenAsync()` to `OpenConnectionAsync()`. `docs/Authentication.md` updated to describe the WAL/busy-timeout behavior. Regression test: `SqliteAuthStoreTests.OpenConnectionAsync_EnablesWalJournalModeAndBusyTimeout`.
|
||||
|
||||
### Server-010
|
||||
|
||||
@@ -168,13 +168,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Security |
|
||||
| Location | `src/MxGateway.Server/Security/Authentication/SqliteApiKeyAdminStore.cs:91-114`, `src/MxGateway.Server/Dashboard/Components/Pages/ApiKeysPage.razor:168-172` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `RotateAsync` sets `revoked_utc = NULL`, so rotating a previously revoked key silently reactivates it. This is documented intentional behavior in `docs/Authentication.md:167`, but the dashboard renders the "Rotate" button unconditionally — including for keys whose status badge says "Revoked" — so an operator can un-revoke a deliberately disabled key without an explicit warning.
|
||||
|
||||
**Recommendation:** Either hide/disable the Rotate action for revoked keys in `ApiKeysPage.razor`, require an explicit confirmation, or have `RotateAsync` preserve `revoked_utc` and add a separate explicit "reactivate" operation.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-18. Confirmed against source: `ApiKeysPage.razor` rendered the Rotate button unconditionally while Revoke was already gated on `key.RevokedUtc is null`. Took the lowest-risk recommended option — the dashboard now renders the Rotate (and Revoke) actions only for keys whose status is `Active`; a revoked key shows a "No actions" placeholder, so an operator cannot un-revoke a deliberately disabled key as a side effect of a rotation. `RotateAsync`'s store-level behavior is unchanged (rotation by `key_id` still clears `revoked_utc`, which the CLI relies on); `docs/Authentication.md` updated to document both the store behavior and the dashboard restriction. No automated test added: the change is pure conditional Razor rendering and the test project has no bUnit component-rendering harness; the underlying `DashboardApiKeyManagementService` is already unit-tested.
|
||||
|
||||
### Server-011
|
||||
|
||||
@@ -183,13 +183,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Location | `src/MxGateway.Server/Sessions/WorkerAlarmRpcDispatcher.cs:1-46` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `WorkerAlarmRpcDispatcher` deviates from the module's conventions: it fully-qualifies `System.Guid`, `System.ArgumentNullException`, and `System.Threading` types inline instead of relying on `using` directives, and uses an explicit constructor with `this.`-qualified field assignment while the rest of the module (e.g. `ConstraintEnforcer`, `MxAccessGatewayService`, `GalaxyRepositoryGrpcService`) uses primary constructors. `docs/style-guides/CSharpStyleGuide.md` is authoritative for gateway code.
|
||||
|
||||
**Recommendation:** Add the needed `using` directives, drop the inline fully-qualified names, and convert to a primary constructor for consistency.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-18. Confirmed against source. Converted `WorkerAlarmRpcDispatcher` to a primary constructor with the standard `?? throw new ArgumentNullException(...)` field-initializer guard; dropped the inline `System.Guid` / `System.ArgumentNullException` qualifications (using implicit `using System;`); removed redundant `using System.Collections.Generic;` / `System.Threading` / `System.Threading.Tasks;` directives (covered by `ImplicitUsings`); replaced the two `if (... is null) throw new System.ArgumentNullException(...)` checks with `ArgumentNullException.ThrowIfNull`. The stale class-level `<summary>`/`<remarks>` ("Replaces NotWiredAlarmRpcDispatcher once ... wired in", "partially wired", "returns an Unimplemented diagnostic") were corrected to describe the actual GUID-vs-`Provider!Group.Tag` handling — overlapping with Server-014. No behavior change, so no new test; existing `WorkerAlarmRpcDispatcherTests` continue to pass and the project builds warning-free under `TreatWarningsAsErrors`.
|
||||
|
||||
### Server-012
|
||||
|
||||
@@ -198,13 +198,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `CLAUDE.md` (Authentication section and `apikey create` example) |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** CLAUDE.md describes scopes as `session`, `invoke`, `event`, `metadata`, `admin` and shows `apikey create --scopes session,invoke,event,metadata,admin`. The actual canonical scope strings (used by `GatewayScopes`, `GatewayGrpcScopeResolver`, and `docs/Authorization.md`) are `session:open`, `session:close`, `invoke:read`, `invoke:write`, `invoke:secure`, `events:read`, `metadata:read`, `admin`. A key created per the CLAUDE.md example carries scopes the resolver never matches.
|
||||
|
||||
**Recommendation:** Update CLAUDE.md's scope list and the `apikey` example to the canonical `*:*` scope strings, per CLAUDE.md's own rule that docs change with the code.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-18. Confirmed against `GatewayScopes` (`session:open`, `session:close`, `invoke:read`, `invoke:write`, `invoke:secure`, `events:read`, `metadata:read`, `admin`). CLAUDE.md's Build/Test/Run `apikey create` example and the Authentication-section scope list were both updated to the canonical `*:*` strings. (Note: since finding Server-004 was resolved, the old example would now be actively rejected at create time rather than silently creating an unusable key, making the doc correction load-bearing.) Pure documentation change; no test.
|
||||
|
||||
### Server-013
|
||||
|
||||
@@ -213,13 +213,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `src/MxGateway.Tests/Gateway/Dashboard/DashboardAuthorizationHandlerTests.cs`, `src/MxGateway.Tests/Gateway/GatewayApplicationTests.cs` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `DashboardAuthorizationHandler` is unit-tested in isolation, but no test exercises the dashboard routes end-to-end to confirm the policy is actually enforced — which is why Server-001 (policy registered but never wired) went uncaught. There are also no tests for `WorkerExecutableValidator` (PE-header architecture parsing), `GalaxyGlobMatcher` (anchoring/escaping/empty-glob fail-open), or `GalaxyHierarchyProjector` pagination/page-token behavior.
|
||||
|
||||
**Recommendation:** Add a `WebApplicationFactory` integration test that requests a dashboard page unauthenticated and asserts the redirect/401, plus unit tests for `WorkerExecutableValidator`, `GalaxyGlobMatcher`, and projector paging.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-18. Re-triaged against the current test suite: three of the four named gaps were already closed. (1) The dashboard route-level enforcement test exists — `GatewayApplicationTests.Build_WhenDashboardEnabled_ComponentRoutesRequireAuthorization` (and `..._AuthEndpointsAllowAnonymousAccess`), added when Server-001 was fixed. (2) `GalaxyGlobMatcher` anchoring/escaping/empty-glob behavior is covered by `GalaxyFilterInputSafetyTests` (`GlobMatcher_TreatsSqlMetacharactersAsLiterals`, `GlobMatcher_DoesNotTreatLikeWildcardsAsWildcards`, `GlobMatcher_WithPathologicalInput_DoesNotHang`), now extended with `GlobMatcher_RepeatedAndInterleavedPatterns_StayCorrect`. (3) Projector pagination/page-token behavior is covered end-to-end by `GalaxyRepositoryGrpcServiceTests` and now directly by the new `GalaxyHierarchyProjectorTests`. The one genuine remaining gap — `WorkerExecutableValidator` PE-header parsing — was closed with the new `WorkerExecutableValidatorTests` (7 cases: matching/mismatched x86 and x64, missing `MZ` header, file too small, missing `PE` signature), exercising the validator against synthesized minimal PE fixtures.
|
||||
|
||||
### Server-014
|
||||
|
||||
@@ -228,10 +228,10 @@
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `src/MxGateway.Server/Grpc/MxAccessGatewayService.cs:162-171,191-198,206-214,229-237` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The XML `<remarks>` and inline comments on `AcknowledgeAlarm` and `QueryActiveAlarms` describe the alarm path as not yet wired and say `NotWiredAlarmRpcDispatcher` is the default ("Clients calling this method today receive an OK reply with a 'worker alarm path not yet wired' diagnostic", "an empty stream until PR A.2"). In fact `SessionServiceCollectionExtensions.AddGatewaySessions` registers `WorkerAlarmRpcDispatcher` as `IAlarmRpcDispatcher`, so DI always injects the production dispatcher; `NotWiredAlarmRpcDispatcher` is only the null fallback. The comments are stale and misleading.
|
||||
|
||||
**Recommendation:** Update the `AcknowledgeAlarm`/`QueryActiveAlarms` remarks to reflect that `WorkerAlarmRpcDispatcher` is the wired default, and describe its actual GUID-vs-`Provider!Group.Tag` handling.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-18. Confirmed against source: `SessionServiceCollectionExtensions` registers `WorkerAlarmRpcDispatcher` as `IAlarmRpcDispatcher`, so the "not yet wired" / "empty stream until PR A.2" / "PR A.6/A.7 follow-up" prose in the `AcknowledgeAlarm` and `QueryActiveAlarms` `<remarks>` and inline comments was stale. Rewrote both `<remarks>` blocks and both inline comments to state that DI binds the production `WorkerAlarmRpcDispatcher`, that it routes over the worker pipe IPC, and that `AcknowledgeAlarm` handles a canonical-GUID reference (→ `AcknowledgeAlarmCommand`) and a `Provider!Group.Tag` reference (→ `AcknowledgeAlarmByNameCommand`), with `NotWiredAlarmRpcDispatcher` being only the null fallback. The matching stale `WorkerAlarmRpcDispatcher` class-level XML doc was corrected as part of Server-011. Pure documentation/comment change; no test.
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
| Review date | 2026-05-18 |
|
||||
| Commit reviewed | `6c64030` |
|
||||
| Status | Reviewed |
|
||||
| Open findings | 7 |
|
||||
| Open findings | 0 |
|
||||
|
||||
## Checklist coverage
|
||||
|
||||
@@ -157,13 +157,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Performance & resource management |
|
||||
| Location | `src/MxGateway.Worker/Ipc/WorkerFrameReader.cs:31,49`, `src/MxGateway.Worker/Ipc/WorkerFrameWriter.cs:57-58` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Every frame read allocates a fresh 4-byte length buffer and a payload `byte[]`; every write allocates `ToByteArray()` plus a 4-byte prefix. On the hot event-drain path (batches of up to 128 `WorkerEvent` frames every 25 ms) this produces steady gen-0 garbage. `WorkerFrameWriter` also effectively serializes twice (`CalculateSize()` then `ToByteArray()`).
|
||||
|
||||
**Recommendation:** Reuse a pooled buffer / `ArrayPool<byte>` for the length prefix and payload, and write directly into a pooled buffer using `CodedOutputStream`. Low priority unless event throughput is high.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** 2026-05-18 — `WorkerFrameWriter.WriteAsync` now serializes the envelope exactly once into a single frame buffer that carries the 4-byte length prefix followed by the payload, via `envelope.WriteTo(new Span<byte>(frame, sizeof(uint), payloadLength))`. This eliminates the redundant second serialization pass (`ToByteArray()` re-runs `CalculateSize()` internally), the separate length-prefix array, and the separate prefix `WriteAsync`/extra `FlushAsync` round. `WorkerFrameReader.ReadAsync` now rents its payload buffer from `ArrayPool<byte>.Shared` and returns it in a `finally` once `WorkerEnvelope.Parser.ParseFrom(payload, 0, length)` has copied what it needs; `ReadExactlyOrThrowAsync` gained an explicit `count` parameter so it honours the logical frame length rather than the (possibly larger) rented buffer length. The 4-byte length-prefix buffer is left as a per-call stack-sized allocation — pooling a 4-byte array is not worthwhile. Verified by the new regression test `WorkerFrameProtocolTests.ReadAsync_WithVaryingFrameSizes_ParsesEachFrameExactly`, which reads a large frame followed by a small frame through one reader to prove the pooled buffer is sliced to each frame's own length and never leaks stale trailing bytes; the existing round-trip, malformed-payload, and concurrent-write tests continue to pass.
|
||||
|
||||
### Worker-010
|
||||
|
||||
@@ -172,13 +172,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `src/MxGateway.Worker/Conversion/VariantConverter.cs:204-226` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `ConvertInt64Scalar` is reached for `TypeCode.UInt32` and `TypeCode.Int64`. For a `uint` with `expectedDataType == MxDataType.Time`, the value is treated as a Windows `FILETIME` via `DateTime.FromFileTimeUtc(longValue)`; a 32-bit FILETIME is never a valid full FILETIME, so this silently produces a near-epoch timestamp rather than a raw/diagnostic value. Unlikely in practice but a silent misconversion.
|
||||
|
||||
**Recommendation:** Only apply the `MxDataType.Time` FILETIME projection for 64-bit source types; for `uint` fall through to integer or raw.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** 2026-05-18 — `ConvertInt64Scalar`'s `MxDataType.Time` FILETIME projection is now gated on `value is long`. A genuine 64-bit `long` still projects to a `Timestamp` via `DateTime.FromFileTimeUtc`; a 32-bit `uint` — which can only hold the low half of a FILETIME — now falls through to the integer projection (`DataType = Integer`, `Int64Value`) instead of silently producing a bogus near-1601 timestamp. Verified by the regression test `VariantConverterTests.Convert_WithUInt32AndExpectedTime_DoesNotProjectFileTime`; the existing `Convert_WithFileTimeAndExpectedTime_ProjectsTimestamp` (a `long` FILETIME) continues to pass, confirming the 64-bit path is unchanged.
|
||||
|
||||
### Worker-011
|
||||
|
||||
@@ -187,13 +187,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `src/MxGateway.Worker/Ipc/WorkerPipeClient.cs:169-171` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `retryAttempts` is computed as `(connectTimeout / min(connectTimeout, attemptTimeout)) - 1`. With defaults (30000 / 2000) this yields 14 retries, but each retry also incurs Polly exponential backoff. The overall `connectDeadline` (`CancelAfter(connectTimeout)`) is the real bound, so the computed attempt count can be larger or smaller than the time budget allows, and the formula is opaque.
|
||||
|
||||
**Recommendation:** Drive retries purely off the `connectDeadline` token (Polly stops when cancelled) and drop the fragile attempt-count arithmetic, or add a comment explaining the intent.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** 2026-05-18 — The opaque `retryAttempts` arithmetic in `ConnectWithRetryAsync` was removed. `MaxRetryAttempts` is now `int.MaxValue`, so the retry loop is bounded solely by the `connectDeadline` linked token (`CancelAfter(_connectTimeoutMilliseconds)`): Polly stops retrying the moment that token is cancelled, making the overall connect timeout the single source of truth and correctly accounting for the exponential backoff between attempts (which the old formula ignored). A comment documents the intent. No new test was added — the change does not alter observable behavior (the deadline was always the real bound; the old formula always permitted more attempts than fit the budget), and the existing `WorkerPipeClientTests.RunAsync_RetriesUntilPipeServerAppears` (server appears mid-retry) and `RunAsync_WhenPipeNeverAppears_ThrowsTimeoutException` (deadline ends the loop) already cover both retry-until-success and deadline-bounded termination.
|
||||
|
||||
### Worker-012
|
||||
|
||||
@@ -202,13 +202,15 @@
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `src/MxGateway.Worker/MxAccess/MxAccessAlarmEventSink.cs:44-55`, `src/MxGateway.Worker/MxAccess/WnWrapAlarmConsumer.cs:38-43`, `src/MxGateway.Worker/MxAccess/MxAccessEventMapper.cs:106-112` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** Multiple comments describe the alarm path as not-yet-wired future work ("PR A.2 — COM-side subscription scaffold … the worker advertises no alarm subscription", "the worker bootstrap will gain a thin 'run-on-STA' wrapper as part of A.3"). As of commit 6c64030 the alarm command handler, STA poll loop, and `SubscribeAlarms`/`AcknowledgeAlarm`/`QueryActiveAlarms` are all wired. These comments are stale and misleading.
|
||||
|
||||
**Recommendation:** Update the XML docs/comments to describe the shipped behavior; remove the "future PR" framing.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Re-triage:** The `WnWrapAlarmConsumer.cs:38-43` citation is inaccurate — those lines were rewritten by Worker-001 and already describe the shipped no-internal-timer threading model correctly; nothing stale there. Conversely, two stale comments the finding did *not* cite were found on the same alarm path and fixed under the same root cause: `AlarmDispatcher.cs`'s `<remarks>` still framed the dispatcher as "the in-process slice of A.3" with a "companion follow-up PR" adding the (now-shipped) `SubscribeAlarmsCommand`/`AcknowledgeAlarmCommand`/`QueryActiveAlarmsCommand`, and stated the consumer "polls on a `System.Threading.Timer` thread today" — a claim made false by Worker-001's removal of that timer; and `AlarmCommandHandler.cs`'s `<remarks>` likewise asserted "the wnwrap consumer's polling timer fires on a thread-pool thread". The discovery document `docs/AlarmClientDiscovery.md` (referenced by the source comments) was deliberately left untouched: it is a historical research log of the investigation that chose the shipped design, not API/contract/lifecycle prose, and the source comments cite only its still-accurate "Option A — captured" payload schema.
|
||||
|
||||
**Resolution:** 2026-05-18 — Rewrote the stale alarm-path comments to describe shipped behavior with no "future PR / A.2 / A.3" framing. `MxAccessAlarmEventSink`: the class `<remarks>` and the `Attach` comment now explain that `AlarmDispatcher` owns the consumer→sink→queue wire-up and that `Attach` carries only the session id (no COM-event subscription is needed because the polled wnwrap consumer raises transition events itself). `MxAccessEventMapper.CreateOnAlarmTransition`'s XML summary now states the worker drives it from `MxAccessAlarmEventSink.EnqueueTransition` once `AlarmDispatcher` decodes a wnwrap transition. `AlarmDispatcher` and `AlarmCommandHandler` `<remarks>` were corrected to describe the shipped command surface and the no-internal-timer / STA-driven polling model (the `System.Threading.Timer` claims were factually wrong post-Worker-001). Pure documentation change — no behavior altered, no test needed; the build stays green.
|
||||
|
||||
### Worker-013
|
||||
|
||||
@@ -217,13 +219,15 @@
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `src/MxGateway.Worker/Sta/StaMessagePump.cs` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `StaMessagePump` — the heart of COM event delivery (`MsgWaitForMultipleObjectsEx` + `PeekMessage`/`DispatchMessage`) — has no direct unit tests. `StaRuntimeTests` exercises it indirectly for command wake-up but never verifies that a posted Windows message actually wakes the wait and is dispatched, nor that `PumpPendingMessages` returns a correct count. The alarm poll-loop lifecycle in `MxAccessStaSession` (start/cancel/await on shutdown) also has no test. These are the most failure-sensitive paths in the module.
|
||||
|
||||
**Recommendation:** Add tests that post a message to the STA thread and assert it is pumped, and tests covering alarm poll-loop start/stop and shutdown ordering.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Re-triage:** This finding is stale as of the reviewed branch — the coverage it asks for already exists. `src/MxGateway.Worker.Tests/Sta/StaMessagePumpTests.cs` contains direct `StaMessagePump` tests covering null-argument validation, waking on a signalled event, returning on timeout, the zero-timeout conversion branch, `PumpPendingMessages` returning the correct count for messages posted to the STA thread (`PumpPendingMessages_MessagesPostedToStaThread_ReturnsCountProcessed`, `PumpPendingMessages_NoMessagesPosted_ReturnsZero`), and `WaitForWorkOrMessages` waking on a posted Windows message (`WaitForWorkOrMessages_WindowsMessagePosted_ReturnsForInputAvailable`) — exactly the "post a message and assert it is pumped" test the recommendation asks for. The alarm poll-loop lifecycle is covered by `MxAccessStaSessionTests.StartAsync_WithAlarmCommandHandlerFactory_PollOnceCalledViaSta` (start → poll runs on the STA) and `Dispose_StopsAlarmPollLoop` (Dispose joins the poll task; no further polls). The finding was raised against a stale view of the test project; no source or test change is required. Re-triaged as already resolved rather than fixed.
|
||||
|
||||
**Resolution:** 2026-05-18 — No code change. Re-triaged: the requested direct `StaMessagePump` tests (including posted-message dispatch and pump count) and the alarm poll-loop start/stop lifecycle tests already exist in `StaMessagePumpTests.cs` and `MxAccessStaSessionTests.cs`. See the re-triage note above for the specific test names.
|
||||
|
||||
### Worker-014
|
||||
|
||||
@@ -232,13 +236,13 @@
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Location | `src/MxGateway.Worker/MxAccess/AlarmCommandHandler.cs:33`, `:202` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** The file declares two public types — the `AlarmCommandHandler` class and the `IAlarmCommandHandler` interface. The C# style guide and the rest of the module follow one-public-type-per-file (e.g. interfaces in their own `I*.cs` files like `IMxAccessAlarmConsumer.cs`).
|
||||
|
||||
**Recommendation:** Move `IAlarmCommandHandler` to its own `IAlarmCommandHandler.cs` for consistency.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** 2026-05-18 — The `IAlarmCommandHandler` interface (with its XML docs) was moved verbatim out of `AlarmCommandHandler.cs` into a new `src/MxGateway.Worker/MxAccess/IAlarmCommandHandler.cs`, with its own `using` directives (`System`, `System.Collections.Generic`, `MxGateway.Contracts.Proto`). `AlarmCommandHandler.cs` now declares one public type, matching the module's one-public-type-per-file convention (cf. `IMxAccessAlarmConsumer.cs`). Pure file-organization change — no API surface, behavior, or namespace changed; no test needed. The worker build is clean with zero warnings (no unused usings left behind in `AlarmCommandHandler.cs`).
|
||||
|
||||
### Worker-015
|
||||
|
||||
@@ -247,10 +251,10 @@
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `src/MxGateway.Worker/MxAccess/MxAccessEventQueue.cs:115-145` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** On overflow, `Enqueue` records the overflow fault and throws `MxAccessEventQueueOverflowException`; `MxAccessBaseEventSink.EnqueueEvent` catches it and calls `RecordFault` again. `RecordFault` is a no-op when a fault already exists, so the second call is harmless — but the intent is muddled, and there is no test asserting the dropped-event behavior. This is acceptable per the fail-fast design but undocumented at the call site.
|
||||
|
||||
**Recommendation:** Add a brief comment in `EnqueueEvent` clarifying that an overflow exception is expected and already self-records its fault, so the catch is intentionally a near no-op.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** 2026-05-18 — Added a comment in `MxAccessBaseEventSink.EnqueueEvent`'s catch block (per the finding's recommendation) explaining that two distinct fail-fast failures land there: a conversion failure from `createEvent()` (recorded here as an `MxaccessEventConversionFailed` fault) and an `MxAccessEventQueueOverflowException` from `Enqueue` at capacity, which — per the fail-fast backpressure design in `docs/DesignDecisions.md` — drops the event and has *already* self-recorded a `QueueOverflow` fault inside `Enqueue`. Because `MxAccessEventQueue.RecordFault` keeps only the first fault, the catch's `RecordFault` call is then a deliberate near no-op rather than a second, conflicting fault. Pure comment change as recommended — no behavior altered. `docs/DesignDecisions.md` already documents the fail-fast event backpressure rule, so no doc change was required.
|
||||
|
||||
+11
-18
@@ -107,29 +107,20 @@ The gateway keeps API key state in a dedicated SQLite database. SQLite is suffic
|
||||
|
||||
### Connection factory
|
||||
|
||||
`AuthSqliteConnectionFactory` reads `GatewayOptions.Authentication.SqlitePath`, ensures the parent directory exists, and opens the connection in `ReadWriteCreate` mode so first-run installations can create the file without manual provisioning:
|
||||
`AuthSqliteConnectionFactory` reads `GatewayOptions.Authentication.SqlitePath`, ensures the parent directory exists, and builds a connection string in `ReadWriteCreate` mode so first-run installations can create the file without manual provisioning. Connection pooling is enabled and the connection string carries a non-zero `DefaultTimeout`:
|
||||
|
||||
```csharp
|
||||
public SqliteConnection CreateConnection()
|
||||
SqliteConnectionStringBuilder builder = new()
|
||||
{
|
||||
string sqlitePath = options.Value.Authentication.SqlitePath;
|
||||
string? directory = Path.GetDirectoryName(sqlitePath);
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(directory))
|
||||
{
|
||||
Directory.CreateDirectory(directory);
|
||||
}
|
||||
|
||||
SqliteConnectionStringBuilder builder = new()
|
||||
{
|
||||
DataSource = sqlitePath,
|
||||
Mode = SqliteOpenMode.ReadWriteCreate
|
||||
};
|
||||
|
||||
return new SqliteConnection(builder.ToString());
|
||||
}
|
||||
DataSource = sqlitePath,
|
||||
Mode = SqliteOpenMode.ReadWriteCreate,
|
||||
Pooling = true,
|
||||
DefaultTimeout = (int)BusyTimeout.TotalSeconds,
|
||||
};
|
||||
```
|
||||
|
||||
Every store opens its connection through `OpenConnectionAsync`, which opens the connection and then applies `PRAGMA journal_mode=WAL` and `PRAGMA busy_timeout`. WAL is a persistent database-level setting so re-applying it per connection is a cheap no-op; `busy_timeout` is per-connection state. Because `MarkKeyUsedAsync` runs on every authenticated request and `SqliteApiKeyAuditStore` appends on every denial, this lets concurrent readers and writers retry briefly instead of surfacing `SQLITE_BUSY` as a hard failure on the request path.
|
||||
|
||||
### Schema
|
||||
|
||||
`SqliteAuthSchema` declares table names and the current schema version as constants. Three tables are involved:
|
||||
@@ -166,6 +157,8 @@ public static ApiKeyRecord Read(SqliteDataReader reader)
|
||||
|
||||
`SqliteApiKeyAdminStore` (`IApiKeyAdminStore`) implements administrative mutations: `CreateAsync` accepts an `ApiKeyCreateRequest`, `RevokeAsync` sets `revoked_utc` only when not already revoked, and `RotateAsync` replaces `secret_hash`, clears `last_used_utc`, and clears `revoked_utc` so a rotated key is immediately usable.
|
||||
|
||||
Because `RotateAsync` clears `revoked_utc`, rotating a previously revoked key reactivates it. The dashboard API Keys page therefore offers the Rotate (and Revoke) action only for keys whose status is `Active`; a revoked key shows no actions, so an operator cannot un-revoke a deliberately disabled key as a side effect of a rotation.
|
||||
|
||||
### Audit trail
|
||||
|
||||
`SqliteApiKeyAuditStore` (`IApiKeyAuditStore`) appends `ApiKeyAuditEntry` values to the `api_key_audit` table and stamps each row with a UTC timestamp inside the store rather than trusting the caller. `ListRecentAsync` returns the most recent rows ordered by `audit_id` descending and projects them into `ApiKeyAuditRecord`. Rows are kept even after the referenced key is revoked because the audit history is the durable record of administrative action; the `key_id` column is nullable to accommodate non-key-scoped events such as `init-db`.
|
||||
|
||||
@@ -165,19 +165,26 @@ else
|
||||
{
|
||||
<td>
|
||||
<div class="btn-group btn-group-sm" role="group" aria-label="API key actions">
|
||||
<button type="button" class="btn btn-outline-secondary"
|
||||
disabled="@IsBusy"
|
||||
@onclick="() => RotateApiKeyAsync(key.KeyId)">
|
||||
Rotate
|
||||
</button>
|
||||
@if (key.RevokedUtc is null)
|
||||
{
|
||||
@* Rotate clears revoked_utc, which would silently reactivate a
|
||||
deliberately revoked key. Only offer it for active keys so a
|
||||
revoked key is not un-revoked as a side effect of rotation. *@
|
||||
<button type="button" class="btn btn-outline-secondary"
|
||||
disabled="@IsBusy"
|
||||
@onclick="() => RotateApiKeyAsync(key.KeyId)">
|
||||
Rotate
|
||||
</button>
|
||||
<button type="button" class="btn btn-outline-danger"
|
||||
disabled="@IsBusy"
|
||||
@onclick="() => RevokeApiKeyAsync(key.KeyId)">
|
||||
Revoke
|
||||
</button>
|
||||
}
|
||||
else
|
||||
{
|
||||
<span class="text-muted small">No actions</span>
|
||||
}
|
||||
</div>
|
||||
</td>
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
@@ -5,6 +6,14 @@ namespace MxGateway.Server.Galaxy;
|
||||
|
||||
public static class GalaxyGlobMatcher
|
||||
{
|
||||
/// <summary>
|
||||
/// Compiled-regex cache keyed by glob pattern. <c>IsMatch</c> is called once per
|
||||
/// object per <c>DiscoverHierarchy</c>/<c>WatchDeployEvents</c> evaluation, so the
|
||||
/// same handful of glob patterns are translated repeatedly; caching avoids
|
||||
/// rebuilding and recompiling the regex on every call.
|
||||
/// </summary>
|
||||
private static readonly ConcurrentDictionary<string, Regex> RegexCache = new(StringComparer.Ordinal);
|
||||
|
||||
public static bool IsMatch(string value, string glob)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(glob))
|
||||
@@ -12,11 +21,15 @@ public static class GalaxyGlobMatcher
|
||||
return true;
|
||||
}
|
||||
|
||||
return Regex.IsMatch(
|
||||
value ?? string.Empty,
|
||||
BuildRegex(glob),
|
||||
RegexOptions.CultureInvariant | RegexOptions.IgnoreCase,
|
||||
TimeSpan.FromMilliseconds(100));
|
||||
return GetOrCreateRegex(glob).IsMatch(value ?? string.Empty);
|
||||
}
|
||||
|
||||
private static Regex GetOrCreateRegex(string glob)
|
||||
{
|
||||
return RegexCache.GetOrAdd(glob, static pattern => new Regex(
|
||||
BuildRegex(pattern),
|
||||
RegexOptions.CultureInvariant | RegexOptions.IgnoreCase | RegexOptions.Compiled,
|
||||
TimeSpan.FromMilliseconds(100)));
|
||||
}
|
||||
|
||||
private static string BuildRegex(string glob)
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using Grpc.Core;
|
||||
@@ -7,6 +9,18 @@ namespace MxGateway.Server.Galaxy;
|
||||
|
||||
public static class GalaxyHierarchyProjector
|
||||
{
|
||||
/// <summary>
|
||||
/// Per-cache-entry memo of filtered, ordered <see cref="GalaxyObjectView"/> lists
|
||||
/// keyed by filter signature. Without it, paging through a large hierarchy
|
||||
/// re-applies every filter and re-scans the full <see cref="GalaxyHierarchyIndex.ObjectViews"/>
|
||||
/// collection on every page — O(total) per page, O(total²/pageSize) end-to-end.
|
||||
/// With it, the first page builds the filtered list and each subsequent page is an
|
||||
/// O(pageSize) slice. The table is keyed on the immutable cache-entry instance, so
|
||||
/// when the cache publishes a new entry the stale memo becomes unreachable and is
|
||||
/// reclaimed with it — no explicit invalidation needed.
|
||||
/// </summary>
|
||||
private static readonly ConditionalWeakTable<GalaxyHierarchyCacheEntry, ConcurrentDictionary<string, IReadOnlyList<GalaxyObjectView>>> FilteredViewCache = new();
|
||||
|
||||
public static GalaxyHierarchyQueryResult Project(
|
||||
GalaxyHierarchyCacheEntry entry,
|
||||
DiscoverHierarchyRequest request,
|
||||
@@ -39,8 +53,6 @@ public static class GalaxyHierarchyProjector
|
||||
throw new ArgumentOutOfRangeException(nameof(pageSize), pageSize, "Page size must be greater than zero.");
|
||||
}
|
||||
|
||||
IReadOnlyList<GalaxyObjectView> views = entry.Index.ObjectViews;
|
||||
GalaxyObjectView? root = ResolveRoot(request, views);
|
||||
int? maxDepth = request.MaxDepth;
|
||||
if (maxDepth < 0)
|
||||
{
|
||||
@@ -49,30 +61,61 @@ public static class GalaxyHierarchyProjector
|
||||
"DiscoverHierarchy max_depth must be greater than or equal to zero when provided."));
|
||||
}
|
||||
|
||||
List<GalaxyObject> page = [];
|
||||
int matchedCount = 0;
|
||||
string filterSignature = ComputeFilterSignature(request, browseSubtreeGlobs);
|
||||
IReadOnlyList<GalaxyObjectView> matchedViews = GetFilteredViews(
|
||||
entry,
|
||||
request,
|
||||
browseSubtreeGlobs,
|
||||
maxDepth,
|
||||
filterSignature);
|
||||
|
||||
bool includeAttributes = IncludeAttributes(request);
|
||||
foreach (GalaxyObjectView view in views)
|
||||
List<GalaxyObject> page = new(Math.Min(pageSize, Math.Max(0, matchedViews.Count - offset)));
|
||||
int end = (int)Math.Min((long)offset + pageSize, matchedViews.Count);
|
||||
for (int index = offset; index < end; index++)
|
||||
{
|
||||
if (!MatchesRoot(view, root, maxDepth)
|
||||
|| !MatchesBrowseSubtrees(view, browseSubtreeGlobs)
|
||||
|| !MatchesFilters(view.Object, request))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (matchedCount >= offset && page.Count < pageSize)
|
||||
{
|
||||
page.Add(CloneObject(view.Object, includeAttributes));
|
||||
}
|
||||
|
||||
matchedCount++;
|
||||
page.Add(CloneObject(matchedViews[index].Object, includeAttributes));
|
||||
}
|
||||
|
||||
return new GalaxyHierarchyQueryResult(
|
||||
page,
|
||||
matchedCount,
|
||||
ComputeFilterSignature(request, browseSubtreeGlobs));
|
||||
matchedViews.Count,
|
||||
filterSignature);
|
||||
}
|
||||
|
||||
private static IReadOnlyList<GalaxyObjectView> GetFilteredViews(
|
||||
GalaxyHierarchyCacheEntry entry,
|
||||
DiscoverHierarchyRequest request,
|
||||
IReadOnlyList<string>? browseSubtreeGlobs,
|
||||
int? maxDepth,
|
||||
string filterSignature)
|
||||
{
|
||||
// ResolveRoot can throw RpcException(NotFound); run it before consulting the
|
||||
// memo so a bad root surfaces consistently regardless of cache state.
|
||||
IReadOnlyList<GalaxyObjectView> views = entry.Index.ObjectViews;
|
||||
GalaxyObjectView? root = ResolveRoot(request, views);
|
||||
|
||||
ConcurrentDictionary<string, IReadOnlyList<GalaxyObjectView>> memo =
|
||||
FilteredViewCache.GetValue(entry, static _ => new ConcurrentDictionary<string, IReadOnlyList<GalaxyObjectView>>(StringComparer.Ordinal));
|
||||
|
||||
return memo.GetOrAdd(
|
||||
filterSignature,
|
||||
static (_, state) =>
|
||||
{
|
||||
List<GalaxyObjectView> matched = [];
|
||||
foreach (GalaxyObjectView view in state.Views)
|
||||
{
|
||||
if (MatchesRoot(view, state.Root, state.MaxDepth)
|
||||
&& MatchesBrowseSubtrees(view, state.BrowseSubtreeGlobs)
|
||||
&& MatchesFilters(view.Object, state.Request))
|
||||
{
|
||||
matched.Add(view);
|
||||
}
|
||||
}
|
||||
|
||||
return matched;
|
||||
},
|
||||
(Views: views, Root: root, MaxDepth: maxDepth, BrowseSubtreeGlobs: browseSubtreeGlobs, Request: request));
|
||||
}
|
||||
|
||||
public static GalaxyObject? FindObjectForTag(
|
||||
|
||||
@@ -115,6 +115,11 @@ public sealed class GalaxyRepositoryGrpcService(
|
||||
{
|
||||
DateTimeOffset? lastSeen = request.LastSeenDeployTime?.ToDateTimeOffset();
|
||||
|
||||
// The caller's identity (and therefore its browse-subtree constraints) is fixed
|
||||
// for the lifetime of the stream, so resolve the subtrees once rather than per
|
||||
// streamed event.
|
||||
IReadOnlyList<string> browseSubtrees = ResolveBrowseSubtrees();
|
||||
|
||||
await foreach (GalaxyDb.GalaxyDeployEventInfo info in notifier
|
||||
.SubscribeAsync(context.CancellationToken)
|
||||
.ConfigureAwait(false))
|
||||
@@ -129,7 +134,7 @@ public sealed class GalaxyRepositoryGrpcService(
|
||||
}
|
||||
lastSeen = null;
|
||||
|
||||
await responseStream.WriteAsync(MapDeployEvent(info, ResolveBrowseSubtrees()), context.CancellationToken).ConfigureAwait(false);
|
||||
await responseStream.WriteAsync(MapDeployEvent(info, browseSubtrees), context.CancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -161,13 +161,14 @@ public sealed class MxAccessGatewayService(
|
||||
|
||||
/// <inheritdoc />
|
||||
/// <remarks>
|
||||
/// PR A.3 — surfaces the public AcknowledgeAlarm RPC. The gateway resolves the
|
||||
/// session and returns a successful reply; the actual worker-side ack call ships
|
||||
/// in <c>PR A.2</c> which adds the MxAccess alarm subscription + worker command
|
||||
/// handler. Clients calling this method today receive an OK reply with a
|
||||
/// "worker alarm path not yet wired" diagnostic — no PERMISSION_DENIED, no
|
||||
/// UNIMPLEMENTED, so the .NET / Python / Go / Java / Rust SDK call sites land
|
||||
/// on a stable surface.
|
||||
/// Surfaces the public AcknowledgeAlarm RPC. The gateway validates the request,
|
||||
/// resolves the session, and delegates to the registered
|
||||
/// <see cref="IAlarmRpcDispatcher"/>. DI binds the production
|
||||
/// <see cref="MxGateway.Server.Sessions.WorkerAlarmRpcDispatcher"/>, which routes
|
||||
/// the ack through the worker pipe IPC: an <c>alarm_full_reference</c> that parses
|
||||
/// as a canonical GUID forwards to <c>AcknowledgeAlarmCommand</c>; a
|
||||
/// <c>Provider!Group.Tag</c> reference forwards to <c>AcknowledgeAlarmByNameCommand</c>;
|
||||
/// anything else returns an <c>InvalidRequest</c> diagnostic.
|
||||
/// </remarks>
|
||||
public override async Task<AcknowledgeAlarmReply> AcknowledgeAlarm(
|
||||
AcknowledgeAlarmRequest request,
|
||||
@@ -189,11 +190,11 @@ public sealed class MxAccessGatewayService(
|
||||
// gRPC NotFound by the caller's MapException.
|
||||
_ = ResolveSession(request.SessionId);
|
||||
|
||||
// PR A.6 — delegate to the alarm dispatcher. NotWiredAlarmRpcDispatcher
|
||||
// (default) returns OK + a worker-pending diagnostic. Production
|
||||
// WorkerAlarmRpcDispatcher (dev-rig follow-up) routes through the
|
||||
// worker IPC to AlarmClient.AlarmAckByGUID with full operator-identity
|
||||
// fidelity.
|
||||
// Delegate to the registered alarm dispatcher. DI binds the production
|
||||
// WorkerAlarmRpcDispatcher, which routes the ack over the worker IPC by
|
||||
// GUID (AcknowledgeAlarmCommand) or by Provider!Group.Tag reference
|
||||
// (AcknowledgeAlarmByNameCommand). NotWiredAlarmRpcDispatcher is only the
|
||||
// null fallback used when no dispatcher is registered.
|
||||
return await alarmRpcDispatcher.AcknowledgeAsync(request, context.CancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
@@ -205,12 +206,12 @@ public sealed class MxAccessGatewayService(
|
||||
|
||||
/// <inheritdoc />
|
||||
/// <remarks>
|
||||
/// PR A.3 — surfaces the public QueryActiveAlarms RPC as an empty stream until
|
||||
/// PR A.2 adds the worker-side QueryActiveAlarmsCommand that walks the
|
||||
/// MxAccess active-alarm collection. Clients can call the RPC and iterate the
|
||||
/// stream; today the stream completes immediately. Once A.2 ships, this
|
||||
/// handler will translate the request into a WorkerCommand and stream the
|
||||
/// resulting snapshots.
|
||||
/// Surfaces the public QueryActiveAlarms RPC. The gateway validates the request,
|
||||
/// resolves the session, and delegates to the registered
|
||||
/// <see cref="IAlarmRpcDispatcher"/>. DI binds the production
|
||||
/// <see cref="MxGateway.Server.Sessions.WorkerAlarmRpcDispatcher"/>, which issues a
|
||||
/// <c>QueryActiveAlarmsCommand</c> over the worker pipe IPC and streams each
|
||||
/// <c>ActiveAlarmSnapshot</c> from the worker reply.
|
||||
/// </remarks>
|
||||
public override async Task QueryActiveAlarms(
|
||||
QueryActiveAlarmsRequest request,
|
||||
@@ -226,11 +227,11 @@ public sealed class MxAccessGatewayService(
|
||||
}
|
||||
_ = ResolveSession(request.SessionId);
|
||||
|
||||
// PR A.7 — delegate to the alarm dispatcher. NotWiredAlarmRpcDispatcher
|
||||
// (default) yields an empty stream. Production WorkerAlarmRpcDispatcher
|
||||
// (dev-rig follow-up) walks the worker's IMxAccessAlarmConsumer
|
||||
// SnapshotActiveAlarms output and translates each AlarmRecord into an
|
||||
// ActiveAlarmSnapshot.
|
||||
// Delegate to the registered alarm dispatcher. DI binds the production
|
||||
// WorkerAlarmRpcDispatcher, which issues a QueryActiveAlarmsCommand over the
|
||||
// worker IPC and streams each ActiveAlarmSnapshot from the worker reply.
|
||||
// NotWiredAlarmRpcDispatcher is only the null fallback used when no
|
||||
// dispatcher is registered.
|
||||
await foreach (ActiveAlarmSnapshot snapshot in alarmRpcDispatcher
|
||||
.QueryActiveAlarmsAsync(request, context.CancellationToken)
|
||||
.WithCancellation(context.CancellationToken)
|
||||
|
||||
@@ -10,7 +10,17 @@ namespace MxGateway.Server.Security.Authentication;
|
||||
public sealed class AuthSqliteConnectionFactory(IOptions<GatewayOptions> options)
|
||||
{
|
||||
/// <summary>
|
||||
/// Creates and configures a SQLite connection to the auth database.
|
||||
/// Busy timeout applied to every auth-store connection. SQLite retries a busy
|
||||
/// database for this long before surfacing <c>SQLITE_BUSY</c>, so the concurrent
|
||||
/// <c>MarkKeyUsedAsync</c> / audit-append writers degrade gracefully under load
|
||||
/// instead of failing the request path.
|
||||
/// </summary>
|
||||
private static readonly TimeSpan BusyTimeout = TimeSpan.FromSeconds(5);
|
||||
|
||||
/// <summary>
|
||||
/// Creates an unopened SQLite connection to the auth database. Prefer
|
||||
/// <see cref="OpenConnectionAsync"/>, which also applies WAL journaling and the
|
||||
/// busy timeout.
|
||||
/// </summary>
|
||||
public SqliteConnection CreateConnection()
|
||||
{
|
||||
@@ -25,9 +35,44 @@ public sealed class AuthSqliteConnectionFactory(IOptions<GatewayOptions> options
|
||||
SqliteConnectionStringBuilder builder = new()
|
||||
{
|
||||
DataSource = sqlitePath,
|
||||
Mode = SqliteOpenMode.ReadWriteCreate
|
||||
Mode = SqliteOpenMode.ReadWriteCreate,
|
||||
Pooling = true,
|
||||
DefaultTimeout = (int)BusyTimeout.TotalSeconds,
|
||||
};
|
||||
|
||||
return new SqliteConnection(builder.ToString());
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Creates a SQLite connection, opens it, and configures WAL journaling and a
|
||||
/// non-zero busy timeout so concurrent readers and writers degrade gracefully
|
||||
/// rather than surfacing <c>SQLITE_BUSY</c> as a hard failure.
|
||||
/// </summary>
|
||||
public async Task<SqliteConnection> OpenConnectionAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
SqliteConnection connection = CreateConnection();
|
||||
try
|
||||
{
|
||||
await connection.OpenAsync(cancellationToken).ConfigureAwait(false);
|
||||
await ConfigureConnectionAsync(connection, cancellationToken).ConfigureAwait(false);
|
||||
return connection;
|
||||
}
|
||||
catch
|
||||
{
|
||||
await connection.DisposeAsync().ConfigureAwait(false);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
private static async Task ConfigureConnectionAsync(
|
||||
SqliteConnection connection,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
// WAL is a persistent, database-level setting; re-applying it per connection
|
||||
// is cheap and a no-op once set. busy_timeout is per-connection state.
|
||||
await using SqliteCommand command = connection.CreateCommand();
|
||||
command.CommandText =
|
||||
$"PRAGMA journal_mode=WAL; PRAGMA busy_timeout={(int)BusyTimeout.TotalMilliseconds};";
|
||||
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,8 +10,7 @@ public sealed class SqliteApiKeyAdminStore(AuthSqliteConnectionFactory connectio
|
||||
/// <inheritdoc />
|
||||
public async Task CreateAsync(ApiKeyCreateRequest request, CancellationToken cancellationToken)
|
||||
{
|
||||
await using SqliteConnection connection = connectionFactory.CreateConnection();
|
||||
await connection.OpenAsync(cancellationToken).ConfigureAwait(false);
|
||||
await using SqliteConnection connection = await connectionFactory.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
|
||||
|
||||
await using SqliteCommand command = connection.CreateCommand();
|
||||
command.CommandText = """
|
||||
@@ -44,8 +43,7 @@ public sealed class SqliteApiKeyAdminStore(AuthSqliteConnectionFactory connectio
|
||||
/// <inheritdoc />
|
||||
public async Task<IReadOnlyList<ApiKeyRecord>> ListAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
await using SqliteConnection connection = connectionFactory.CreateConnection();
|
||||
await connection.OpenAsync(cancellationToken).ConfigureAwait(false);
|
||||
await using SqliteConnection connection = await connectionFactory.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
|
||||
|
||||
await using SqliteCommand command = connection.CreateCommand();
|
||||
command.CommandText = """
|
||||
@@ -70,8 +68,7 @@ public sealed class SqliteApiKeyAdminStore(AuthSqliteConnectionFactory connectio
|
||||
/// <inheritdoc />
|
||||
public async Task<bool> RevokeAsync(string keyId, DateTimeOffset revokedUtc, CancellationToken cancellationToken)
|
||||
{
|
||||
await using SqliteConnection connection = connectionFactory.CreateConnection();
|
||||
await connection.OpenAsync(cancellationToken).ConfigureAwait(false);
|
||||
await using SqliteConnection connection = await connectionFactory.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
|
||||
|
||||
await using SqliteCommand command = connection.CreateCommand();
|
||||
command.CommandText = """
|
||||
@@ -94,8 +91,7 @@ public sealed class SqliteApiKeyAdminStore(AuthSqliteConnectionFactory connectio
|
||||
DateTimeOffset rotatedUtc,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
await using SqliteConnection connection = connectionFactory.CreateConnection();
|
||||
await connection.OpenAsync(cancellationToken).ConfigureAwait(false);
|
||||
await using SqliteConnection connection = await connectionFactory.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
|
||||
|
||||
await using SqliteCommand command = connection.CreateCommand();
|
||||
command.CommandText = """
|
||||
|
||||
@@ -7,8 +7,7 @@ public sealed class SqliteApiKeyAuditStore(AuthSqliteConnectionFactory connectio
|
||||
/// <inheritdoc />
|
||||
public async Task AppendAsync(ApiKeyAuditEntry entry, CancellationToken cancellationToken)
|
||||
{
|
||||
await using SqliteConnection connection = connectionFactory.CreateConnection();
|
||||
await connection.OpenAsync(cancellationToken).ConfigureAwait(false);
|
||||
await using SqliteConnection connection = await connectionFactory.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
|
||||
|
||||
await using SqliteCommand command = connection.CreateCommand();
|
||||
command.CommandText = """
|
||||
@@ -32,8 +31,7 @@ public sealed class SqliteApiKeyAuditStore(AuthSqliteConnectionFactory connectio
|
||||
return [];
|
||||
}
|
||||
|
||||
await using SqliteConnection connection = connectionFactory.CreateConnection();
|
||||
await connection.OpenAsync(cancellationToken).ConfigureAwait(false);
|
||||
await using SqliteConnection connection = await connectionFactory.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
|
||||
|
||||
await using SqliteCommand command = connection.CreateCommand();
|
||||
command.CommandText = """
|
||||
|
||||
@@ -20,8 +20,7 @@ public sealed class SqliteApiKeyStore(AuthSqliteConnectionFactory connectionFact
|
||||
/// <inheritdoc />
|
||||
public async Task MarkKeyUsedAsync(string keyId, DateTimeOffset usedUtc, CancellationToken cancellationToken)
|
||||
{
|
||||
await using SqliteConnection connection = connectionFactory.CreateConnection();
|
||||
await connection.OpenAsync(cancellationToken).ConfigureAwait(false);
|
||||
await using SqliteConnection connection = await connectionFactory.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
|
||||
|
||||
await using SqliteCommand command = connection.CreateCommand();
|
||||
command.CommandText = """
|
||||
@@ -40,8 +39,7 @@ public sealed class SqliteApiKeyStore(AuthSqliteConnectionFactory connectionFact
|
||||
bool requireActive,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
await using SqliteConnection connection = connectionFactory.CreateConnection();
|
||||
await connection.OpenAsync(cancellationToken).ConfigureAwait(false);
|
||||
await using SqliteConnection connection = await connectionFactory.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
|
||||
|
||||
await using SqliteCommand command = connection.CreateCommand();
|
||||
command.CommandText = requireActive
|
||||
|
||||
@@ -8,8 +8,7 @@ public sealed class SqliteAuthStoreMigrator(AuthSqliteConnectionFactory connecti
|
||||
/// <param name="cancellationToken">Cancellation token.</param>
|
||||
public async Task MigrateAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
await using SqliteConnection connection = connectionFactory.CreateConnection();
|
||||
await connection.OpenAsync(cancellationToken).ConfigureAwait(false);
|
||||
await using SqliteConnection connection = await connectionFactory.OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
|
||||
|
||||
await using SqliteTransaction transaction =
|
||||
(SqliteTransaction)await connection.BeginTransactionAsync(cancellationToken).ConfigureAwait(false);
|
||||
|
||||
@@ -1,7 +1,4 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Google.Protobuf.WellKnownTypes;
|
||||
using MxGateway.Contracts.Proto;
|
||||
using MxGateway.Server.Grpc;
|
||||
@@ -11,39 +8,33 @@ namespace MxGateway.Server.Sessions;
|
||||
/// <summary>
|
||||
/// Production <see cref="IAlarmRpcDispatcher"/> that routes the public
|
||||
/// <c>AcknowledgeAlarm</c> + <c>QueryActiveAlarms</c> RPCs through the
|
||||
/// worker pipe IPC. Replaces <see cref="NotWiredAlarmRpcDispatcher"/>
|
||||
/// once the worker AlarmCommandHandler is wired in.
|
||||
/// worker pipe IPC. DI binds this dispatcher; <see cref="NotWiredAlarmRpcDispatcher"/>
|
||||
/// is only the null fallback used when no dispatcher is registered.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// <c>QueryActiveAlarms</c> is fully wired: issues a
|
||||
/// <c>QueryActiveAlarms</c> issues a
|
||||
/// <see cref="QueryActiveAlarmsCommand"/> over the pipe and yields
|
||||
/// each <see cref="ActiveAlarmSnapshot"/> from the
|
||||
/// <see cref="QueryActiveAlarmsReplyPayload"/>.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// <c>AcknowledgeAlarm</c> is partially wired: the public RPC's
|
||||
/// <see cref="AcknowledgeAlarmRequest.AlarmFullReference"/> is a
|
||||
/// <c>Provider!Group.Tag</c> string, but the worker's wnwrap consumer
|
||||
/// acks by GUID. When the supplied reference parses as a GUID
|
||||
/// directly, the dispatcher forwards it as-is. Otherwise it
|
||||
/// returns an <c>Unimplemented</c> diagnostic. Resolving
|
||||
/// reference→GUID requires an additional worker IPC command
|
||||
/// (e.g. <c>AlarmAckByName</c> wrapping
|
||||
/// <c>wwAlarmConsumerClass.AlarmAckByName</c>) and is tracked as
|
||||
/// a follow-up.
|
||||
/// <c>AcknowledgeAlarm</c> accepts either form of
|
||||
/// <see cref="AcknowledgeAlarmRequest.AlarmFullReference"/>: a canonical
|
||||
/// GUID forwards as an <see cref="AcknowledgeAlarmCommand"/>; a
|
||||
/// <c>Provider!Group.Tag</c> reference is parsed by
|
||||
/// <see cref="TryParseAlarmReference"/> and forwarded as an
|
||||
/// <see cref="AcknowledgeAlarmByNameCommand"/>. Any other reference
|
||||
/// returns an <c>InvalidRequest</c> diagnostic.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public sealed class WorkerAlarmRpcDispatcher : IAlarmRpcDispatcher
|
||||
public sealed class WorkerAlarmRpcDispatcher(
|
||||
ISessionRegistry sessionRegistry,
|
||||
TimeProvider? timeProvider = null) : IAlarmRpcDispatcher
|
||||
{
|
||||
private readonly ISessionRegistry sessionRegistry;
|
||||
private readonly TimeProvider timeProvider;
|
||||
|
||||
public WorkerAlarmRpcDispatcher(ISessionRegistry sessionRegistry, TimeProvider? timeProvider = null)
|
||||
{
|
||||
this.sessionRegistry = sessionRegistry ?? throw new System.ArgumentNullException(nameof(sessionRegistry));
|
||||
this.timeProvider = timeProvider ?? TimeProvider.System;
|
||||
}
|
||||
private readonly ISessionRegistry sessionRegistry = sessionRegistry
|
||||
?? throw new ArgumentNullException(nameof(sessionRegistry));
|
||||
private readonly TimeProvider timeProvider = timeProvider ?? TimeProvider.System;
|
||||
|
||||
/// <summary>
|
||||
/// Parse a full alarm reference of the form <c>Provider!Group.Tag</c>
|
||||
@@ -83,7 +74,7 @@ public sealed class WorkerAlarmRpcDispatcher : IAlarmRpcDispatcher
|
||||
AcknowledgeAlarmRequest request,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
if (request is null) throw new System.ArgumentNullException(nameof(request));
|
||||
ArgumentNullException.ThrowIfNull(request);
|
||||
|
||||
if (!sessionRegistry.TryGet(request.SessionId, out GatewaySession session))
|
||||
{
|
||||
@@ -98,7 +89,7 @@ public sealed class WorkerAlarmRpcDispatcher : IAlarmRpcDispatcher
|
||||
}
|
||||
|
||||
WorkerCommand workerCommand;
|
||||
if (System.Guid.TryParse(request.AlarmFullReference, out System.Guid guid))
|
||||
if (Guid.TryParse(request.AlarmFullReference, out Guid guid))
|
||||
{
|
||||
workerCommand = new WorkerCommand
|
||||
{
|
||||
@@ -193,7 +184,7 @@ public sealed class WorkerAlarmRpcDispatcher : IAlarmRpcDispatcher
|
||||
QueryActiveAlarmsRequest request,
|
||||
[EnumeratorCancellation] CancellationToken cancellationToken)
|
||||
{
|
||||
if (request is null) throw new System.ArgumentNullException(nameof(request));
|
||||
ArgumentNullException.ThrowIfNull(request);
|
||||
|
||||
if (!sessionRegistry.TryGet(request.SessionId, out GatewaySession session))
|
||||
{
|
||||
|
||||
@@ -88,6 +88,27 @@ public sealed class GalaxyFilterInputSafetyTests
|
||||
Assert.True(GalaxyGlobMatcher.IsMatch("Pump_001", "Pump_00?"));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Regression guard for finding Server-008: <see cref="GalaxyGlobMatcher"/> caches
|
||||
/// the compiled regex per glob pattern. Repeated calls with the same pattern, and
|
||||
/// interleaved calls with different patterns, must keep returning the correct
|
||||
/// literal-vs-wildcard result rather than a stale cached match.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public void GlobMatcher_RepeatedAndInterleavedPatterns_StayCorrect()
|
||||
{
|
||||
for (int i = 0; i < 5; i++)
|
||||
{
|
||||
Assert.True(GalaxyGlobMatcher.IsMatch("Pump_001", "Pump_*"));
|
||||
Assert.False(GalaxyGlobMatcher.IsMatch("Valve_001", "Pump_*"));
|
||||
Assert.True(GalaxyGlobMatcher.IsMatch("Valve_001", "Valve_00?"));
|
||||
Assert.False(GalaxyGlobMatcher.IsMatch("Pump_001", "Valve_00?"));
|
||||
// A glob equal to a SQL metacharacter still matches only its literal.
|
||||
Assert.True(GalaxyGlobMatcher.IsMatch("%", "%"));
|
||||
Assert.False(GalaxyGlobMatcher.IsMatch("anything", "%"));
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verifies a pathological glob does not cause catastrophic regex backtracking —
|
||||
/// <see cref="GalaxyGlobMatcher"/> escapes every literal character and applies a
|
||||
|
||||
@@ -0,0 +1,136 @@
|
||||
using MxGateway.Contracts.Proto.Galaxy;
|
||||
using MxGateway.Server.Dashboard;
|
||||
using MxGateway.Server.Galaxy;
|
||||
|
||||
namespace MxGateway.Tests.Galaxy;
|
||||
|
||||
/// <summary>
|
||||
/// Direct coverage for <see cref="GalaxyHierarchyProjector"/> paging.
|
||||
/// <para>
|
||||
/// Regression guard for finding Server-007: the projector memoizes the filtered,
|
||||
/// ordered view list per <c>(cache entry, filter signature)</c> so paging is an
|
||||
/// O(pageSize) slice rather than an O(total) re-scan per page. These tests confirm
|
||||
/// the memo does not change paging results, does not bleed between distinct filter
|
||||
/// signatures, and is scoped to a single cache-entry instance.
|
||||
/// </para>
|
||||
/// </summary>
|
||||
public sealed class GalaxyHierarchyProjectorTests
|
||||
{
|
||||
[Fact]
|
||||
public void Project_PagedAcrossEntireHierarchy_ReturnsEveryObjectExactlyOnce()
|
||||
{
|
||||
GalaxyHierarchyCacheEntry entry = CreateEntry(CreateObjects(25));
|
||||
|
||||
List<string> collected = [];
|
||||
int totalReported = -1;
|
||||
for (int offset = 0; offset < 25; offset += 4)
|
||||
{
|
||||
GalaxyHierarchyQueryResult result = GalaxyHierarchyProjector.Project(
|
||||
entry,
|
||||
new DiscoverHierarchyRequest(),
|
||||
browseSubtreeGlobs: null,
|
||||
offset,
|
||||
pageSize: 4);
|
||||
|
||||
totalReported = result.TotalObjectCount;
|
||||
collected.AddRange(result.Objects.Select(obj => obj.TagName));
|
||||
}
|
||||
|
||||
Assert.Equal(25, totalReported);
|
||||
Assert.Equal(25, collected.Count);
|
||||
Assert.Equal(collected.Count, collected.Distinct(StringComparer.Ordinal).Count());
|
||||
Assert.Equal("Object_001", collected[0]);
|
||||
Assert.Equal("Object_025", collected[^1]);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Project_DistinctFiltersOnSameEntry_DoNotShareMemoizedViewList()
|
||||
{
|
||||
GalaxyHierarchyCacheEntry entry = CreateEntry(CreateObjects(10));
|
||||
|
||||
GalaxyHierarchyQueryResult globbed = GalaxyHierarchyProjector.Project(
|
||||
entry,
|
||||
new DiscoverHierarchyRequest { TagNameGlob = "Object_00?" });
|
||||
GalaxyHierarchyQueryResult unfiltered = GalaxyHierarchyProjector.Project(
|
||||
entry,
|
||||
new DiscoverHierarchyRequest());
|
||||
|
||||
// Distinct filter signatures must each get their own filtered list.
|
||||
Assert.Equal(9, globbed.TotalObjectCount);
|
||||
Assert.Equal(10, unfiltered.TotalObjectCount);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Project_SameFilterRepeated_ReturnsIdenticalTotals()
|
||||
{
|
||||
GalaxyHierarchyCacheEntry entry = CreateEntry(CreateObjects(12));
|
||||
|
||||
GalaxyHierarchyQueryResult first = GalaxyHierarchyProjector.Project(
|
||||
entry,
|
||||
new DiscoverHierarchyRequest(),
|
||||
browseSubtreeGlobs: null,
|
||||
offset: 0,
|
||||
pageSize: 5);
|
||||
GalaxyHierarchyQueryResult second = GalaxyHierarchyProjector.Project(
|
||||
entry,
|
||||
new DiscoverHierarchyRequest(),
|
||||
browseSubtreeGlobs: null,
|
||||
offset: 5,
|
||||
pageSize: 5);
|
||||
|
||||
Assert.Equal(first.TotalObjectCount, second.TotalObjectCount);
|
||||
Assert.Equal(first.FilterSignature, second.FilterSignature);
|
||||
Assert.Equal(5, first.Objects.Count);
|
||||
Assert.Equal(5, second.Objects.Count);
|
||||
Assert.NotEqual(first.Objects[0].TagName, second.Objects[0].TagName);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Project_DistinctCacheEntries_ProjectAgainstTheirOwnData()
|
||||
{
|
||||
GalaxyHierarchyCacheEntry small = CreateEntry(CreateObjects(3));
|
||||
GalaxyHierarchyCacheEntry large = CreateEntry(CreateObjects(40));
|
||||
|
||||
GalaxyHierarchyQueryResult smallResult = GalaxyHierarchyProjector.Project(
|
||||
small,
|
||||
new DiscoverHierarchyRequest());
|
||||
GalaxyHierarchyQueryResult largeResult = GalaxyHierarchyProjector.Project(
|
||||
large,
|
||||
new DiscoverHierarchyRequest());
|
||||
|
||||
// Each entry instance keys its own memo; the second projection must not reuse the
|
||||
// first entry's filtered view list.
|
||||
Assert.Equal(3, smallResult.TotalObjectCount);
|
||||
Assert.Equal(40, largeResult.TotalObjectCount);
|
||||
}
|
||||
|
||||
private static GalaxyHierarchyCacheEntry CreateEntry(IReadOnlyList<GalaxyObject> objects)
|
||||
{
|
||||
return GalaxyHierarchyCacheEntry.Empty with
|
||||
{
|
||||
Status = GalaxyCacheStatus.Healthy,
|
||||
Sequence = 1,
|
||||
LastSuccessAt = DateTimeOffset.UtcNow,
|
||||
Objects = objects,
|
||||
Index = GalaxyHierarchyIndex.Build(objects),
|
||||
DashboardSummary = DashboardGalaxySummary.Unknown with
|
||||
{
|
||||
Status = DashboardGalaxyStatus.Healthy,
|
||||
ObjectCount = objects.Count,
|
||||
},
|
||||
ObjectCount = objects.Count,
|
||||
};
|
||||
}
|
||||
|
||||
private static IReadOnlyList<GalaxyObject> CreateObjects(int count)
|
||||
{
|
||||
return Enumerable.Range(1, count)
|
||||
.Select(index => new GalaxyObject
|
||||
{
|
||||
GobjectId = index,
|
||||
TagName = $"Object_{index:000}",
|
||||
BrowseName = $"Object_{index:000}",
|
||||
})
|
||||
.ToArray();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,141 @@
|
||||
using System.Buffers.Binary;
|
||||
using MxGateway.Server.Configuration;
|
||||
using MxGateway.Server.Workers;
|
||||
|
||||
namespace MxGateway.Tests.Gateway.Workers;
|
||||
|
||||
/// <summary>
|
||||
/// Coverage for <see cref="WorkerExecutableValidator"/> PE-header architecture parsing
|
||||
/// (finding Server-013). The validator reads the DOS <c>MZ</c> stub, follows the PE
|
||||
/// header offset at <c>0x3c</c>, checks the <c>PE\0\0</c> signature, and compares the
|
||||
/// machine field against the required <see cref="WorkerArchitecture"/>.
|
||||
/// </summary>
|
||||
public sealed class WorkerExecutableValidatorTests : IDisposable
|
||||
{
|
||||
private const ushort ImageFileMachineI386 = 0x014c;
|
||||
private const ushort ImageFileMachineAmd64 = 0x8664;
|
||||
|
||||
private readonly List<string> _tempFiles = [];
|
||||
|
||||
[Fact]
|
||||
public void Validate_X86ExecutableMatchingRequiredArchitecture_DoesNotThrow()
|
||||
{
|
||||
string path = WritePeFile(ImageFileMachineI386);
|
||||
|
||||
WorkerExecutableValidator.Validate(path, WorkerArchitecture.X86);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Validate_X64ExecutableMatchingRequiredArchitecture_DoesNotThrow()
|
||||
{
|
||||
string path = WritePeFile(ImageFileMachineAmd64);
|
||||
|
||||
WorkerExecutableValidator.Validate(path, WorkerArchitecture.X64);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Validate_X64ExecutableWhenX86Required_ThrowsInvalidExecutable()
|
||||
{
|
||||
string path = WritePeFile(ImageFileMachineAmd64);
|
||||
|
||||
WorkerProcessLaunchException exception = Assert.Throws<WorkerProcessLaunchException>(
|
||||
() => WorkerExecutableValidator.Validate(path, WorkerArchitecture.X86));
|
||||
|
||||
Assert.Equal(WorkerProcessLaunchErrorCode.InvalidExecutable, exception.ErrorCode);
|
||||
Assert.Contains("architecture", exception.Message, StringComparison.OrdinalIgnoreCase);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Validate_X86ExecutableWhenX64Required_ThrowsInvalidExecutable()
|
||||
{
|
||||
string path = WritePeFile(ImageFileMachineI386);
|
||||
|
||||
WorkerProcessLaunchException exception = Assert.Throws<WorkerProcessLaunchException>(
|
||||
() => WorkerExecutableValidator.Validate(path, WorkerArchitecture.X64));
|
||||
|
||||
Assert.Equal(WorkerProcessLaunchErrorCode.InvalidExecutable, exception.ErrorCode);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Validate_FileWithoutMzHeader_ThrowsInvalidExecutable()
|
||||
{
|
||||
byte[] bytes = new byte[0x80];
|
||||
// Leave the first two bytes as zero so the MZ signature check fails.
|
||||
string path = WriteTempFile(bytes);
|
||||
|
||||
WorkerProcessLaunchException exception = Assert.Throws<WorkerProcessLaunchException>(
|
||||
() => WorkerExecutableValidator.Validate(path, WorkerArchitecture.X86));
|
||||
|
||||
Assert.Equal(WorkerProcessLaunchErrorCode.InvalidExecutable, exception.ErrorCode);
|
||||
Assert.Contains("MZ", exception.Message, StringComparison.Ordinal);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Validate_FileTooSmallForPeHeader_ThrowsInvalidExecutable()
|
||||
{
|
||||
string path = WriteTempFile([(byte)'M', (byte)'Z']);
|
||||
|
||||
WorkerProcessLaunchException exception = Assert.Throws<WorkerProcessLaunchException>(
|
||||
() => WorkerExecutableValidator.Validate(path, WorkerArchitecture.X86));
|
||||
|
||||
Assert.Equal(WorkerProcessLaunchErrorCode.InvalidExecutable, exception.ErrorCode);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Validate_FileWithoutPeSignature_ThrowsInvalidExecutable()
|
||||
{
|
||||
// Build a valid MZ header pointing at a PE offset that holds a wrong signature.
|
||||
byte[] bytes = new byte[0x100];
|
||||
bytes[0] = (byte)'M';
|
||||
bytes[1] = (byte)'Z';
|
||||
BinaryPrimitives.WriteInt32LittleEndian(bytes.AsSpan(0x3c, sizeof(int)), 0x80);
|
||||
// PE region left as zeros — the "PE\0\0" signature check fails.
|
||||
string path = WriteTempFile(bytes);
|
||||
|
||||
WorkerProcessLaunchException exception = Assert.Throws<WorkerProcessLaunchException>(
|
||||
() => WorkerExecutableValidator.Validate(path, WorkerArchitecture.X86));
|
||||
|
||||
Assert.Equal(WorkerProcessLaunchErrorCode.InvalidExecutable, exception.ErrorCode);
|
||||
Assert.Contains("PE", exception.Message, StringComparison.Ordinal);
|
||||
}
|
||||
|
||||
private string WritePeFile(ushort machine)
|
||||
{
|
||||
const int peHeaderOffset = 0x80;
|
||||
byte[] bytes = new byte[peHeaderOffset + 6];
|
||||
bytes[0] = (byte)'M';
|
||||
bytes[1] = (byte)'Z';
|
||||
BinaryPrimitives.WriteInt32LittleEndian(bytes.AsSpan(0x3c, sizeof(int)), peHeaderOffset);
|
||||
bytes[peHeaderOffset] = (byte)'P';
|
||||
bytes[peHeaderOffset + 1] = (byte)'E';
|
||||
bytes[peHeaderOffset + 2] = 0;
|
||||
bytes[peHeaderOffset + 3] = 0;
|
||||
BinaryPrimitives.WriteUInt16LittleEndian(bytes.AsSpan(peHeaderOffset + 4, sizeof(ushort)), machine);
|
||||
return WriteTempFile(bytes);
|
||||
}
|
||||
|
||||
private string WriteTempFile(byte[] bytes)
|
||||
{
|
||||
string path = Path.Combine(Path.GetTempPath(), $"mxgw-pe-{Guid.NewGuid():N}.bin");
|
||||
File.WriteAllBytes(path, bytes);
|
||||
_tempFiles.Add(path);
|
||||
return path;
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
foreach (string path in _tempFiles)
|
||||
{
|
||||
try
|
||||
{
|
||||
File.Delete(path);
|
||||
}
|
||||
catch (IOException)
|
||||
{
|
||||
// Best-effort cleanup of the temp PE fixtures.
|
||||
}
|
||||
}
|
||||
|
||||
_tempFiles.Clear();
|
||||
}
|
||||
}
|
||||
@@ -150,6 +150,32 @@ public sealed class SqliteAuthStoreTests : IDisposable
|
||||
Assert.Equal("matched active key", record.Details);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verifies that <see cref="AuthSqliteConnectionFactory.OpenConnectionAsync"/> opens
|
||||
/// the auth database in WAL journal mode so concurrent readers and writers degrade
|
||||
/// gracefully instead of surfacing <c>SQLITE_BUSY</c> on the request path.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task OpenConnectionAsync_EnablesWalJournalModeAndBusyTimeout()
|
||||
{
|
||||
string databasePath = CreateTempDatabasePath();
|
||||
await using ServiceProvider services = BuildAuthServices(databasePath);
|
||||
AuthSqliteConnectionFactory factory = services.GetRequiredService<AuthSqliteConnectionFactory>();
|
||||
|
||||
await using SqliteConnection connection = await factory.OpenConnectionAsync(CancellationToken.None);
|
||||
|
||||
await using SqliteCommand journalModeCommand = connection.CreateCommand();
|
||||
journalModeCommand.CommandText = "PRAGMA journal_mode;";
|
||||
string? journalMode = (string?)await journalModeCommand.ExecuteScalarAsync(CancellationToken.None);
|
||||
|
||||
await using SqliteCommand busyTimeoutCommand = connection.CreateCommand();
|
||||
busyTimeoutCommand.CommandText = "PRAGMA busy_timeout;";
|
||||
long busyTimeout = (long)(await busyTimeoutCommand.ExecuteScalarAsync(CancellationToken.None) ?? 0L);
|
||||
|
||||
Assert.Equal("wal", journalMode, ignoreCase: true);
|
||||
Assert.True(busyTimeout > 0, $"Expected a non-zero busy_timeout but found {busyTimeout}.");
|
||||
}
|
||||
|
||||
private static ServiceProvider BuildAuthServices(string databasePath)
|
||||
{
|
||||
IConfigurationRoot configuration = new ConfigurationBuilder()
|
||||
|
||||
@@ -60,6 +60,26 @@ public sealed class VariantConverterTests
|
||||
Assert.Equal("VT_I8", converted.VariantType);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Worker-010 regression: a 32-bit <see cref="uint"/> with an expected
|
||||
/// data type of <see cref="MxDataType.Time"/> must not be projected as a
|
||||
/// Windows FILETIME. A uint can only hold the low 32 bits of a FILETIME,
|
||||
/// which would silently render as a near-1601 timestamp; the converter
|
||||
/// must fall through to an integer projection instead.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public void Convert_WithUInt32AndExpectedTime_DoesNotProjectFileTime()
|
||||
{
|
||||
const uint value = 123456789u;
|
||||
|
||||
MxValue converted = _converter.Convert(value, MxDataType.Time);
|
||||
|
||||
Assert.Equal(MxDataType.Integer, converted.DataType);
|
||||
Assert.Equal(MxValue.KindOneofCase.Int64Value, converted.KindCase);
|
||||
Assert.Equal(value, converted.Int64Value);
|
||||
Assert.Equal("VT_UI4", converted.VariantType);
|
||||
}
|
||||
|
||||
/// <summary>Verifies that null-like values preserve their null semantics and variant type.</summary>
|
||||
/// <param name="value">Null-like value to convert.</param>
|
||||
/// <param name="expectedVariantType">Expected variant type string.</param>
|
||||
|
||||
@@ -118,6 +118,39 @@ public sealed class WorkerFrameProtocolTests
|
||||
Assert.Equal(new ulong[] { 1, 2, 3 }, new[] { first.Sequence, second.Sequence, third.Sequence }.OrderBy(sequence => sequence));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Worker-009 regression: the reader rents its payload buffer from a
|
||||
/// shared pool, so a rented buffer can be larger than the current frame
|
||||
/// and may carry bytes from a previous, larger frame. Reading frames of
|
||||
/// differing sizes back-to-back through one reader must parse each frame
|
||||
/// using only its own payload length, never trailing pooled bytes.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task ReadAsync_WithVaryingFrameSizes_ParsesEachFrameExactly()
|
||||
{
|
||||
WorkerFrameProtocolOptions options = CreateOptions();
|
||||
using MemoryStream stream = new();
|
||||
WorkerFrameWriter writer = new(stream, options);
|
||||
|
||||
// A large-payload frame followed by a small-payload frame: if the
|
||||
// reader reused a pooled buffer without honouring the second frame's
|
||||
// length, the small frame would parse with stale trailing bytes.
|
||||
WorkerEnvelope large = CreateGatewayHelloEnvelope(sequence: 1);
|
||||
large.GatewayHello.GatewayVersion = new string('x', 4096);
|
||||
WorkerEnvelope small = CreateGatewayHelloEnvelope(sequence: 2);
|
||||
|
||||
await writer.WriteAsync(large);
|
||||
await writer.WriteAsync(small);
|
||||
stream.Position = 0;
|
||||
|
||||
WorkerFrameReader reader = new(stream, options);
|
||||
WorkerEnvelope firstParsed = await reader.ReadAsync();
|
||||
WorkerEnvelope secondParsed = await reader.ReadAsync();
|
||||
|
||||
Assert.Equal(large, firstParsed);
|
||||
Assert.Equal(small, secondParsed);
|
||||
}
|
||||
|
||||
private static WorkerFrameProtocolOptions CreateOptions()
|
||||
{
|
||||
return new WorkerFrameProtocolOptions(
|
||||
|
||||
@@ -207,7 +207,14 @@ public sealed class VariantConverter
|
||||
MxDataType expectedDataType)
|
||||
{
|
||||
long longValue = System.Convert.ToInt64(value, CultureInfo.InvariantCulture);
|
||||
if (expectedDataType == MxDataType.Time)
|
||||
|
||||
// The MxDataType.Time projection treats the source as a Windows FILETIME
|
||||
// (a 64-bit 100-ns tick count since 1601). Only a genuine 64-bit source
|
||||
// (long) can carry a valid full FILETIME; a uint can only hold the low
|
||||
// 32 bits, which DateTime.FromFileTimeUtc would silently render as a
|
||||
// near-1601 timestamp. For uint sources fall through to the integer
|
||||
// projection rather than producing a bogus timestamp.
|
||||
if (expectedDataType == MxDataType.Time && value is long)
|
||||
{
|
||||
return new MxValue
|
||||
{
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using System;
|
||||
using System.Buffers;
|
||||
using System.IO;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
@@ -29,7 +30,7 @@ public sealed class WorkerFrameReader
|
||||
public async Task<WorkerEnvelope> ReadAsync(CancellationToken cancellationToken = default)
|
||||
{
|
||||
byte[] lengthPrefix = new byte[sizeof(uint)];
|
||||
await ReadExactlyOrThrowAsync(lengthPrefix, cancellationToken).ConfigureAwait(false);
|
||||
await ReadExactlyOrThrowAsync(lengthPrefix, lengthPrefix.Length, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
uint payloadLength = ReadUInt32LittleEndian(lengthPrefix);
|
||||
if (payloadLength == 0)
|
||||
@@ -46,20 +47,32 @@ public sealed class WorkerFrameReader
|
||||
$"Worker frame payload length {payloadLength} exceeds the configured maximum of {_options.MaxMessageBytes} bytes.");
|
||||
}
|
||||
|
||||
byte[] payload = new byte[payloadLength];
|
||||
await ReadExactlyOrThrowAsync(payload, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
// Rent the payload buffer from the shared pool rather than allocating
|
||||
// a fresh byte[] per frame. ParseFrom copies whatever it needs into
|
||||
// the parsed message, so the rented buffer can be returned as soon as
|
||||
// parsing completes.
|
||||
int length = checked((int)payloadLength);
|
||||
byte[] payload = ArrayPool<byte>.Shared.Rent(length);
|
||||
WorkerEnvelope envelope;
|
||||
try
|
||||
{
|
||||
envelope = WorkerEnvelope.Parser.ParseFrom(payload);
|
||||
await ReadExactlyOrThrowAsync(payload, length, cancellationToken).ConfigureAwait(false);
|
||||
|
||||
try
|
||||
{
|
||||
envelope = WorkerEnvelope.Parser.ParseFrom(payload, 0, length);
|
||||
}
|
||||
catch (InvalidProtocolBufferException exception)
|
||||
{
|
||||
throw new WorkerFrameProtocolException(
|
||||
WorkerFrameProtocolErrorCode.InvalidEnvelope,
|
||||
"Worker frame payload is not a valid WorkerEnvelope protobuf message.",
|
||||
exception);
|
||||
}
|
||||
}
|
||||
catch (InvalidProtocolBufferException exception)
|
||||
finally
|
||||
{
|
||||
throw new WorkerFrameProtocolException(
|
||||
WorkerFrameProtocolErrorCode.InvalidEnvelope,
|
||||
"Worker frame payload is not a valid WorkerEnvelope protobuf message.",
|
||||
exception);
|
||||
ArrayPool<byte>.Shared.Return(payload);
|
||||
}
|
||||
|
||||
WorkerEnvelopeValidator.Validate(envelope, _options);
|
||||
@@ -77,13 +90,14 @@ public sealed class WorkerFrameReader
|
||||
|
||||
private async Task ReadExactlyOrThrowAsync(
|
||||
byte[] buffer,
|
||||
int count,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
int offset = 0;
|
||||
while (offset < buffer.Length)
|
||||
while (offset < count)
|
||||
{
|
||||
int bytesRead = await _stream
|
||||
.ReadAsync(buffer, offset, buffer.Length - offset, cancellationToken)
|
||||
.ReadAsync(buffer, offset, count - offset, cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
if (bytesRead == 0)
|
||||
|
||||
@@ -54,15 +54,20 @@ public sealed class WorkerFrameWriter
|
||||
$"Worker envelope payload length {payloadLength} exceeds the configured maximum of {_options.MaxMessageBytes} bytes.");
|
||||
}
|
||||
|
||||
byte[] payload = envelope.ToByteArray();
|
||||
byte[] lengthPrefix = new byte[sizeof(uint)];
|
||||
WriteUInt32LittleEndian(lengthPrefix, (uint)payloadLength);
|
||||
// Serialize once into a single buffer that carries the 4-byte
|
||||
// length prefix followed by the payload, then issue one stream write.
|
||||
// This avoids a second serialization pass (envelope.ToByteArray()
|
||||
// would re-run CalculateSize internally), a separate prefix array,
|
||||
// and a separate prefix write.
|
||||
int frameLength = sizeof(uint) + payloadLength;
|
||||
byte[] frame = new byte[frameLength];
|
||||
WriteUInt32LittleEndian(frame, (uint)payloadLength);
|
||||
envelope.WriteTo(new Span<byte>(frame, sizeof(uint), payloadLength));
|
||||
|
||||
await _writeLock.WaitAsync(cancellationToken).ConfigureAwait(false);
|
||||
try
|
||||
{
|
||||
await _stream.WriteAsync(lengthPrefix, 0, lengthPrefix.Length, cancellationToken).ConfigureAwait(false);
|
||||
await _stream.WriteAsync(payload, 0, payload.Length, cancellationToken).ConfigureAwait(false);
|
||||
await _stream.WriteAsync(frame, 0, frameLength, cancellationToken).ConfigureAwait(false);
|
||||
await _stream.FlushAsync(cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
finally
|
||||
|
||||
@@ -166,14 +166,17 @@ public sealed class WorkerPipeClient : IWorkerPipeClient
|
||||
string pipeName,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
int retryAttempts = Math.Max(
|
||||
0,
|
||||
(_connectTimeoutMilliseconds / Math.Min(_connectTimeoutMilliseconds, _connectAttemptTimeoutMilliseconds)) - 1);
|
||||
|
||||
// The real bound on connection attempts is the connectDeadline token
|
||||
// below (CancelAfter(connectTimeout)): Polly stops retrying as soon as
|
||||
// that token is cancelled. Driving retries purely off the deadline —
|
||||
// rather than a fragile attempt-count formula that ignored the
|
||||
// exponential backoff between attempts — keeps the time budget the
|
||||
// single source of truth. MaxRetryAttempts is set to its maximum so it
|
||||
// never ends the retry loop before the deadline does.
|
||||
ResiliencePipeline<NamedPipeClientStream> pipeline = new ResiliencePipelineBuilder<NamedPipeClientStream>()
|
||||
.AddRetry(new RetryStrategyOptions<NamedPipeClientStream>
|
||||
{
|
||||
MaxRetryAttempts = retryAttempts,
|
||||
MaxRetryAttempts = int.MaxValue,
|
||||
BackoffType = DelayBackoffType.Exponential,
|
||||
UseJitter = true,
|
||||
Delay = TimeSpan.FromMilliseconds(250),
|
||||
|
||||
@@ -24,10 +24,12 @@ namespace MxGateway.Worker.MxAccess;
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Threading: invoked from <see cref="MxAccessCommandExecutor"/>
|
||||
/// which runs on the STA. The wnwrap consumer's polling timer
|
||||
/// fires on a thread-pool thread; the only cross-thread surface
|
||||
/// is the <see cref="AlarmDispatcher"/>'s event handler, which
|
||||
/// hand-offs into the thread-safe <see cref="MxAccessEventQueue"/>.
|
||||
/// which runs on the STA. The wnwrap consumer owns no internal
|
||||
/// timer — the worker's STA drives <see cref="PollOnce"/> via
|
||||
/// <c>StaRuntime.InvokeAsync</c>, so the consumer's transition
|
||||
/// events fire on the same STA. The
|
||||
/// <see cref="AlarmDispatcher"/>'s event handler hands transitions
|
||||
/// into the thread-safe <see cref="MxAccessEventQueue"/>.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public sealed class AlarmCommandHandler : IAlarmCommandHandler
|
||||
@@ -191,58 +193,3 @@ public sealed class AlarmCommandHandler : IAlarmCommandHandler
|
||||
Unsubscribe();
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Per-session interface routing the worker's alarm IPC commands —
|
||||
/// <c>SubscribeAlarmsCommand</c>, <c>AcknowledgeAlarmCommand</c>,
|
||||
/// <c>QueryActiveAlarmsCommand</c>, <c>UnsubscribeAlarmsCommand</c> —
|
||||
/// to the underlying <see cref="AlarmDispatcher"/>. Production binding
|
||||
/// is <see cref="AlarmCommandHandler"/>; tests substitute a fake.
|
||||
/// </summary>
|
||||
public interface IAlarmCommandHandler : IDisposable
|
||||
{
|
||||
/// <summary>Begin a subscription against the supplied AVEVA alarm-provider expression.</summary>
|
||||
void Subscribe(string subscription, string sessionId);
|
||||
|
||||
/// <summary>Tear down the active subscription. No-op if not subscribed.</summary>
|
||||
void Unsubscribe();
|
||||
|
||||
/// <summary>Acknowledge a single alarm by GUID. Returns AVEVA's native status (0 = success).</summary>
|
||||
int Acknowledge(
|
||||
Guid alarmGuid,
|
||||
string comment,
|
||||
string operatorUser,
|
||||
string operatorNode,
|
||||
string operatorDomain,
|
||||
string operatorFullName);
|
||||
|
||||
/// <summary>
|
||||
/// Acknowledge a single alarm by (name, provider, group) — used when
|
||||
/// the caller has the human-readable reference but not the GUID.
|
||||
/// </summary>
|
||||
int AcknowledgeByName(
|
||||
string alarmName,
|
||||
string providerName,
|
||||
string groupName,
|
||||
string comment,
|
||||
string operatorUser,
|
||||
string operatorNode,
|
||||
string operatorDomain,
|
||||
string operatorFullName);
|
||||
|
||||
/// <summary>
|
||||
/// Snapshot the currently-active alarm set, optionally scoped to a
|
||||
/// prefix matched against <c>AlarmFullReference</c>.
|
||||
/// </summary>
|
||||
IReadOnlyList<ActiveAlarmSnapshot> QueryActive(string? alarmFilterPrefix);
|
||||
|
||||
/// <summary>
|
||||
/// Drives a single poll of the underlying alarm consumer on the
|
||||
/// caller's thread. This is a no-op when there is no active
|
||||
/// subscription. In production the caller is the worker's STA
|
||||
/// (marshalled via <c>StaRuntime.InvokeAsync</c>), which satisfies
|
||||
/// the <c>ThreadingModel=Apartment</c> requirement of
|
||||
/// <c>wwAlarmConsumerClass</c>.
|
||||
/// </summary>
|
||||
void PollOnce();
|
||||
}
|
||||
|
||||
@@ -14,22 +14,20 @@ namespace MxGateway.Worker.MxAccess;
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// This is the in-process slice of A.3 — it proves the
|
||||
/// consumer→sink→queue pipeline end-to-end without touching the
|
||||
/// worker's IPC command framing. The companion follow-up PR adds
|
||||
/// <c>SubscribeAlarmsCommand</c> / <c>AcknowledgeAlarmCommand</c> /
|
||||
/// <c>QueryActiveAlarmsCommand</c> proto entries plus the gateway-
|
||||
/// side <c>WorkerAlarmRpcDispatcher</c> that issues them.
|
||||
/// The dispatcher carries the consumer→sink→queue pipeline. The
|
||||
/// worker's IPC layer issues <c>SubscribeAlarmsCommand</c> /
|
||||
/// <c>AcknowledgeAlarmCommand</c> / <c>QueryActiveAlarmsCommand</c>
|
||||
/// through <see cref="AlarmCommandHandler"/>, which owns one
|
||||
/// dispatcher per session.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Threading: <see cref="WnWrapAlarmConsumer"/> polls on a
|
||||
/// <see cref="System.Threading.Timer"/> thread today; production
|
||||
/// hosting should marshal the consumer onto the worker's STA via
|
||||
/// <c>StaRuntime.InvokeAsync</c>. The dispatcher itself is purely
|
||||
/// a pass-through, so it inherits whatever thread the consumer's
|
||||
/// event handler fires on. Fan-out into <c>EnqueueTransition</c>
|
||||
/// uses <see cref="MxAccessEventQueue.Enqueue"/> which is
|
||||
/// thread-safe.
|
||||
/// Threading: <see cref="WnWrapAlarmConsumer"/> owns no internal
|
||||
/// timer — the worker's STA drives polling via
|
||||
/// <c>StaRuntime.InvokeAsync(() => PollOnce())</c>, so the
|
||||
/// consumer's <c>AlarmTransitionEmitted</c> event fires on the STA.
|
||||
/// The dispatcher is purely a pass-through, so it inherits that
|
||||
/// thread. Fan-out into <c>EnqueueTransition</c> uses the
|
||||
/// thread-safe <see cref="MxAccessEventQueue.Enqueue"/>.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public sealed class AlarmDispatcher : IDisposable
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using MxGateway.Contracts.Proto;
|
||||
|
||||
namespace MxGateway.Worker.MxAccess;
|
||||
|
||||
/// <summary>
|
||||
/// Per-session interface routing the worker's alarm IPC commands —
|
||||
/// <c>SubscribeAlarmsCommand</c>, <c>AcknowledgeAlarmCommand</c>,
|
||||
/// <c>QueryActiveAlarmsCommand</c>, <c>UnsubscribeAlarmsCommand</c> —
|
||||
/// to the underlying <see cref="AlarmDispatcher"/>. Production binding
|
||||
/// is <see cref="AlarmCommandHandler"/>; tests substitute a fake.
|
||||
/// </summary>
|
||||
public interface IAlarmCommandHandler : IDisposable
|
||||
{
|
||||
/// <summary>Begin a subscription against the supplied AVEVA alarm-provider expression.</summary>
|
||||
void Subscribe(string subscription, string sessionId);
|
||||
|
||||
/// <summary>Tear down the active subscription. No-op if not subscribed.</summary>
|
||||
void Unsubscribe();
|
||||
|
||||
/// <summary>Acknowledge a single alarm by GUID. Returns AVEVA's native status (0 = success).</summary>
|
||||
int Acknowledge(
|
||||
Guid alarmGuid,
|
||||
string comment,
|
||||
string operatorUser,
|
||||
string operatorNode,
|
||||
string operatorDomain,
|
||||
string operatorFullName);
|
||||
|
||||
/// <summary>
|
||||
/// Acknowledge a single alarm by (name, provider, group) — used when
|
||||
/// the caller has the human-readable reference but not the GUID.
|
||||
/// </summary>
|
||||
int AcknowledgeByName(
|
||||
string alarmName,
|
||||
string providerName,
|
||||
string groupName,
|
||||
string comment,
|
||||
string operatorUser,
|
||||
string operatorNode,
|
||||
string operatorDomain,
|
||||
string operatorFullName);
|
||||
|
||||
/// <summary>
|
||||
/// Snapshot the currently-active alarm set, optionally scoped to a
|
||||
/// prefix matched against <c>AlarmFullReference</c>.
|
||||
/// </summary>
|
||||
IReadOnlyList<ActiveAlarmSnapshot> QueryActive(string? alarmFilterPrefix);
|
||||
|
||||
/// <summary>
|
||||
/// Drives a single poll of the underlying alarm consumer on the
|
||||
/// caller's thread. This is a no-op when there is no active
|
||||
/// subscription. In production the caller is the worker's STA
|
||||
/// (marshalled via <c>StaRuntime.InvokeAsync</c>), which satisfies
|
||||
/// the <c>ThreadingModel=Apartment</c> requirement of
|
||||
/// <c>wwAlarmConsumerClass</c>.
|
||||
/// </summary>
|
||||
void PollOnce();
|
||||
}
|
||||
@@ -11,13 +11,15 @@ namespace MxGateway.Worker.MxAccess;
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// The dispatcher subscribes the consumer's
|
||||
/// <see cref="AlarmDispatcher"/> owns the wire-up: it constructs the
|
||||
/// consumer/sink pair, calls <see cref="Attach"/> to propagate the
|
||||
/// session id, and subscribes the consumer's
|
||||
/// <see cref="IMxAccessAlarmConsumer.AlarmTransitionEmitted"/> event
|
||||
/// to <see cref="EnqueueTransition"/> at session attach time. The
|
||||
/// <see cref="Attach"/> override here is a stub kept for the data-
|
||||
/// session shape; the actual wire-up between consumer and sink
|
||||
/// lives in the A.3 dispatcher (one step up the stack). Captured
|
||||
/// payload schema and consumer threading discipline are described in
|
||||
/// so each decoded transition reaches <see cref="EnqueueTransition"/>.
|
||||
/// The <see cref="Attach"/> method here carries only the session id —
|
||||
/// the alarm path needs no COM-event subscription of its own because
|
||||
/// the consumer already polls and raises transition events. The
|
||||
/// captured payload schema is described in
|
||||
/// <c>docs/AlarmClientDiscovery.md</c> "Option A — captured".
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
@@ -47,10 +49,10 @@ public sealed class MxAccessAlarmEventSink : IMxAccessEventSink
|
||||
if (mxAccessComObject is null) throw new ArgumentNullException(nameof(mxAccessComObject));
|
||||
this.sessionId = sessionId ?? string.Empty;
|
||||
|
||||
// PR A.2 — COM-side subscription scaffold. The MXAccess Toolkit alarm
|
||||
// event source is pinned during dev-rig validation. Until then, the
|
||||
// worker advertises no alarm subscription; data-change behaviour is
|
||||
// unaffected.
|
||||
// The alarm path needs no COM-event subscription here: the wnwrap
|
||||
// consumer is polled by the worker's STA and raises transition events
|
||||
// that AlarmDispatcher routes into EnqueueTransition. Attach only
|
||||
// records the session id stamped onto every emitted MxEvent.
|
||||
attached = true;
|
||||
}
|
||||
|
||||
|
||||
@@ -158,6 +158,16 @@ public sealed class MxAccessBaseEventSink : IMxAccessEventSink
|
||||
}
|
||||
catch (Exception exception)
|
||||
{
|
||||
// Two distinct failures land here, both intentionally fail-fast:
|
||||
// - A conversion failure from createEvent() — recorded here as an
|
||||
// MxaccessEventConversionFailed fault.
|
||||
// - An MxAccessEventQueueOverflowException from Enqueue when the
|
||||
// queue is at capacity. Per the fail-fast backpressure design
|
||||
// (docs/DesignDecisions.md) the event is dropped and the queue
|
||||
// has *already* self-recorded a QueueOverflow fault. Because
|
||||
// MxAccessEventQueue.RecordFault keeps only the first fault,
|
||||
// this catch's RecordFault call is then a deliberate near
|
||||
// no-op rather than a second, conflicting fault.
|
||||
eventQueue.RecordFault(CreateEventConversionFault(exception));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -103,12 +103,11 @@ public sealed class MxAccessEventMapper
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Creates an OnAlarmTransition event from MXAccess COM alarm-event arguments.
|
||||
/// PR A.2 — proto-build path is mechanical and unit-testable; the COM-side
|
||||
/// subscription that calls into this method (registering an
|
||||
/// <c>IAlarmEventSink</c> against the MXAccess Toolkit's alarm provider) is
|
||||
/// pinned during dev-rig validation since the exact MXAccess Toolkit version
|
||||
/// installed on the worker host determines the API shape.
|
||||
/// Creates an OnAlarmTransition event from MXAccess alarm-event arguments.
|
||||
/// The worker's alarm path drives this method from
|
||||
/// <see cref="MxAccessAlarmEventSink.EnqueueTransition"/> once
|
||||
/// <see cref="AlarmDispatcher"/> decodes a transition raised by the
|
||||
/// wnwrap-backed <see cref="WnWrapAlarmConsumer"/>.
|
||||
/// </summary>
|
||||
/// <param name="sessionId">Identifier of the session.</param>
|
||||
/// <param name="alarmFullReference">Fully-qualified MxAccess alarm reference (e.g. "Tank01.Level.HiHi").</param>
|
||||
|
||||
Reference in New Issue
Block a user